Diffstat (limited to 'net')
222 files changed, 3284 insertions, 2151 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8dfdd94e430f..bad01b14a4ad 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 		vlan_gvrp_uninit_applicant(real_dev);
 	}
 
-	/* Take it out of our own structures, but be sure to interlock with
-	 * HW accelerating devices or SW vlan input packet processing if
-	 * VLAN is not 0 (leave it there for 802.1p).
-	 */
-	if (vlan_id)
-		vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+	vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 985046ae4231..d6f7f7cb79c4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -228,32 +228,31 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	}
 }
 
-static int
-p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
+static __poll_t
+p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
 {
-	int ret, n;
+	__poll_t ret, n;
 	struct p9_trans_fd *ts = NULL;
 
 	if (client && client->status == Connected)
 		ts = client->trans;
 
-	if (!ts)
-		return -EREMOTEIO;
+	if (!ts) {
+		if (err)
+			*err = -EREMOTEIO;
+		return POLLERR;
+	}
 
 	if (!ts->rd->f_op->poll)
-		return -EIO;
-
-	if (!ts->wr->f_op->poll)
-		return -EIO;
-
-	ret = ts->rd->f_op->poll(ts->rd, pt);
-	if (ret < 0)
-		return ret;
+		ret = DEFAULT_POLLMASK;
+	else
+		ret = ts->rd->f_op->poll(ts->rd, pt);
 
 	if (ts->rd != ts->wr) {
-		n = ts->wr->f_op->poll(ts->wr, pt);
-		if (n < 0)
-			return n;
+		if (!ts->wr->f_op->poll)
+			n = DEFAULT_POLLMASK;
+		else
+			n = ts->wr->f_op->poll(ts->wr, pt);
 		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
 	}
 
@@ -298,7 +297,8 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
 
 static void p9_read_work(struct work_struct *work)
 {
-	int n, err;
+	__poll_t n;
+	int err;
 	struct p9_conn *m;
 	int status = REQ_STATUS_ERROR;
 
@@ -398,7 +398,7 @@ end_clear:
 	if (test_and_clear_bit(Rpending, &m->wsched))
 		n = POLLIN;
 	else
-		n = p9_fd_poll(m->client, NULL);
+		n = p9_fd_poll(m->client, NULL, NULL);
 
 	if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
 		p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
@@ -448,7 +448,8 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
 
 static void p9_write_work(struct work_struct *work)
 {
-	int n, err;
+	__poll_t n;
+	int err;
 	struct p9_conn *m;
 	struct p9_req_t *req;
 
@@ -506,7 +507,7 @@ end_clear:
 	if (test_and_clear_bit(Wpending, &m->wsched))
 		n = POLLOUT;
 	else
-		n = p9_fd_poll(m->client, NULL);
+		n = p9_fd_poll(m->client, NULL, NULL);
 
 	if ((n & POLLOUT) &&
 	    !test_and_set_bit(Wworksched, &m->wsched)) {
@@ -581,7 +582,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 
 static void p9_conn_create(struct p9_client *client)
 {
-	int n;
+	__poll_t n;
 	struct p9_trans_fd *ts = client->trans;
 	struct p9_conn *m = &ts->conn;
 
@@ -597,7 +598,7 @@ static void p9_conn_create(struct p9_client *client)
 	INIT_LIST_HEAD(&m->poll_pending_link);
 	init_poll_funcptr(&m->pt, p9_pollwait);
 
-	n = p9_fd_poll(client, &m->pt);
+	n = p9_fd_poll(client, &m->pt, NULL);
 	if (n & POLLIN) {
 		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		set_bit(Rpending, &m->wsched);
@@ -617,17 +618,16 @@ static void p9_conn_create(struct p9_client *client)
 
 static void p9_poll_mux(struct p9_conn *m)
 {
-	int n;
+	__poll_t n;
+	int err = -ECONNRESET;
 
 	if (m->err < 0)
 		return;
 
-	n = p9_fd_poll(m->client, NULL);
-	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+	n = p9_fd_poll(m->client, NULL, &err);
+	if (n & (POLLERR | POLLHUP | POLLNVAL)) {
 		p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
-		if (n >= 0)
-			n = -ECONNRESET;
-		p9_conn_cancel(m, n);
+		p9_conn_cancel(m, err);
 	}
 
 	if (n & POLLIN) {
@@ -663,7 +663,7 @@ static void p9_poll_mux(struct p9_conn *m)
 
 static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 {
-	int n;
+	__poll_t n;
 	struct p9_trans_fd *ts = client->trans;
 	struct p9_conn *m = &ts->conn;
 
@@ -680,7 +680,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 	if (test_and_clear_bit(Wpending, &m->wsched))
 		n = POLLOUT;
 	else
-		n = p9_fd_poll(m->client, NULL);
+		n = p9_fd_poll(m->client, NULL, NULL);
 
 	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
 		schedule_work(&m->wq);
@@ -839,7 +839,6 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 	if (IS_ERR(file)) {
 		pr_err("%s (%d): failed to map fd\n",
 		       __func__, task_pid_nr(current));
-		sock_release(csocket);
 		kfree(p);
 		return PTR_ERR(file);
 	}
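Note on the p9_fd_poll() conversion above: once the return type is __poll_t, the function returns a pure event bitmask and can no longer encode a negative errno, so failures now travel through the new optional *err out-parameter while the mask carries POLLERR. A minimal userspace sketch of that calling convention (illustrative names and values, not the kernel API):

#include <stdio.h>

#define XPOLLIN  0x001
#define XPOLLOUT 0x004
#define XPOLLERR 0x008

/* Return only event bits; hand an errno-style code back via *err. */
static unsigned int sample_poll(int connected, int *err)
{
	if (!connected) {
		if (err)
			*err = -107;	/* stand-in for -ENOTCONN */
		return XPOLLERR;
	}
	return XPOLLIN | XPOLLOUT;
}

int main(void)
{
	int err = 0;
	unsigned int mask = sample_poll(0, &err);

	if (mask & XPOLLERR)
		printf("connection error: %d\n", err);
	return 0;
}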
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 325c56043007..086a4abdfa7c 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -543,3 +543,7 @@ static void p9_trans_xen_exit(void)
 	return xenbus_unregister_driver(&xen_9pfs_front_driver);
 }
 module_exit(p9_trans_xen_exit);
+
+MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
+MODULE_DESCRIPTION("Xen Transport for 9P");
+MODULE_LICENSE("GPL");
diff --git a/net/atm/common.c b/net/atm/common.c
index 8a4f99114cd2..8f12f1c6fa14 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -648,11 +648,11 @@ out:
 	return error;
 }
 
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	struct atm_vcc *vcc;
-	unsigned int mask;
+	__poll_t mask;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
diff --git a/net/atm/common.h b/net/atm/common.h
index d9d583712a91..5850649068bb 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
 int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		int flags);
 int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
 int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 1b659ab652fb..bbe8414b6ee7 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	orig_node->last_seen = jiffies;
 
 	/* find packet count of corresponding one hop neighbor */
-	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
 	if_num = if_incoming->if_num;
 	orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
 	neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	} else {
 		neigh_rq_count = 0;
 	}
-	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+	spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
 
 	/* pay attention to not get a value bigger than 100 % */
 	if (orig_eq_count > neigh_rq_count)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 341ceab8338d..e0e2bfcd6b3e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
 	}
 
 	orig_gw = batadv_gw_node_get(bat_priv, orig_node);
-	if (!orig_node)
+	if (!orig_gw)
 		goto out;
 
 	if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a98cf1104a30..ebe6e38934e4 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
 	 */
 	if (skb->priority >= 256 && skb->priority <= 263)
 		frag_header.priority = skb->priority - 256;
+	else
+		frag_header.priority = 0;
 
 	ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
 	ether_addr_copy(frag_header.dest, orig_node->orig);
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bded31121d12..a98e0a986cef 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -292,7 +292,7 @@ out:
 	return len;
 }
 
-static unsigned int batadv_socket_poll(struct file *file, poll_table *wait)
+static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
 {
 	struct batadv_socket_client *socket_client = file->private_data;
 
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 4ef4bde2cc2d..76451460c98d 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -176,7 +176,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf,
 	return error;
 }
 
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
+static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
 {
 	struct batadv_priv *bat_priv = file->private_data;
 	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 15cd2139381e..ebc4e2241c77 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
 
 /**
  * batadv_tp_sender_timeout - timer that fires in case of packet loss
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
  *
  * If fired it means that there was packet loss.
  * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
 /**
  * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
  * reached without received ack
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
  */
 static void batadv_tp_receiver_shutdown(struct timer_list *t)
 {
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 91e3ba280706..671b907ba678 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -421,7 +421,7 @@ out:
 }
 EXPORT_SYMBOL(bt_sock_stream_recvmsg);
 
-static inline unsigned int bt_accept_poll(struct sock *parent)
+static inline __poll_t bt_accept_poll(struct sock *parent)
 {
 	struct bt_sock *s, *n;
 	struct sock *sk;
@@ -437,11 +437,11 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
 	return 0;
 }
 
-unsigned int bt_sock_poll(struct file *file, struct socket *sock,
-			  poll_table *wait)
+__poll_t bt_sock_poll(struct file *file, struct socket *sock,
+		      poll_table *wait)
 {
 	struct sock *sk = sock->sk;
-	unsigned int mask = 0;
+	__poll_t mask = 0;
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 43ba91c440bc..fc6615d59165 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
 			break;
 
 		case L2CAP_CONF_EFS:
-			remote_efs = 1;
-			if (olen == sizeof(efs))
+			if (olen == sizeof(efs)) {
+				remote_efs = 1;
 				memcpy(&efs, (void *) val, olen);
+			}
 			break;
 
 		case L2CAP_CONF_EWS:
@@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
 			break;
 
 		case L2CAP_CONF_EFS:
-			if (olen == sizeof(efs))
+			if (olen == sizeof(efs)) {
 				memcpy(&efs, (void *)val, olen);
 
-			if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
-			    efs.stype != L2CAP_SERV_NOTRAFIC &&
-			    efs.stype != chan->local_stype)
-				return -ECONNREFUSED;
+				if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+				    efs.stype != L2CAP_SERV_NOTRAFIC &&
+				    efs.stype != chan->local_stype)
+					return -ECONNREFUSED;
 
-			l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
-					   (unsigned long) &efs, endptr - ptr);
+				l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+						   (unsigned long) &efs, endptr - ptr);
+			}
 			break;
 
 		case L2CAP_CONF_FCS:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
 	struct net_bridge *br = netdev_priv(dev);
 	int err;
 
+	err = register_netdevice(dev);
+	if (err)
+		return err;
+
 	if (tb[IFLA_ADDRESS]) {
 		spin_lock_bh(&br->lock);
 		br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
 		spin_unlock_bh(&br->lock);
 	}
 
-	err = register_netdevice(dev);
-	if (err)
-		return err;
-
 	err = br_changelink(dev, tb, data, extack);
 	if (err)
-		unregister_netdevice(dev);
+		br_dev_delete(dev, NULL);
+
 	return err;
 }
 
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 2d38b6e34203..e0adcd123f48 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
 	mutex_lock(&caifdevs->lock);
 	list_add_rcu(&caifd->list, &caifdevs->list);
 
-	strncpy(caifd->layer.name, dev->name,
-		sizeof(caifd->layer.name) - 1);
-	caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+	strlcpy(caifd->layer.name, dev->name,
+		sizeof(caifd->layer.name));
 	caifd->layer.transmit = transmit;
 	cfcnfg_add_phy_layer(cfg,
 			     dev,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 632d5a416d97..64048cec41e0 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,11 +934,11 @@ static int caif_release(struct socket *sock)
 }
 
 /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static unsigned int caif_poll(struct file *file,
-			      struct socket *sock, poll_table *wait)
+static __poll_t caif_poll(struct file *file,
+			  struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
-	unsigned int mask;
+	__poll_t mask;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 5cd44f001f64..1a082a946045 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
 		dev_add_pack(&caif_usb_type);
 		pack_added = true;
 
-	strncpy(layer->name, dev->name,
-		sizeof(layer->name) - 1);
-	layer->name[sizeof(layer->name) - 1] = 0;
+	strlcpy(layer->name, dev->name, sizeof(layer->name));
 
 	return 0;
 }
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 273cb07f57d8..8f00bea093b9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
 	case CAIFPROTO_RFM:
 		l->linktype = CFCTRL_SRV_RFM;
 		l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
-		strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
-			sizeof(l->u.rfm.volume)-1);
-		l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
+		strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+			sizeof(l->u.rfm.volume));
 		break;
 	case CAIFPROTO_UTIL:
 		l->linktype = CFCTRL_SRV_UTIL;
 		l->endpoint = 0x00;
 		l->chtype = 0x00;
-		strncpy(l->u.utility.name, s->sockaddr.u.util.service,
-			sizeof(l->u.utility.name)-1);
-		l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
+		strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+			sizeof(l->u.utility.name));
 		caif_assert(sizeof(l->u.utility.name) > 10);
 		l->u.utility.paramlen = s->param.size;
 		if (l->u.utility.paramlen > sizeof(l->u.utility.params))
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index f5afda1abc76..655ed7032150 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
 		tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
 		cfpkt_add_body(pkt, &tmp16, 2);
 		memset(utility_name, 0, sizeof(utility_name));
-		strncpy(utility_name, param->u.utility.name,
-			UTILITY_NAME_LENGTH - 1);
+		strlcpy(utility_name, param->u.utility.name,
+			UTILITY_NAME_LENGTH);
 		cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
 		tmp8 = param->u.utility.paramlen;
 		cfpkt_add_body(pkt, &tmp8, 1);
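The caif changes above all collapse the same three-line idiom -- strncpy() of size minus one, followed by manual NUL-termination -- into one strlcpy() call, which always terminates within the given size. A small userspace illustration of the difference; my_strlcpy() is a local stand-in since glibc does not ship strlcpy():

#include <stdio.h>
#include <string.h>

/* Minimal strlcpy for illustration: copies at most size-1 bytes and
 * always NUL-terminates; returns strlen(src) so callers can detect
 * truncation (return value >= size means truncated). */
static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = len < size - 1 ? len : size - 1;

		memcpy(dst, src, n);
		dst[n] = '\0';
	}
	return len;
}

int main(void)
{
	char a[8], b[8];

	/* strncpy does not terminate when src fills the buffer... */
	strncpy(a, "0123456789", sizeof(a) - 1);
	a[sizeof(a) - 1] = '\0';	/* ...so callers must do it by hand */

	my_strlcpy(b, "0123456789", sizeof(b));	/* one call, always safe */
	printf("%s %s\n", a, b);
	return 0;
}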
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 003b2d6d655f..4d7f988a3130 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -721,20 +721,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
-	if (WARN_ONCE(dev->type != ARPHRD_CAN ||
-		      skb->len != CAN_MTU ||
-		      cfd->len > CAN_MAX_DLEN,
-		      "PF_CAN: dropped non conform CAN skbuf: "
-		      "dev type %d, len %d, datalen %d\n",
-		      dev->type, skb->len, cfd->len))
-		goto drop;
+	if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
+		     cfd->len > CAN_MAX_DLEN)) {
+		pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+			     dev->type, skb->len, cfd->len);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
 
 	can_receive(skb, dev);
 	return NET_RX_SUCCESS;
-
-drop:
-	kfree_skb(skb);
-	return NET_RX_DROP;
 }
 
 static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -742,20 +738,16 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
-	if (WARN_ONCE(dev->type != ARPHRD_CAN ||
-		      skb->len != CANFD_MTU ||
-		      cfd->len > CANFD_MAX_DLEN,
-		      "PF_CAN: dropped non conform CAN FD skbuf: "
-		      "dev type %d, len %d, datalen %d\n",
-		      dev->type, skb->len, cfd->len))
-		goto drop;
+	if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
+		     cfd->len > CANFD_MAX_DLEN)) {
+		pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+			     dev->type, skb->len, cfd->len);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
 
 	can_receive(skb, dev);
 	return NET_RX_SUCCESS;
-
-drop:
-	kfree_skb(skb);
-	return NET_RX_DROP;
 }
 
 /*
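The af_can.c hunks above replace WARN_ONCE(), which taints the kernel and can panic under panic_on_warn, with a once-only log message plus an explicit drop: malformed input becomes a diagnostic, not a crash. A userspace sketch of the log-once-and-drop pattern (warn_once() here is a hypothetical local macro, not the kernel's pr_warn_once()):

#include <stdio.h>
#include <stdbool.h>

/* Print a diagnostic only the first time it triggers. */
#define warn_once(fmt, ...)					\
	do {							\
		static bool warned;				\
		if (!warned) {					\
			warned = true;				\
			fprintf(stderr, fmt, __VA_ARGS__);	\
		}						\
	} while (0)

static int rcv(int len)
{
	if (len != 16) {	/* reject non-conforming frames */
		warn_once("dropped frame: len %d\n", len);
		return -1;	/* drop instead of asserting */
	}
	return 0;
}

int main(void)
{
	rcv(3);			/* logged once */
	rcv(5);			/* second bad frame: silent */
	return rcv(16);		/* conforming frame: accepted */
}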
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 522873ed120b..b7d9293940b5 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -72,12 +72,10 @@ static inline int connection_based(struct sock *sk)
 static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
 				  void *key)
 {
-	unsigned long bits = (unsigned long)key;
-
 	/*
 	 * Avoid a wakeup if event not interesting for us
 	 */
-	if (bits && !(bits & (POLLIN | POLLERR)))
+	if (key && !(key_to_poll(key) & (POLLIN | POLLERR)))
 		return 0;
 	return autoremove_wake_function(wait, mode, sync, key);
 }
@@ -833,11 +831,11 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
  *	and you use a different write policy from sock_writeable()
  *	then please supply your own write_space callback.
  */
-unsigned int datagram_poll(struct file *file, struct socket *sock,
-			   poll_table *wait)
+__poll_t datagram_poll(struct file *file, struct socket *sock,
+		       poll_table *wait)
 {
 	struct sock *sk = sock->sk;
-	unsigned int mask;
+	__poll_t mask;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 07ed21d64f92..613fb4066be7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 	 * when the name is long and there isn't enough space left
 	 * for the digits, or if all bits are used.
 	 */
-	return p ? -ENFILE : -EEXIST;
+	return -ENFILE;
 }
 
 static int dev_alloc_name_ns(struct net *net,
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
 int dev_get_valid_name(struct net *net, struct net_device *dev,
 		       const char *name)
 {
-	return dev_alloc_name_ns(net, dev, name);
+	BUG_ON(!net);
+
+	if (!dev_valid_name(name))
+		return -EINVAL;
+
+	if (strchr(name, '%'))
+		return dev_alloc_name_ns(net, dev, name);
+	else if (__dev_get_by_name(net, name))
+		return -EEXIST;
+	else if (dev->name != name)
+		strlcpy(dev->name, name, IFNAMSIZ);
+
+	return 0;
 }
 EXPORT_SYMBOL(dev_get_valid_name);
 
@@ -3139,10 +3151,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
 		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
 
 		/* + transport layer */
-		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
-			hdr_len += tcp_hdrlen(skb);
-		else
-			hdr_len += sizeof(struct udphdr);
+		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+			const struct tcphdr *th;
+			struct tcphdr _tcphdr;
+
+			th = skb_header_pointer(skb, skb_transport_offset(skb),
+						sizeof(_tcphdr), &_tcphdr);
+			if (likely(th))
+				hdr_len += __tcp_hdrlen(th);
+		} else {
+			struct udphdr _udphdr;
+
+			if (skb_header_pointer(skb, skb_transport_offset(skb),
+					       sizeof(_udphdr), &_udphdr))
+				hdr_len += sizeof(struct udphdr);
+		}
 
 		if (shinfo->gso_type & SKB_GSO_DODGY)
 			gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
@@ -3904,7 +3927,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 				     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
 				     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
 			goto do_drop;
-		if (troom > 0 && __skb_linearize(skb))
+		if (skb_linearize(skb))
 			goto do_drop;
 	}
 
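The qdisc_pkt_len_init() hunk above stops trusting gso_type alone and uses skb_header_pointer() to verify that the claimed TCP/UDP header actually lies within the packet before reading it. A hedged userspace sketch of that bounds-check-then-copy pattern (header_pointer() is an illustrative reimplementation, not the kernel helper):

#include <stdio.h>
#include <string.h>

/* Copy a header out of a packet only if it fully fits inside the
 * buffer; return NULL otherwise. */
static void *header_pointer(const void *buf, size_t buflen,
			    size_t offset, size_t hdrlen, void *copy)
{
	if (offset > buflen || hdrlen > buflen - offset)
		return NULL;
	memcpy(copy, (const char *)buf + offset, hdrlen);
	return copy;
}

int main(void)
{
	unsigned char pkt[4] = { 1, 2, 3, 4 };
	unsigned char hdr[8];

	/* A packet too short for the header it advertises is rejected
	 * up front instead of being read out of bounds. */
	if (!header_pointer(pkt, sizeof(pkt), 0, sizeof(hdr), hdr))
		printf("truncated header, skipping\n");
	return 0;
}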
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f8fcf450a36e..8225416911ae 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
 	return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
 }
 
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
-	char name[sizeof(current->comm)];
-
-	pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
-		     get_task_comm(name, current), details);
-}
-
 /* Query device for its ethtool_cmd settings.
  *
  * Backward compatibility note: for compatibility with legacy ethtool,
@@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
 						   &link_ksettings);
 	if (err < 0)
 		return err;
-	if (!convert_link_ksettings_to_legacy_settings(&cmd,
-						       &link_ksettings))
-		warn_incomplete_ethtool_legacy_settings_conversion(
-			"link modes are only partially reported");
+	convert_link_ksettings_to_legacy_settings(&cmd,
+						  &link_ksettings);
 
 	/* send a sensible cmd tag back to user */
 	cmd.cmd = ETHTOOL_GSET;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..1c0eb436671f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -458,6 +458,10 @@ do_pass:
 			    convert_bpf_extensions(fp, &insn))
 				break;
 
+			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
+			    fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+
 			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
 			break;
 
@@ -1054,11 +1058,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 		 */
 		goto out_err_free;
 
-	/* We are guaranteed to never error here with cBPF to eBPF
-	 * transitions, since there's no issue with type compatibility
-	 * checks on program arrays.
-	 */
 	fp = bpf_prog_select_runtime(fp, &err);
+	if (err)
+		goto out_err_free;
 
 	kfree(old_prog);
 	return fp;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15ce30063765..544bddf08e13 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -976,8 +976,8 @@ ip_proto_again:
 out_good:
 	ret = true;
 
-	key_control->thoff = (u16)nhoff;
 out:
+	key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
 	key_basic->n_proto = proto;
 	key_basic->ip_proto = ip_proto;
 
@@ -985,7 +985,6 @@ out:
 
 out_bad:
 	ret = false;
-	key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
 	goto out;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d1f5fe986edd..7f831711b6e0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -532,7 +532,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
 
-	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 
 	if (n->parms->dead) {
 		rc = ERR_PTR(-EINVAL);
@@ -544,7 +544,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 	     n1 != NULL;
 	     n1 = rcu_dereference_protected(n1->next,
 					    lockdep_is_held(&tbl->lock))) {
-		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
 			if (want_ref)
 				neigh_hold(n1);
 			rc = n1;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..60a71be75aea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
 	spin_lock_bh(&net->nsid_lock);
 	peer = idr_find(&net->netns_ids, id);
 	if (peer)
-		get_net(peer);
+		peer = maybe_get_net(peer);
 	spin_unlock_bh(&net->nsid_lock);
 	rcu_read_unlock();
 
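get_net_ns_by_id() now uses maybe_get_net(), which only takes a reference if the refcount has not already dropped to zero, closing a race against concurrent namespace teardown. A minimal sketch of that conditional-refcount idea using C11 atomics (maybe_get() is illustrative, not the kernel implementation):

#include <stdio.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if the object is not already being torn
 * down (refcount at zero). */
static bool maybe_get(atomic_int *refs)
{
	int old = atomic_load(refs);

	while (old != 0) {
		/* on failure, CAS reloads the current count into old */
		if (atomic_compare_exchange_weak(refs, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;	/* object is dying; caller must not use it */
}

int main(void)
{
	atomic_int live = 2, dying = 0;

	printf("%d %d\n", maybe_get(&live), maybe_get(&dying));
	return 0;
}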
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 1c4810919a0a..b9057478d69c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/skbuff.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8..778d7f03404a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev,
 	return false;
 }
 
-static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+static struct net *get_target_net(struct sock *sk, int netnsid)
 {
 	struct net *net;
 
-	net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+	net = get_net_ns_by_id(sock_net(sk), netnsid);
 	if (!net)
 		return ERR_PTR(-EINVAL);
 
 	/* For now, the caller is required to have CAP_NET_ADMIN in
 	 * the user namespace owning the target net ns.
 	 */
-	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+	if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
 		put_net(net);
 		return ERR_PTR(-EACCES);
 	}
@@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			ifla_policy, NULL) >= 0) {
 		if (tb[IFLA_IF_NETNSID]) {
 			netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-			tgt_net = get_target_net(skb, netnsid);
+			tgt_net = get_target_net(skb->sk, netnsid);
 			if (IS_ERR(tgt_net)) {
 				tgt_net = net;
 				netnsid = -1;
@@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (tb[IFLA_IF_NETNSID]) {
 		netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-		tgt_net = get_target_net(skb, netnsid);
+		tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
 		if (IS_ERR(tgt_net))
 			return PTR_ERR(tgt_net);
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b0ff396fa9d..08f574081315 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 	int i, new_frags;
 	u32 d_off;
 
-	if (!num_frags)
-		return 0;
-
 	if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
 		return -EINVAL;
 
+	if (!num_frags)
+		goto release;
+
 	new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < new_frags; i++) {
 		page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
 	skb_shinfo(skb)->nr_frags = new_frags;
 
+release:
 	skb_zcopy_clear(skb, false);
 	return 0;
 }
@@ -3654,8 +3655,6 @@ normal:
 
 		skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
 					      SKBTX_SHARED_FRAG;
-		if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
-			goto err;
 
 		while (pos < offset + len) {
 			if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
 
 			if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
 				goto err;
+			if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+				goto err;
 
 			*nskb_frag = *frag;
 			__skb_frag_ref(nskb_frag);
@@ -4293,7 +4294,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 	struct sock *sk = skb->sk;
 
 	if (!skb_may_tx_timestamp(sk, false))
-		return;
+		goto err;
 
 	/* Take a reference to prevent skb_orphan() from freeing the socket,
 	 * but only if the socket refcount is not zero.
@@ -4302,7 +4303,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 		*skb_hwtstamps(skb) = *hwtstamps;
 		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
 		sock_put(sk);
+		return;
 	}
+
+err:
+	kfree_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f17412..1211159718ad 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2496,7 +2496,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
 }
 EXPORT_SYMBOL(sock_no_getname);
 
-unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
+__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
 {
 	return 0;
 }
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 217f4e3b82f6..146b50e30659 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
 	case SKNLGRP_INET6_UDP_DESTROY:
 		if (!sock_diag_handlers[AF_INET6])
 			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-				       NETLINK_SOCK_DIAG, AF_INET);
+				       NETLINK_SOCK_DIAG, AF_INET6);
 		break;
 	}
 	return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcc..a47ad6cd41c0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
 		.data		= &bpf_jit_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 		.proc_handler	= proc_dointvec
+#else
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+#endif
 	},
 # ifdef CONFIG_HAVE_EBPF_JIT
 	{
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 1c75cd1255f6..92d016e87816 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t)
 
 	ccid2_pr_debug("RTO_EXPIRE\n");
 
+	if (sk->sk_state == DCCP_CLOSED)
+		goto out;
+
 	/* back-off timer */
 	hc->tx_rto <<= 1;
 	if (hc->tx_rto > DCCP_RTO_MAX)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0c55ffb859bf..f91e3816806b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,7 +316,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		 int flags, int *addr_len);
 void dccp_shutdown(struct sock *sk, int how);
 int inet_dccp_listen(struct socket *sock, int backlog);
-unsigned int dccp_poll(struct file *file, struct socket *sock,
-		       poll_table *wait);
+__poll_t dccp_poll(struct file *file, struct socket *sock,
+		   poll_table *wait);
 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 void dccp_req_err(struct sock *sk, u64 seq);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index abd07a443219..178bb9833311 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 		if (state == DCCP_TIME_WAIT)
 			timeo = DCCP_TIMEWAIT_LEN;
 
+		/* tw_timer is pinned, so we need to make sure BH are disabled
+		 * in following section, otherwise timer handler could run before
+		 * we complete the initialization.
+		 */
+		local_bh_disable();
 		inet_twsk_schedule(tw, timeo);
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
 		inet_twsk_put(tw);
+		local_bh_enable();
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up. We've got bigger problems than
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168fcc06a..8b8db3d481bd 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+	struct dccp_sock *dp = dccp_sk(sk);
 	int err = 0;
 	const int old_state = sk->sk_state;
 
@@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags)
 		sk->sk_err = ECONNRESET;
 
 	dccp_clear_xmit_timers(sk);
+	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+	dp->dccps_hc_rx_ccid = NULL;
+	dp->dccps_hc_tx_ccid = NULL;
 
 	__skb_queue_purge(&sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_write_queue);
@@ -313,10 +318,10 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
  *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
-unsigned int dccp_poll(struct file *file, struct socket *sock,
-		       poll_table *wait)
+__poll_t dccp_poll(struct file *file, struct socket *sock,
+		   poll_table *wait)
 {
-	unsigned int mask;
+	__poll_t mask;
 	struct sock *sk = sock->sk;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 518cea17b811..9c2dde819817 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1209,11 +1209,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
 }
 
 
-static unsigned int dn_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	struct dn_scp *scp = DN_SK(sk);
-	int mask = datagram_poll(file, sock, wait);
+	__poll_t mask = datagram_poll(file, sock, wait);
 
 	if (!skb_queue_empty(&scp->other_receive_queue))
 		mask |= POLLRDBAND;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 44e3fb7dec8c..1e287420ff49 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c | |||
@@ -51,9 +51,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index) | |||
51 | INIT_LIST_HEAD(&dst->list); | 51 | INIT_LIST_HEAD(&dst->list); |
52 | list_add_tail(&dsa_tree_list, &dst->list); | 52 | list_add_tail(&dsa_tree_list, &dst->list); |
53 | 53 | ||
54 | /* Initialize the reference counter to the number of switches, not 1 */ | ||
55 | kref_init(&dst->refcount); | 54 | kref_init(&dst->refcount); |
56 | refcount_set(&dst->refcount.refcount, 0); | ||
57 | 55 | ||
58 | return dst; | 56 | return dst; |
59 | } | 57 | } |
@@ -64,20 +62,23 @@ static void dsa_tree_free(struct dsa_switch_tree *dst) | |||
64 | kfree(dst); | 62 | kfree(dst); |
65 | } | 63 | } |
66 | 64 | ||
67 | static struct dsa_switch_tree *dsa_tree_touch(int index) | 65 | static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst) |
68 | { | 66 | { |
69 | struct dsa_switch_tree *dst; | 67 | if (dst) |
70 | 68 | kref_get(&dst->refcount); | |
71 | dst = dsa_tree_find(index); | ||
72 | if (!dst) | ||
73 | dst = dsa_tree_alloc(index); | ||
74 | 69 | ||
75 | return dst; | 70 | return dst; |
76 | } | 71 | } |
77 | 72 | ||
78 | static void dsa_tree_get(struct dsa_switch_tree *dst) | 73 | static struct dsa_switch_tree *dsa_tree_touch(int index) |
79 | { | 74 | { |
80 | kref_get(&dst->refcount); | 75 | struct dsa_switch_tree *dst; |
76 | |||
77 | dst = dsa_tree_find(index); | ||
78 | if (dst) | ||
79 | return dsa_tree_get(dst); | ||
80 | else | ||
81 | return dsa_tree_alloc(index); | ||
81 | } | 82 | } |
82 | 83 | ||
83 | static void dsa_tree_release(struct kref *ref) | 84 | static void dsa_tree_release(struct kref *ref) |
@@ -91,7 +92,8 @@ static void dsa_tree_release(struct kref *ref) | |||
91 | 92 | ||
92 | static void dsa_tree_put(struct dsa_switch_tree *dst) | 93 | static void dsa_tree_put(struct dsa_switch_tree *dst) |
93 | { | 94 | { |
94 | kref_put(&dst->refcount, dsa_tree_release); | 95 | if (dst) |
96 | kref_put(&dst->refcount, dsa_tree_release); | ||
95 | } | 97 | } |
96 | 98 | ||
97 | static bool dsa_port_is_dsa(struct dsa_port *port) | 99 | static bool dsa_port_is_dsa(struct dsa_port *port) |
@@ -765,6 +767,7 @@ int dsa_register_switch(struct dsa_switch *ds) | |||
765 | 767 | ||
766 | mutex_lock(&dsa2_mutex); | 768 | mutex_lock(&dsa2_mutex); |
767 | err = dsa_switch_probe(ds); | 769 | err = dsa_switch_probe(ds); |
770 | dsa_tree_put(ds->dst); | ||
768 | mutex_unlock(&dsa2_mutex); | 771 | mutex_unlock(&dsa2_mutex); |
769 | 772 | ||
770 | return err; | 773 | return err; |
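[Note] In the dsa2.c hunks, dsa_tree_get() and dsa_tree_put() become NULL-tolerant and get() now returns the tree, which lets dsa_tree_touch() read as find-or-alloc and lets dsa_register_switch() drop its reference unconditionally. The earlier trick of forcing the freshly kref_init()ed counter back to 0 goes away; the kref starts at 1 as intended. The pattern, sketched with hypothetical foo names:

    static struct foo *foo_get(struct foo *f)
    {
            if (f)
                    kref_get(&f->refcount);
            return f;
    }

    static void foo_put(struct foo *f)
    {
            if (f)
                    kref_put(&f->refcount, foo_release);
    }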
diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d6e7a642493b..a95a55f79137 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/of_net.h> | 16 | #include <linux/of_net.h> |
17 | #include <linux/of_mdio.h> | 17 | #include <linux/of_mdio.h> |
18 | #include <linux/mdio.h> | 18 | #include <linux/mdio.h> |
19 | #include <linux/list.h> | ||
20 | #include <net/rtnetlink.h> | 19 | #include <net/rtnetlink.h> |
21 | #include <net/pkt_cls.h> | 20 | #include <net/pkt_cls.h> |
22 | #include <net/tc_act/tc_mirred.h> | 21 | #include <net/tc_act/tc_mirred.h> |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a8d7c5a9fb05..6c231b43974d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey) | |||
223 | 223 | ||
224 | static int arp_constructor(struct neighbour *neigh) | 224 | static int arp_constructor(struct neighbour *neigh) |
225 | { | 225 | { |
226 | __be32 addr = *(__be32 *)neigh->primary_key; | 226 | __be32 addr; |
227 | struct net_device *dev = neigh->dev; | 227 | struct net_device *dev = neigh->dev; |
228 | struct in_device *in_dev; | 228 | struct in_device *in_dev; |
229 | struct neigh_parms *parms; | 229 | struct neigh_parms *parms; |
230 | u32 inaddr_any = INADDR_ANY; | ||
230 | 231 | ||
232 | if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) | ||
233 | memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); | ||
234 | |||
235 | addr = *(__be32 *)neigh->primary_key; | ||
231 | rcu_read_lock(); | 236 | rcu_read_lock(); |
232 | in_dev = __in_dev_get_rcu(dev); | 237 | in_dev = __in_dev_get_rcu(dev); |
233 | if (!in_dev) { | 238 | if (!in_dev) { |
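[Note] On loopback and point-to-point devices no real address resolution is needed, so arp_constructor() normalizes the neighbour key to INADDR_ANY, letting entries on such a device share one key. The read of primary_key has to move after the rewrite, otherwise the rest of the constructor would classify the stale address. Sketched ordering:

    if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
            memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
    /* only now is the key stable enough to classify */
    addr = *(__be32 *)neigh->primary_key;

The u32 local exists because memcpy() needs an address to copy from; INADDR_ANY itself is a bare constant.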
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a4573bccd6da..7a93359fbc72 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1428,7 +1428,7 @@ skip: | |||
1428 | 1428 | ||
1429 | static bool inetdev_valid_mtu(unsigned int mtu) | 1429 | static bool inetdev_valid_mtu(unsigned int mtu) |
1430 | { | 1430 | { |
1431 | return mtu >= 68; | 1431 | return mtu >= IPV4_MIN_MTU; |
1432 | } | 1432 | } |
1433 | 1433 | ||
1434 | static void inetdev_send_gratuitous_arp(struct net_device *dev, | 1434 | static void inetdev_send_gratuitous_arp(struct net_device *dev, |
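[Note] This replaces the magic 68 with IPV4_MIN_MTU, the RFC 791 floor (every IPv4 host must be able to accept a 68-byte datagram). Assuming the usual definition, the predicate reduces to:

    #define IPV4_MIN_MTU 68 /* RFC 791 minimum */

    static bool inetdev_valid_mtu(unsigned int mtu)
    {
            return mtu >= IPV4_MIN_MTU;
    }

The same constant shows up in the ip_tunnel.c and igmp.c hunks below.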
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d57aa64fa7c7..61fe6e4d23fc 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -981,6 +981,7 @@ static int esp_init_state(struct xfrm_state *x) | |||
981 | 981 | ||
982 | switch (encap->encap_type) { | 982 | switch (encap->encap_type) { |
983 | default: | 983 | default: |
984 | err = -EINVAL; | ||
984 | goto error; | 985 | goto error; |
985 | case UDP_ENCAP_ESPINUDP: | 986 | case UDP_ENCAP_ESPINUDP: |
986 | x->props.header_len += sizeof(struct udphdr); | 987 | x->props.header_len += sizeof(struct udphdr); |
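[Note] A classic stale-errno fix: the default: branch jumped to the error label with err still holding the result of the previous call (usually zero after success), so an unsupported encap_type could be reported as success. The general shape of the bug class, with hypothetical names:

    int err = setup_crypto(x);      /* 0 on success */

    if (err)
            goto error;
    switch (type) {
    default:
            err = -EINVAL;          /* without this, the label returns 0 */
            goto error;
    case SUPPORTED_TYPE:
            break;
    }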
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index f8b918c766b0..29b333a62ab0 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c | |||
@@ -38,7 +38,8 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head, | |||
38 | __be32 spi; | 38 | __be32 spi; |
39 | int err; | 39 | int err; |
40 | 40 | ||
41 | skb_pull(skb, offset); | 41 | if (!pskb_pull(skb, offset)) |
42 | return NULL; | ||
42 | 43 | ||
43 | if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) | 44 | if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) |
44 | goto out; | 45 | goto out; |
@@ -121,6 +122,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, | |||
121 | if (!xo) | 122 | if (!xo) |
122 | goto out; | 123 | goto out; |
123 | 124 | ||
125 | if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) | ||
126 | goto out; | ||
127 | |||
124 | seq = xo->seq.low; | 128 | seq = xo->seq.low; |
125 | 129 | ||
126 | x = skb->sp->xvec[skb->sp->len - 1]; | 130 | x = skb->sp->xvec[skb->sp->len - 1]; |
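[Note] Two fixes here, mirrored for IPv6 in the esp6_offload.c hunks below. In GRO receive, skb_pull() only advances skb->data and can walk past the linear head of a GRO skb; pskb_pull() brings paged data into the head first and fails cleanly with NULL. In the GSO path, the handler now verifies the skb really carries SKB_GSO_ESP before touching ESP state; the same validate-gso_type-first guard lands in tcp4_gso_segment() and udp4_ufo_fragment() later in this diff. Sketch of the guard, with a hypothetical handler name:

    static struct sk_buff *proto_gso_segment(struct sk_buff *skb,
                                             netdev_features_t features)
    {
            if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
                    return ERR_PTR(-EINVAL);
            /* ... protocol-specific segmentation ... */
            return NULL; /* placeholder */
    }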
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f52d27a422c3..08259d078b1c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -1298,14 +1298,19 @@ err_table_hash_alloc: | |||
1298 | 1298 | ||
1299 | static void ip_fib_net_exit(struct net *net) | 1299 | static void ip_fib_net_exit(struct net *net) |
1300 | { | 1300 | { |
1301 | unsigned int i; | 1301 | int i; |
1302 | 1302 | ||
1303 | rtnl_lock(); | 1303 | rtnl_lock(); |
1304 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1304 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1305 | RCU_INIT_POINTER(net->ipv4.fib_main, NULL); | 1305 | RCU_INIT_POINTER(net->ipv4.fib_main, NULL); |
1306 | RCU_INIT_POINTER(net->ipv4.fib_default, NULL); | 1306 | RCU_INIT_POINTER(net->ipv4.fib_default, NULL); |
1307 | #endif | 1307 | #endif |
1308 | for (i = 0; i < FIB_TABLE_HASHSZ; i++) { | 1308 | /* Destroy the tables in reverse order to guarantee that the |
1309 | * local table, ID 255, is destroyed before the main table, ID | ||
1310 | * 254. This is necessary as the local table may contain | ||
1311 | * references to data contained in the main table. | ||
1312 | */ | ||
1313 | for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) { | ||
1309 | struct hlist_head *head = &net->ipv4.fib_table_hash[i]; | 1314 | struct hlist_head *head = &net->ipv4.fib_table_hash[i]; |
1310 | struct hlist_node *tmp; | 1315 | struct hlist_node *tmp; |
1311 | struct fib_table *tb; | 1316 | struct fib_table *tb; |
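[Note] Besides the ordering requirement spelled out in the new comment, the counter changes from unsigned int to int because a count-down loop needs a signed exit test; with an unsigned i, i >= 0 is always true and the loop never terminates:

    int i;  /* must be signed for the i >= 0 test to ever fail */

    for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--)
            flush_bucket(&net->ipv4.fib_table_hash[i]);     /* hypothetical helper */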
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f04d944f8abe..c586597da20d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) | |||
698 | 698 | ||
699 | nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { | 699 | nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { |
700 | int type = nla_type(nla); | 700 | int type = nla_type(nla); |
701 | u32 val; | 701 | u32 fi_val, val; |
702 | 702 | ||
703 | if (!type) | 703 | if (!type) |
704 | continue; | 704 | continue; |
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) | |||
715 | val = nla_get_u32(nla); | 715 | val = nla_get_u32(nla); |
716 | } | 716 | } |
717 | 717 | ||
718 | if (fi->fib_metrics->metrics[type - 1] != val) | 718 | fi_val = fi->fib_metrics->metrics[type - 1]; |
719 | if (type == RTAX_FEATURES) | ||
720 | fi_val &= ~DST_FEATURE_ECN_CA; | ||
721 | |||
722 | if (fi_val != val) | ||
719 | return false; | 723 | return false; |
720 | } | 724 | } |
721 | 725 | ||
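[Note] DST_FEATURE_ECN_CA is a kernel-internal flag stored in the same word as the user-visible RTAX_FEATURES bits, so comparing the raw stored value against a userspace-supplied metric could spuriously mismatch a route whose congestion-control setup had set that bit. Masking it compares only what userspace can actually express:

    fi_val = fi->fib_metrics->metrics[type - 1];
    if (type == RTAX_FEATURES)
            fi_val &= ~DST_FEATURE_ECN_CA;  /* internal bit, not user-visible */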
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d1f8f302dbf3..2d49717a7421 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -89,6 +89,7 @@ | |||
89 | #include <linux/rtnetlink.h> | 89 | #include <linux/rtnetlink.h> |
90 | #include <linux/times.h> | 90 | #include <linux/times.h> |
91 | #include <linux/pkt_sched.h> | 91 | #include <linux/pkt_sched.h> |
92 | #include <linux/byteorder/generic.h> | ||
92 | 93 | ||
93 | #include <net/net_namespace.h> | 94 | #include <net/net_namespace.h> |
94 | #include <net/arp.h> | 95 | #include <net/arp.h> |
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) | |||
321 | return scount; | 322 | return scount; |
322 | } | 323 | } |
323 | 324 | ||
325 | /* source address selection per RFC 3376 section 4.2.13 */ | ||
326 | static __be32 igmpv3_get_srcaddr(struct net_device *dev, | ||
327 | const struct flowi4 *fl4) | ||
328 | { | ||
329 | struct in_device *in_dev = __in_dev_get_rcu(dev); | ||
330 | |||
331 | if (!in_dev) | ||
332 | return htonl(INADDR_ANY); | ||
333 | |||
334 | for_ifa(in_dev) { | ||
335 | if (fl4->saddr == ifa->ifa_local) | ||
336 | return fl4->saddr; | ||
337 | } endfor_ifa(in_dev); | ||
338 | |||
339 | return htonl(INADDR_ANY); | ||
340 | } | ||
341 | |||
324 | static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) | 342 | static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) |
325 | { | 343 | { |
326 | struct sk_buff *skb; | 344 | struct sk_buff *skb; |
@@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) | |||
368 | pip->frag_off = htons(IP_DF); | 386 | pip->frag_off = htons(IP_DF); |
369 | pip->ttl = 1; | 387 | pip->ttl = 1; |
370 | pip->daddr = fl4.daddr; | 388 | pip->daddr = fl4.daddr; |
371 | pip->saddr = fl4.saddr; | 389 | pip->saddr = igmpv3_get_srcaddr(dev, &fl4); |
372 | pip->protocol = IPPROTO_IGMP; | 390 | pip->protocol = IPPROTO_IGMP; |
373 | pip->tot_len = 0; /* filled in later */ | 391 | pip->tot_len = 0; /* filled in later */ |
374 | ip_select_ident(net, skb, NULL); | 392 | ip_select_ident(net, skb, NULL); |
@@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) | |||
404 | } | 422 | } |
405 | 423 | ||
406 | static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, | 424 | static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, |
407 | int type, struct igmpv3_grec **ppgr) | 425 | int type, struct igmpv3_grec **ppgr, unsigned int mtu) |
408 | { | 426 | { |
409 | struct net_device *dev = pmc->interface->dev; | 427 | struct net_device *dev = pmc->interface->dev; |
410 | struct igmpv3_report *pih; | 428 | struct igmpv3_report *pih; |
411 | struct igmpv3_grec *pgr; | 429 | struct igmpv3_grec *pgr; |
412 | 430 | ||
413 | if (!skb) | 431 | if (!skb) { |
414 | skb = igmpv3_newpack(dev, dev->mtu); | 432 | skb = igmpv3_newpack(dev, mtu); |
415 | if (!skb) | 433 | if (!skb) |
416 | return NULL; | 434 | return NULL; |
435 | } | ||
417 | pgr = skb_put(skb, sizeof(struct igmpv3_grec)); | 436 | pgr = skb_put(skb, sizeof(struct igmpv3_grec)); |
418 | pgr->grec_type = type; | 437 | pgr->grec_type = type; |
419 | pgr->grec_auxwords = 0; | 438 | pgr->grec_auxwords = 0; |
@@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, | |||
436 | struct igmpv3_grec *pgr = NULL; | 455 | struct igmpv3_grec *pgr = NULL; |
437 | struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; | 456 | struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; |
438 | int scount, stotal, first, isquery, truncate; | 457 | int scount, stotal, first, isquery, truncate; |
458 | unsigned int mtu; | ||
439 | 459 | ||
440 | if (pmc->multiaddr == IGMP_ALL_HOSTS) | 460 | if (pmc->multiaddr == IGMP_ALL_HOSTS) |
441 | return skb; | 461 | return skb; |
442 | if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) | 462 | if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) |
443 | return skb; | 463 | return skb; |
444 | 464 | ||
465 | mtu = READ_ONCE(dev->mtu); | ||
466 | if (mtu < IPV4_MIN_MTU) | ||
467 | return skb; | ||
468 | |||
445 | isquery = type == IGMPV3_MODE_IS_INCLUDE || | 469 | isquery = type == IGMPV3_MODE_IS_INCLUDE || |
446 | type == IGMPV3_MODE_IS_EXCLUDE; | 470 | type == IGMPV3_MODE_IS_EXCLUDE; |
447 | truncate = type == IGMPV3_MODE_IS_EXCLUDE || | 471 | truncate = type == IGMPV3_MODE_IS_EXCLUDE || |
@@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, | |||
462 | AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { | 486 | AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { |
463 | if (skb) | 487 | if (skb) |
464 | igmpv3_sendpack(skb); | 488 | igmpv3_sendpack(skb); |
465 | skb = igmpv3_newpack(dev, dev->mtu); | 489 | skb = igmpv3_newpack(dev, mtu); |
466 | } | 490 | } |
467 | } | 491 | } |
468 | first = 1; | 492 | first = 1; |
@@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, | |||
498 | pgr->grec_nsrcs = htons(scount); | 522 | pgr->grec_nsrcs = htons(scount); |
499 | if (skb) | 523 | if (skb) |
500 | igmpv3_sendpack(skb); | 524 | igmpv3_sendpack(skb); |
501 | skb = igmpv3_newpack(dev, dev->mtu); | 525 | skb = igmpv3_newpack(dev, mtu); |
502 | first = 1; | 526 | first = 1; |
503 | scount = 0; | 527 | scount = 0; |
504 | } | 528 | } |
505 | if (first) { | 529 | if (first) { |
506 | skb = add_grhead(skb, pmc, type, &pgr); | 530 | skb = add_grhead(skb, pmc, type, &pgr, mtu); |
507 | first = 0; | 531 | first = 0; |
508 | } | 532 | } |
509 | if (!skb) | 533 | if (!skb) |
@@ -538,7 +562,7 @@ empty_source: | |||
538 | igmpv3_sendpack(skb); | 562 | igmpv3_sendpack(skb); |
539 | skb = NULL; /* add_grhead will get a new one */ | 563 | skb = NULL; /* add_grhead will get a new one */ |
540 | } | 564 | } |
541 | skb = add_grhead(skb, pmc, type, &pgr); | 565 | skb = add_grhead(skb, pmc, type, &pgr, mtu); |
542 | } | 566 | } |
543 | } | 567 | } |
544 | if (pgr) | 568 | if (pgr) |
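[Note] In the igmp.c hunks, dev->mtu can change concurrently with report generation, so add_grec() now takes one READ_ONCE() snapshot, rejects values below the IPv4 minimum, and threads that same snapshot through igmpv3_newpack() and add_grhead() so every packet of a report is sized against a consistent MTU:

    unsigned int mtu = READ_ONCE(dev->mtu); /* read once, use everywhere */

    if (mtu < IPV4_MIN_MTU)
            return skb;     /* nothing sensible fits */

igmpv3_get_srcaddr() additionally falls back to INADDR_ANY when the flow source is not (or no longer) a local address on the device, per RFC 3376 section 4.2.13.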
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c690cd0d9b3f..b563e0c46bac 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -93,7 +93,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, | |||
93 | } | 93 | } |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * Enter the time wait state. | 96 | * Enter the time wait state. This is called with locally disabled BH. |
97 | * Essentially we whip up a timewait bucket, copy the relevant info into it | 97 | * Essentially we whip up a timewait bucket, copy the relevant info into it |
98 | * from the SK, and mess with hash chains and list linkage. | 98 | * from the SK, and mess with hash chains and list linkage. |
99 | */ | 99 | */ |
@@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
111 | */ | 111 | */ |
112 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, | 112 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, |
113 | hashinfo->bhash_size)]; | 113 | hashinfo->bhash_size)]; |
114 | spin_lock_bh(&bhead->lock); | 114 | spin_lock(&bhead->lock); |
115 | tw->tw_tb = icsk->icsk_bind_hash; | 115 | tw->tw_tb = icsk->icsk_bind_hash; |
116 | WARN_ON(!icsk->icsk_bind_hash); | 116 | WARN_ON(!icsk->icsk_bind_hash); |
117 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); | 117 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); |
@@ -137,7 +137,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
137 | if (__sk_nulls_del_node_init_rcu(sk)) | 137 | if (__sk_nulls_del_node_init_rcu(sk)) |
138 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 138 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
139 | 139 | ||
140 | spin_unlock_bh(lock); | 140 | spin_unlock(lock); |
141 | } | 141 | } |
142 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); | 142 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); |
143 | 143 | ||
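[Note] __inet_twsk_hashdance() now documents and relies on the caller running with BH disabled, so the inner bhash lock drops its _bh variant. The matching caller change is the tcp_minisocks.c hunk below, which brackets the schedule/hashdance pair:

    local_bh_disable();
    inet_twsk_schedule(tw, timeo);          /* arms the CPU-pinned timer */
    __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
    inet_twsk_put(tw);
    local_bh_enable();

This closes the window in which the pinned tw_timer could fire before the timewait sock was fully hashed.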
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index bb6239169b1a..45ffd3d045d2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, | |||
266 | len = gre_hdr_len + sizeof(*ershdr); | 266 | len = gre_hdr_len + sizeof(*ershdr); |
267 | 267 | ||
268 | if (unlikely(!pskb_may_pull(skb, len))) | 268 | if (unlikely(!pskb_may_pull(skb, len))) |
269 | return -ENOMEM; | 269 | return PACKET_REJECT; |
270 | 270 | ||
271 | iph = ip_hdr(skb); | 271 | iph = ip_hdr(skb); |
272 | ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); | 272 | ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); |
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = { | |||
1310 | static void ipgre_tap_setup(struct net_device *dev) | 1310 | static void ipgre_tap_setup(struct net_device *dev) |
1311 | { | 1311 | { |
1312 | ether_setup(dev); | 1312 | ether_setup(dev); |
1313 | dev->max_mtu = 0; | ||
1313 | dev->netdev_ops = &gre_tap_netdev_ops; | 1314 | dev->netdev_ops = &gre_tap_netdev_ops; |
1314 | dev->priv_flags &= ~IFF_TX_SKB_SHARING; | 1315 | dev->priv_flags &= ~IFF_TX_SKB_SHARING; |
1315 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; | 1316 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; |
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fe6fee728ce4..6d21068f9b55 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c | |||
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev) | |||
349 | dev->needed_headroom = t_hlen + hlen; | 349 | dev->needed_headroom = t_hlen + hlen; |
350 | mtu -= (dev->hard_header_len + t_hlen); | 350 | mtu -= (dev->hard_header_len + t_hlen); |
351 | 351 | ||
352 | if (mtu < 68) | 352 | if (mtu < IPV4_MIN_MTU) |
353 | mtu = 68; | 353 | mtu = IPV4_MIN_MTU; |
354 | 354 | ||
355 | return mtu; | 355 | return mtu; |
356 | } | 356 | } |
@@ -520,8 +520,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, | |||
520 | else | 520 | else |
521 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; | 521 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
522 | 522 | ||
523 | if (skb_dst(skb)) | 523 | skb_dst_update_pmtu(skb, mtu); |
524 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | ||
525 | 524 | ||
526 | if (skb->protocol == htons(ETH_P_IP)) { | 525 | if (skb->protocol == htons(ETH_P_IP)) { |
527 | if (!skb_is_gso(skb) && | 526 | if (!skb_is_gso(skb) && |
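[Note] Here and in the ip_vti.c hunk below, the open-coded skb_dst(skb)->ops->update_pmtu() calls are replaced by the skb_dst_update_pmtu() helper, which centralizes the NULL handling. The helper's body is not part of this diff; presumably it is along these lines:

    static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
    {
            struct dst_entry *dst = skb_dst(skb);

            if (dst && dst->ops->update_pmtu)
                    dst->ops->update_pmtu(dst, NULL, skb, mtu);
    }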
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 949f432a5f04..51b1669334fe 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c | |||
@@ -200,7 +200,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, | |||
200 | 200 | ||
201 | mtu = dst_mtu(dst); | 201 | mtu = dst_mtu(dst); |
202 | if (skb->len > mtu) { | 202 | if (skb->len > mtu) { |
203 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | 203 | skb_dst_update_pmtu(skb, mtu); |
204 | if (skb->protocol == htons(ETH_P_IP)) { | 204 | if (skb->protocol == htons(ETH_P_IP)) { |
205 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 205 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
206 | htonl(mtu)); | 206 | htonl(mtu)); |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f88221aebc9d..eb8246c39de0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -202,13 +202,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
202 | 202 | ||
203 | local_bh_disable(); | 203 | local_bh_disable(); |
204 | addend = xt_write_recseq_begin(); | 204 | addend = xt_write_recseq_begin(); |
205 | private = table->private; | 205 | private = READ_ONCE(table->private); /* Address dependency. */ |
206 | cpu = smp_processor_id(); | 206 | cpu = smp_processor_id(); |
207 | /* | ||
208 | * Ensure we load private-> members after we've fetched the base | ||
209 | * pointer. | ||
210 | */ | ||
211 | smp_read_barrier_depends(); | ||
212 | table_base = private->entries; | 207 | table_base = private->entries; |
213 | jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; | 208 | jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; |
214 | 209 | ||
@@ -373,7 +368,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo, | |||
373 | if (!xt_find_jump_offset(offsets, newpos, | 368 | if (!xt_find_jump_offset(offsets, newpos, |
374 | newinfo->number)) | 369 | newinfo->number)) |
375 | return 0; | 370 | return 0; |
376 | e = entry0 + newpos; | ||
377 | } else { | 371 | } else { |
378 | /* ... this is a fallthru */ | 372 | /* ... this is a fallthru */ |
379 | newpos = pos + e->next_offset; | 373 | newpos = pos + e->next_offset; |
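[Note] Same change in ip_tables.c below: part of the kernel-wide retirement of smp_read_barrier_depends(). READ_ONCE() now implies address-dependency ordering, so loading table->private through it is enough to order the dependent loads on every architecture, Alpha included:

    private = READ_ONCE(table->private);    /* address dependency */
    table_base = private->entries;          /* ordered after the load above */

The deleted e = entry0 + newpos in mark_source_chains() appears to be a dead store; e is reassigned on the common path before its next use.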
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4cbe5e80f3bf..cc984d0e0c69 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -260,13 +260,8 @@ ipt_do_table(struct sk_buff *skb, | |||
260 | WARN_ON(!(table->valid_hooks & (1 << hook))); | 260 | WARN_ON(!(table->valid_hooks & (1 << hook))); |
261 | local_bh_disable(); | 261 | local_bh_disable(); |
262 | addend = xt_write_recseq_begin(); | 262 | addend = xt_write_recseq_begin(); |
263 | private = table->private; | 263 | private = READ_ONCE(table->private); /* Address dependency. */ |
264 | cpu = smp_processor_id(); | 264 | cpu = smp_processor_id(); |
265 | /* | ||
266 | * Ensure we load private-> members after we've fetched the base | ||
267 | * pointer. | ||
268 | */ | ||
269 | smp_read_barrier_depends(); | ||
270 | table_base = private->entries; | 265 | table_base = private->entries; |
271 | jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; | 266 | jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; |
272 | 267 | ||
@@ -439,7 +434,6 @@ mark_source_chains(const struct xt_table_info *newinfo, | |||
439 | if (!xt_find_jump_offset(offsets, newpos, | 434 | if (!xt_find_jump_offset(offsets, newpos, |
440 | newinfo->number)) | 435 | newinfo->number)) |
441 | return 0; | 436 | return 0; |
442 | e = entry0 + newpos; | ||
443 | } else { | 437 | } else { |
444 | /* ... this is a fallthru */ | 438 | /* ... this is a fallthru */ |
445 | newpos = pos + e->next_offset; | 439 | newpos = pos + e->next_offset; |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 17b4ca562944..69060e3abe85 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -813,12 +813,13 @@ static int clusterip_net_init(struct net *net) | |||
813 | 813 | ||
814 | static void clusterip_net_exit(struct net *net) | 814 | static void clusterip_net_exit(struct net *net) |
815 | { | 815 | { |
816 | #ifdef CONFIG_PROC_FS | ||
817 | struct clusterip_net *cn = net_generic(net, clusterip_net_id); | 816 | struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
817 | #ifdef CONFIG_PROC_FS | ||
818 | proc_remove(cn->procdir); | 818 | proc_remove(cn->procdir); |
819 | cn->procdir = NULL; | 819 | cn->procdir = NULL; |
820 | #endif | 820 | #endif |
821 | nf_unregister_net_hook(net, &cip_arp_ops); | 821 | nf_unregister_net_hook(net, &cip_arp_ops); |
822 | WARN_ON_ONCE(!list_empty(&cn->configs)); | ||
822 | } | 823 | } |
823 | 824 | ||
824 | static struct pernet_operations clusterip_net_ops = { | 825 | static struct pernet_operations clusterip_net_ops = { |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 33b70bfd1122..5e570aa9e43b 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -513,11 +513,18 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |||
513 | int err; | 513 | int err; |
514 | struct ip_options_data opt_copy; | 514 | struct ip_options_data opt_copy; |
515 | struct raw_frag_vec rfv; | 515 | struct raw_frag_vec rfv; |
516 | int hdrincl; | ||
516 | 517 | ||
517 | err = -EMSGSIZE; | 518 | err = -EMSGSIZE; |
518 | if (len > 0xFFFF) | 519 | if (len > 0xFFFF) |
519 | goto out; | 520 | goto out; |
520 | 521 | ||
522 | /* hdrincl should be READ_ONCE(inet->hdrincl) | ||
523 | * but READ_ONCE() doesn't work with bit fields. | ||
524 | * Doing this indirectly yields the same result. | ||
525 | */ | ||
526 | hdrincl = inet->hdrincl; | ||
527 | hdrincl = READ_ONCE(hdrincl); | ||
521 | /* | 528 | /* |
522 | * Check the flags. | 529 | * Check the flags. |
523 | */ | 530 | */ |
@@ -593,7 +600,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |||
593 | /* Linux does not mangle headers on raw sockets, | 600 | /* Linux does not mangle headers on raw sockets, |
594 | * so that IP options + IP_HDRINCL is non-sense. | 601 | * so that IP options + IP_HDRINCL is non-sense. |
595 | */ | 602 | */ |
596 | if (inet->hdrincl) | 603 | if (hdrincl) |
597 | goto done; | 604 | goto done; |
598 | if (ipc.opt->opt.srr) { | 605 | if (ipc.opt->opt.srr) { |
599 | if (!daddr) | 606 | if (!daddr) |
@@ -615,12 +622,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |||
615 | 622 | ||
616 | flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, | 623 | flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, |
617 | RT_SCOPE_UNIVERSE, | 624 | RT_SCOPE_UNIVERSE, |
618 | inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, | 625 | hdrincl ? IPPROTO_RAW : sk->sk_protocol, |
619 | inet_sk_flowi_flags(sk) | | 626 | inet_sk_flowi_flags(sk) | |
620 | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), | 627 | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), |
621 | daddr, saddr, 0, 0, sk->sk_uid); | 628 | daddr, saddr, 0, 0, sk->sk_uid); |
622 | 629 | ||
623 | if (!inet->hdrincl) { | 630 | if (!hdrincl) { |
624 | rfv.msg = msg; | 631 | rfv.msg = msg; |
625 | rfv.hlen = 0; | 632 | rfv.hlen = 0; |
626 | 633 | ||
@@ -645,7 +652,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |||
645 | goto do_confirm; | 652 | goto do_confirm; |
646 | back_from_confirm: | 653 | back_from_confirm: |
647 | 654 | ||
648 | if (inet->hdrincl) | 655 | if (hdrincl) |
649 | err = raw_send_hdrinc(sk, &fl4, msg, len, | 656 | err = raw_send_hdrinc(sk, &fl4, msg, len, |
650 | &rt, msg->msg_flags, &ipc.sockc); | 657 | &rt, msg->msg_flags, &ipc.sockc); |
651 | 658 | ||
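[Note] inet->hdrincl is a bit-field, which READ_ONCE() cannot take directly, hence the copy through a plain int. The point is that every decision in raw_sendmsg() is made against one snapshot, so a racing setsockopt(IP_HDRINCL) cannot make the header-building path and the flow setup disagree:

    int hdrincl = inet->hdrincl;    /* copy the bit-field out... */

    hdrincl = READ_ONCE(hdrincl);   /* ...and pin a single value */
    /* all later tests use hdrincl, never inet->hdrincl */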
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 43b69af242e1..4e153b23bcec 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -2762,6 +2762,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, | |||
2762 | if (err == 0 && rt->dst.error) | 2762 | if (err == 0 && rt->dst.error) |
2763 | err = -rt->dst.error; | 2763 | err = -rt->dst.error; |
2764 | } else { | 2764 | } else { |
2765 | fl4.flowi4_iif = LOOPBACK_IFINDEX; | ||
2765 | rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); | 2766 | rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); |
2766 | err = 0; | 2767 | err = 0; |
2767 | if (IS_ERR(rt)) | 2768 | if (IS_ERR(rt)) |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bf97317e6c97..1b38b4282cc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -493,9 +493,9 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) | |||
493 | * take care of normal races (between the test and the event) and we don't | 493 | * take care of normal races (between the test and the event) and we don't |
494 | * go look at any of the socket buffers directly. | 494 | * go look at any of the socket buffers directly. |
495 | */ | 495 | */ |
496 | unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | 496 | __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) |
497 | { | 497 | { |
498 | unsigned int mask; | 498 | __poll_t mask; |
499 | struct sock *sk = sock->sk; | 499 | struct sock *sk = sock->sk; |
500 | const struct tcp_sock *tp = tcp_sk(sk); | 500 | const struct tcp_sock *tp = tcp_sk(sk); |
501 | int state; | 501 | int state; |
@@ -2298,6 +2298,9 @@ adjudge_to_death: | |||
2298 | tcp_send_active_reset(sk, GFP_ATOMIC); | 2298 | tcp_send_active_reset(sk, GFP_ATOMIC); |
2299 | __NET_INC_STATS(sock_net(sk), | 2299 | __NET_INC_STATS(sock_net(sk), |
2300 | LINUX_MIB_TCPABORTONMEMORY); | 2300 | LINUX_MIB_TCPABORTONMEMORY); |
2301 | } else if (!check_net(sock_net(sk))) { | ||
2302 | /* Not possible to send reset; just close */ | ||
2303 | tcp_set_state(sk, TCP_CLOSE); | ||
2301 | } | 2304 | } |
2302 | } | 2305 | } |
2303 | 2306 | ||
@@ -2412,6 +2415,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2412 | tp->snd_cwnd_cnt = 0; | 2415 | tp->snd_cwnd_cnt = 0; |
2413 | tp->window_clamp = 0; | 2416 | tp->window_clamp = 0; |
2414 | tcp_set_ca_state(sk, TCP_CA_Open); | 2417 | tcp_set_ca_state(sk, TCP_CA_Open); |
2418 | tp->is_sack_reneg = 0; | ||
2415 | tcp_clear_retrans(tp); | 2419 | tcp_clear_retrans(tp); |
2416 | inet_csk_delack_init(sk); | 2420 | inet_csk_delack_init(sk); |
2417 | /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 | 2421 | /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 |
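[Note] On the tcp.c hunks: when a socket is closed while its network namespace is already dismantling, no reset can be transmitted, so the state is forced straight to TCP_CLOSE; the tcp_timer.c hunk below adds the same test to tcp_out_of_resources(). check_net() is not shown in this diff; presumably it is a simple liveness probe on the namespace refcount:

    static inline int check_net(const struct net *net)
    {
            return refcount_read(&net->count) != 0;
    }

The tp->is_sack_reneg = 0 in tcp_disconnect() pairs with the reneging state machine introduced in the tcp_input.c and tcp_rate.c hunks below.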
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 69ee877574d0..8322f26e770e 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c | |||
@@ -110,7 +110,8 @@ struct bbr { | |||
110 | u32 lt_last_lost; /* LT intvl start: tp->lost */ | 110 | u32 lt_last_lost; /* LT intvl start: tp->lost */ |
111 | u32 pacing_gain:10, /* current gain for setting pacing rate */ | 111 | u32 pacing_gain:10, /* current gain for setting pacing rate */ |
112 | cwnd_gain:10, /* current gain for setting cwnd */ | 112 | cwnd_gain:10, /* current gain for setting cwnd */ |
113 | full_bw_cnt:3, /* number of rounds without large bw gains */ | 113 | full_bw_reached:1, /* reached full bw in Startup? */ |
114 | full_bw_cnt:2, /* number of rounds without large bw gains */ | ||
114 | cycle_idx:3, /* current index in pacing_gain cycle array */ | 115 | cycle_idx:3, /* current index in pacing_gain cycle array */ |
115 | has_seen_rtt:1, /* have we seen an RTT sample yet? */ | 116 | has_seen_rtt:1, /* have we seen an RTT sample yet? */ |
116 | unused_b:5; | 117 | unused_b:5; |
@@ -180,7 +181,7 @@ static bool bbr_full_bw_reached(const struct sock *sk) | |||
180 | { | 181 | { |
181 | const struct bbr *bbr = inet_csk_ca(sk); | 182 | const struct bbr *bbr = inet_csk_ca(sk); |
182 | 183 | ||
183 | return bbr->full_bw_cnt >= bbr_full_bw_cnt; | 184 | return bbr->full_bw_reached; |
184 | } | 185 | } |
185 | 186 | ||
186 | /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ | 187 | /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ |
@@ -717,6 +718,7 @@ static void bbr_check_full_bw_reached(struct sock *sk, | |||
717 | return; | 718 | return; |
718 | } | 719 | } |
719 | ++bbr->full_bw_cnt; | 720 | ++bbr->full_bw_cnt; |
721 | bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; | ||
720 | } | 722 | } |
721 | 723 | ||
722 | /* If pipe is probably full, drain the queue and then enter steady-state. */ | 724 | /* If pipe is probably full, drain the queue and then enter steady-state. */ |
@@ -850,6 +852,7 @@ static void bbr_init(struct sock *sk) | |||
850 | bbr->restore_cwnd = 0; | 852 | bbr->restore_cwnd = 0; |
851 | bbr->round_start = 0; | 853 | bbr->round_start = 0; |
852 | bbr->idle_restart = 0; | 854 | bbr->idle_restart = 0; |
855 | bbr->full_bw_reached = 0; | ||
853 | bbr->full_bw = 0; | 856 | bbr->full_bw = 0; |
854 | bbr->full_bw_cnt = 0; | 857 | bbr->full_bw_cnt = 0; |
855 | bbr->cycle_mstamp = 0; | 858 | bbr->cycle_mstamp = 0; |
@@ -871,6 +874,11 @@ static u32 bbr_sndbuf_expand(struct sock *sk) | |||
871 | */ | 874 | */ |
872 | static u32 bbr_undo_cwnd(struct sock *sk) | 875 | static u32 bbr_undo_cwnd(struct sock *sk) |
873 | { | 876 | { |
877 | struct bbr *bbr = inet_csk_ca(sk); | ||
878 | |||
879 | bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ | ||
880 | bbr->full_bw_cnt = 0; | ||
881 | bbr_reset_lt_bw_sampling(sk); | ||
874 | return tcp_sk(sk)->snd_cwnd; | 882 | return tcp_sk(sk)->snd_cwnd; |
875 | } | 883 | } |
876 | 884 | ||
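[Note] On the tcp_bbr.c hunks: "full bandwidth reached" was previously derived from full_bw_cnt, so clearing the counters would have dropped BBR back into startup behavior. The new one-bit full_bw_reached latches the decision:

    bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;    /* sticky */

With the latch in place, bbr_undo_cwnd() can safely zero full_bw and full_bw_cnt (and reset long-term bandwidth sampling) after a spurious loss, restarting pipe-fullness estimation without forgetting that startup already ended.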
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 734cfc8ff76e..45f750e85714 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) | |||
508 | u32 new_sample = tp->rcv_rtt_est.rtt_us; | 508 | u32 new_sample = tp->rcv_rtt_est.rtt_us; |
509 | long m = sample; | 509 | long m = sample; |
510 | 510 | ||
511 | if (m == 0) | ||
512 | m = 1; | ||
513 | |||
514 | if (new_sample != 0) { | 511 | if (new_sample != 0) { |
515 | /* If we sample in larger samples in the non-timestamp | 512 | /* If we sample in larger samples in the non-timestamp |
516 | * case, we could grossly overestimate the RTT especially | 513 | * case, we could grossly overestimate the RTT especially |
@@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) | |||
547 | if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) | 544 | if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) |
548 | return; | 545 | return; |
549 | delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time); | 546 | delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time); |
547 | if (!delta_us) | ||
548 | delta_us = 1; | ||
550 | tcp_rcv_rtt_update(tp, delta_us, 1); | 549 | tcp_rcv_rtt_update(tp, delta_us, 1); |
551 | 550 | ||
552 | new_measure: | 551 | new_measure: |
@@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, | |||
563 | (TCP_SKB_CB(skb)->end_seq - | 562 | (TCP_SKB_CB(skb)->end_seq - |
564 | TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { | 563 | TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { |
565 | u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; | 564 | u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; |
566 | u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); | 565 | u32 delta_us; |
567 | 566 | ||
567 | if (!delta) | ||
568 | delta = 1; | ||
569 | delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); | ||
568 | tcp_rcv_rtt_update(tp, delta_us, 0); | 570 | tcp_rcv_rtt_update(tp, delta_us, 0); |
569 | } | 571 | } |
570 | } | 572 | } |
@@ -579,6 +581,7 @@ void tcp_rcv_space_adjust(struct sock *sk) | |||
579 | int time; | 581 | int time; |
580 | int copied; | 582 | int copied; |
581 | 583 | ||
584 | tcp_mstamp_refresh(tp); | ||
582 | time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); | 585 | time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); |
583 | if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) | 586 | if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) |
584 | return; | 587 | return; |
@@ -1941,6 +1944,8 @@ void tcp_enter_loss(struct sock *sk) | |||
1941 | if (is_reneg) { | 1944 | if (is_reneg) { |
1942 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); | 1945 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); |
1943 | tp->sacked_out = 0; | 1946 | tp->sacked_out = 0; |
1947 | /* Mark SACK reneging until we recover from this loss event. */ | ||
1948 | tp->is_sack_reneg = 1; | ||
1944 | } | 1949 | } |
1945 | tcp_clear_all_retrans_hints(tp); | 1950 | tcp_clear_all_retrans_hints(tp); |
1946 | 1951 | ||
@@ -2326,6 +2331,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) | |||
2326 | } | 2331 | } |
2327 | tp->snd_cwnd_stamp = tcp_jiffies32; | 2332 | tp->snd_cwnd_stamp = tcp_jiffies32; |
2328 | tp->undo_marker = 0; | 2333 | tp->undo_marker = 0; |
2334 | tp->rack.advanced = 1; /* Force RACK to re-exam losses */ | ||
2329 | } | 2335 | } |
2330 | 2336 | ||
2331 | static inline bool tcp_may_undo(const struct tcp_sock *tp) | 2337 | static inline bool tcp_may_undo(const struct tcp_sock *tp) |
@@ -2364,6 +2370,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) | |||
2364 | return true; | 2370 | return true; |
2365 | } | 2371 | } |
2366 | tcp_set_ca_state(sk, TCP_CA_Open); | 2372 | tcp_set_ca_state(sk, TCP_CA_Open); |
2373 | tp->is_sack_reneg = 0; | ||
2367 | return false; | 2374 | return false; |
2368 | } | 2375 | } |
2369 | 2376 | ||
@@ -2397,8 +2404,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) | |||
2397 | NET_INC_STATS(sock_net(sk), | 2404 | NET_INC_STATS(sock_net(sk), |
2398 | LINUX_MIB_TCPSPURIOUSRTOS); | 2405 | LINUX_MIB_TCPSPURIOUSRTOS); |
2399 | inet_csk(sk)->icsk_retransmits = 0; | 2406 | inet_csk(sk)->icsk_retransmits = 0; |
2400 | if (frto_undo || tcp_is_sack(tp)) | 2407 | if (frto_undo || tcp_is_sack(tp)) { |
2401 | tcp_set_ca_state(sk, TCP_CA_Open); | 2408 | tcp_set_ca_state(sk, TCP_CA_Open); |
2409 | tp->is_sack_reneg = 0; | ||
2410 | } | ||
2402 | return true; | 2411 | return true; |
2403 | } | 2412 | } |
2404 | return false; | 2413 | return false; |
@@ -3495,6 +3504,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3495 | struct tcp_sacktag_state sack_state; | 3504 | struct tcp_sacktag_state sack_state; |
3496 | struct rate_sample rs = { .prior_delivered = 0 }; | 3505 | struct rate_sample rs = { .prior_delivered = 0 }; |
3497 | u32 prior_snd_una = tp->snd_una; | 3506 | u32 prior_snd_una = tp->snd_una; |
3507 | bool is_sack_reneg = tp->is_sack_reneg; | ||
3498 | u32 ack_seq = TCP_SKB_CB(skb)->seq; | 3508 | u32 ack_seq = TCP_SKB_CB(skb)->seq; |
3499 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 3509 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
3500 | bool is_dupack = false; | 3510 | bool is_dupack = false; |
@@ -3611,7 +3621,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3611 | 3621 | ||
3612 | delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ | 3622 | delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ |
3613 | lost = tp->lost - lost; /* freshly marked lost */ | 3623 | lost = tp->lost - lost; /* freshly marked lost */ |
3614 | tcp_rate_gen(sk, delivered, lost, sack_state.rate); | 3624 | tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); |
3615 | tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); | 3625 | tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); |
3616 | tcp_xmit_recovery(sk, rexmit); | 3626 | tcp_xmit_recovery(sk, rexmit); |
3617 | return 1; | 3627 | return 1; |
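[Note] On the tcp_input.c hunks: for tcp_rcv_rtt_update() a sample of 0 means "no sample", so the rounding-up moves out to the two call sites where a genuine sub-resolution RTT can compute to 0:

    u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;

    if (!delta)
            delta = 1;      /* keep 0 reserved for "no sample" */
    delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);

tcp_rcv_space_adjust() additionally refreshes tcp_mstamp so receive-buffer autotuning is not computed against a stale clock, and tp->is_sack_reneg marks a loss episode that began with SACK reneging until recovery completes or is undone; tcp_rate_gen() below consumes it.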
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c6bc0c4d19c6..94e28350f420 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, | |||
848 | tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, | 848 | tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, |
849 | req->ts_recent, | 849 | req->ts_recent, |
850 | 0, | 850 | 0, |
851 | tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, | 851 | tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr, |
852 | AF_INET), | 852 | AF_INET), |
853 | inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, | 853 | inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, |
854 | ip_hdr(skb)->tos); | 854 | ip_hdr(skb)->tos); |
@@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb) | |||
1591 | } | 1591 | } |
1592 | EXPORT_SYMBOL(tcp_filter); | 1592 | EXPORT_SYMBOL(tcp_filter); |
1593 | 1593 | ||
1594 | static void tcp_v4_restore_cb(struct sk_buff *skb) | ||
1595 | { | ||
1596 | memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4, | ||
1597 | sizeof(struct inet_skb_parm)); | ||
1598 | } | ||
1599 | |||
1600 | static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, | ||
1601 | const struct tcphdr *th) | ||
1602 | { | ||
1603 | /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() | ||
1604 | * barrier() makes sure compiler wont play fool^Waliasing games. | ||
1605 | */ | ||
1606 | memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), | ||
1607 | sizeof(struct inet_skb_parm)); | ||
1608 | barrier(); | ||
1609 | |||
1610 | TCP_SKB_CB(skb)->seq = ntohl(th->seq); | ||
1611 | TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + | ||
1612 | skb->len - th->doff * 4); | ||
1613 | TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); | ||
1614 | TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); | ||
1615 | TCP_SKB_CB(skb)->tcp_tw_isn = 0; | ||
1616 | TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); | ||
1617 | TCP_SKB_CB(skb)->sacked = 0; | ||
1618 | TCP_SKB_CB(skb)->has_rxtstamp = | ||
1619 | skb->tstamp || skb_hwtstamps(skb)->hwtstamp; | ||
1620 | } | ||
1621 | |||
1594 | /* | 1622 | /* |
1595 | * From tcp_input.c | 1623 | * From tcp_input.c |
1596 | */ | 1624 | */ |
@@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1631 | 1659 | ||
1632 | th = (const struct tcphdr *)skb->data; | 1660 | th = (const struct tcphdr *)skb->data; |
1633 | iph = ip_hdr(skb); | 1661 | iph = ip_hdr(skb); |
1634 | /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() | ||
1635 | * barrier() makes sure compiler wont play fool^Waliasing games. | ||
1636 | */ | ||
1637 | memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), | ||
1638 | sizeof(struct inet_skb_parm)); | ||
1639 | barrier(); | ||
1640 | |||
1641 | TCP_SKB_CB(skb)->seq = ntohl(th->seq); | ||
1642 | TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + | ||
1643 | skb->len - th->doff * 4); | ||
1644 | TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); | ||
1645 | TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); | ||
1646 | TCP_SKB_CB(skb)->tcp_tw_isn = 0; | ||
1647 | TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); | ||
1648 | TCP_SKB_CB(skb)->sacked = 0; | ||
1649 | TCP_SKB_CB(skb)->has_rxtstamp = | ||
1650 | skb->tstamp || skb_hwtstamps(skb)->hwtstamp; | ||
1651 | |||
1652 | lookup: | 1662 | lookup: |
1653 | sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, | 1663 | sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, |
1654 | th->dest, sdif, &refcounted); | 1664 | th->dest, sdif, &refcounted); |
@@ -1679,14 +1689,19 @@ process: | |||
1679 | sock_hold(sk); | 1689 | sock_hold(sk); |
1680 | refcounted = true; | 1690 | refcounted = true; |
1681 | nsk = NULL; | 1691 | nsk = NULL; |
1682 | if (!tcp_filter(sk, skb)) | 1692 | if (!tcp_filter(sk, skb)) { |
1693 | th = (const struct tcphdr *)skb->data; | ||
1694 | iph = ip_hdr(skb); | ||
1695 | tcp_v4_fill_cb(skb, iph, th); | ||
1683 | nsk = tcp_check_req(sk, skb, req, false); | 1696 | nsk = tcp_check_req(sk, skb, req, false); |
1697 | } | ||
1684 | if (!nsk) { | 1698 | if (!nsk) { |
1685 | reqsk_put(req); | 1699 | reqsk_put(req); |
1686 | goto discard_and_relse; | 1700 | goto discard_and_relse; |
1687 | } | 1701 | } |
1688 | if (nsk == sk) { | 1702 | if (nsk == sk) { |
1689 | reqsk_put(req); | 1703 | reqsk_put(req); |
1704 | tcp_v4_restore_cb(skb); | ||
1690 | } else if (tcp_child_process(sk, nsk, skb)) { | 1705 | } else if (tcp_child_process(sk, nsk, skb)) { |
1691 | tcp_v4_send_reset(nsk, skb); | 1706 | tcp_v4_send_reset(nsk, skb); |
1692 | goto discard_and_relse; | 1707 | goto discard_and_relse; |
@@ -1712,6 +1727,7 @@ process: | |||
1712 | goto discard_and_relse; | 1727 | goto discard_and_relse; |
1713 | th = (const struct tcphdr *)skb->data; | 1728 | th = (const struct tcphdr *)skb->data; |
1714 | iph = ip_hdr(skb); | 1729 | iph = ip_hdr(skb); |
1730 | tcp_v4_fill_cb(skb, iph, th); | ||
1715 | 1731 | ||
1716 | skb->dev = NULL; | 1732 | skb->dev = NULL; |
1717 | 1733 | ||
@@ -1742,6 +1758,8 @@ no_tcp_socket: | |||
1742 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | 1758 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) |
1743 | goto discard_it; | 1759 | goto discard_it; |
1744 | 1760 | ||
1761 | tcp_v4_fill_cb(skb, iph, th); | ||
1762 | |||
1745 | if (tcp_checksum_complete(skb)) { | 1763 | if (tcp_checksum_complete(skb)) { |
1746 | csum_error: | 1764 | csum_error: |
1747 | __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); | 1765 | __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); |
@@ -1768,6 +1786,8 @@ do_time_wait: | |||
1768 | goto discard_it; | 1786 | goto discard_it; |
1769 | } | 1787 | } |
1770 | 1788 | ||
1789 | tcp_v4_fill_cb(skb, iph, th); | ||
1790 | |||
1771 | if (tcp_checksum_complete(skb)) { | 1791 | if (tcp_checksum_complete(skb)) { |
1772 | inet_twsk_put(inet_twsk(sk)); | 1792 | inet_twsk_put(inet_twsk(sk)); |
1773 | goto csum_error; | 1793 | goto csum_error; |
@@ -1784,6 +1804,7 @@ do_time_wait: | |||
1784 | if (sk2) { | 1804 | if (sk2) { |
1785 | inet_twsk_deschedule_put(inet_twsk(sk)); | 1805 | inet_twsk_deschedule_put(inet_twsk(sk)); |
1786 | sk = sk2; | 1806 | sk = sk2; |
1807 | tcp_v4_restore_cb(skb); | ||
1787 | refcounted = false; | 1808 | refcounted = false; |
1788 | goto process; | 1809 | goto process; |
1789 | } | 1810 | } |
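[Note] On the tcp_ipv4.c hunks: filling TCP_SKB_CB at the top of tcp_v4_rcv() clobbered IPCB before tcp_filter() (i.e. BPF socket filters) ran, even though those may still need the IP control block. The fill is therefore deferred until after socket lookup and filtering, and tcp_v4_restore_cb() copies the saved header back when an skb is routed back through a path that expects IPCB intact (the nsk == sk and TIME_WAIT re-lookup cases):

    tcp_v4_fill_cb(skb, iph, th);   /* IPCB -> cb.header.h4, then TCP fields */
    /* ... */
    tcp_v4_restore_cb(skb);         /* cb.header.h4 -> IPCB again */

The MD5 hunk is a separate fix: an ACK sent on behalf of a request socket must look up the key for the peer's address, i.e. the incoming packet's saddr, not daddr.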
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e36eff0403f4..b079b619b60c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
310 | if (state == TCP_TIME_WAIT) | 310 | if (state == TCP_TIME_WAIT) |
311 | timeo = TCP_TIMEWAIT_LEN; | 311 | timeo = TCP_TIMEWAIT_LEN; |
312 | 312 | ||
313 | /* tw_timer is pinned, so we need to make sure BH are disabled | ||
314 | * in following section, otherwise timer handler could run before | ||
315 | * we complete the initialization. | ||
316 | */ | ||
317 | local_bh_disable(); | ||
313 | inet_twsk_schedule(tw, timeo); | 318 | inet_twsk_schedule(tw, timeo); |
314 | /* Linkage updates. */ | 319 | /* Linkage updates. */ |
315 | __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); | 320 | __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); |
316 | inet_twsk_put(tw); | 321 | inet_twsk_put(tw); |
322 | local_bh_enable(); | ||
317 | } else { | 323 | } else { |
318 | /* Sorry, if we're out of memory, just CLOSE this | 324 | /* Sorry, if we're out of memory, just CLOSE this |
319 | * socket up. We've got bigger problems than | 325 | * socket up. We've got bigger problems than |
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b6a2aa1dcf56..4d58e2ce0b5b 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c | |||
@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, | |||
32 | static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, | 32 | static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, |
33 | netdev_features_t features) | 33 | netdev_features_t features) |
34 | { | 34 | { |
35 | if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) | ||
36 | return ERR_PTR(-EINVAL); | ||
37 | |||
35 | if (!pskb_may_pull(skb, sizeof(struct tcphdr))) | 38 | if (!pskb_may_pull(skb, sizeof(struct tcphdr))) |
36 | return ERR_PTR(-EINVAL); | 39 | return ERR_PTR(-EINVAL); |
37 | 40 | ||
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 3330a370d306..c61240e43923 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c | |||
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, | |||
106 | 106 | ||
107 | /* Update the connection delivery information and generate a rate sample. */ | 107 | /* Update the connection delivery information and generate a rate sample. */ |
108 | void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, | 108 | void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, |
109 | struct rate_sample *rs) | 109 | bool is_sack_reneg, struct rate_sample *rs) |
110 | { | 110 | { |
111 | struct tcp_sock *tp = tcp_sk(sk); | 111 | struct tcp_sock *tp = tcp_sk(sk); |
112 | u32 snd_us, ack_us; | 112 | u32 snd_us, ack_us; |
@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, | |||
124 | 124 | ||
125 | rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ | 125 | rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ |
126 | rs->losses = lost; /* freshly marked lost */ | 126 | rs->losses = lost; /* freshly marked lost */ |
127 | /* Return an invalid sample if no timing information is available. */ | 127 | /* Return an invalid sample if no timing information is available or |
128 | if (!rs->prior_mstamp) { | 128 | * in recovery from loss with SACK reneging. Rate samples taken during |
129 | * a SACK reneging event may overestimate bw by including packets that | ||
130 | * were SACKed before the reneg. | ||
131 | */ | ||
132 | if (!rs->prior_mstamp || is_sack_reneg) { | ||
129 | rs->delivered = -1; | 133 | rs->delivered = -1; |
130 | rs->interval_us = -1; | 134 | rs->interval_us = -1; |
131 | return; | 135 | return; |
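[Note] A rate sample spanning a SACK reneging event can count packets that were SACKed before the reneg, inflating delivered and hence the bandwidth estimate, so such samples are marked invalid exactly like samples without timing data. Consumers already skip these, along the lines of:

    if (rs->delivered < 0 || rs->interval_us < 0)
            return; /* invalid sample, ignore */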
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index d3ea89020c69..3a81720ac0c4 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c | |||
@@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) | |||
55 | * to queuing or delayed ACKs. | 55 | * to queuing or delayed ACKs. |
56 | */ | 56 | */ |
57 | reo_wnd = 1000; | 57 | reo_wnd = 1000; |
58 | if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { | 58 | if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) && |
59 | min_rtt != ~0U) { | ||
59 | reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); | 60 | reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); |
60 | reo_wnd = min(reo_wnd, tp->srtt_us >> 3); | 61 | reo_wnd = min(reo_wnd, tp->srtt_us >> 3); |
61 | } | 62 | } |
@@ -79,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) | |||
79 | */ | 80 | */ |
80 | remaining = tp->rack.rtt_us + reo_wnd - | 81 | remaining = tp->rack.rtt_us + reo_wnd - |
81 | tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp); | 82 | tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp); |
82 | if (remaining < 0) { | 83 | if (remaining <= 0) { |
83 | tcp_rack_mark_skb_lost(sk, skb); | 84 | tcp_rack_mark_skb_lost(sk, skb); |
84 | list_del_init(&skb->tcp_tsorted_anchor); | 85 | list_del_init(&skb->tcp_tsorted_anchor); |
85 | } else { | 86 | } else { |
86 | /* Record maximum wait time (+1 to avoid 0) */ | 87 | /* Record maximum wait time */ |
87 | *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); | 88 | *reo_timeout = max_t(u32, *reo_timeout, remaining); |
88 | } | 89 | } |
89 | } | 90 | } |
90 | } | 91 | } |
@@ -116,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, | |||
116 | { | 117 | { |
117 | u32 rtt_us; | 118 | u32 rtt_us; |
118 | 119 | ||
119 | if (tp->rack.mstamp && | ||
120 | !tcp_rack_sent_after(xmit_time, tp->rack.mstamp, | ||
121 | end_seq, tp->rack.end_seq)) | ||
122 | return; | ||
123 | |||
124 | rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); | 120 | rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); |
125 | if (sacked & TCPCB_RETRANS) { | 121 | if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { |
126 | /* If the sacked packet was retransmitted, it's ambiguous | 122 | /* If the sacked packet was retransmitted, it's ambiguous |
127 | * whether the retransmission or the original (or the prior | 123 | * whether the retransmission or the original (or the prior |
128 | * retransmission) was sacked. | 124 | * retransmission) was sacked. |
@@ -133,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, | |||
133 | * so it's at least one RTT (i.e., retransmission is at least | 129 | * so it's at least one RTT (i.e., retransmission is at least |
134 | * an RTT later). | 130 | * an RTT later). |
135 | */ | 131 | */ |
136 | if (rtt_us < tcp_min_rtt(tp)) | 132 | return; |
137 | return; | ||
138 | } | 133 | } |
139 | tp->rack.rtt_us = rtt_us; | ||
140 | tp->rack.mstamp = xmit_time; | ||
141 | tp->rack.end_seq = end_seq; | ||
142 | tp->rack.advanced = 1; | 134 | tp->rack.advanced = 1; |
135 | tp->rack.rtt_us = rtt_us; | ||
136 | if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp, | ||
137 | end_seq, tp->rack.end_seq)) { | ||
138 | tp->rack.mstamp = xmit_time; | ||
139 | tp->rack.end_seq = end_seq; | ||
140 | } | ||
143 | } | 141 | } |
144 | 142 | ||
145 | /* We have waited long enough to accommodate reordering. Mark the expired | 143 | /* We have waited long enough to accommodate reordering. Mark the expired |
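[Note] On the tcp_recovery.c hunks: the reordering window now stays at its adaptive maximum until Recovery actually begins (rather than collapsing whenever anything is marked lost), remaining <= 0 treats an exactly-expired deadline as lost instead of re-arming a zero-length timer, and tcp_rack_advance() records an RTT for every usable sample while advancing mstamp/end_seq only when the newly SACKed skb was really sent later. The sent-after test is not in this diff; presumably it is the usual lexicographic comparison:

    static inline bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
    {
            return t1 > t2 || (t1 == t2 && after(seq1, seq2));
    }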
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 16df6dd44b98..388158c9d9f6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -48,11 +48,19 @@ static void tcp_write_err(struct sock *sk) | |||
48 | * to prevent DoS attacks. It is called when a retransmission timeout | 48 | * to prevent DoS attacks. It is called when a retransmission timeout |
49 | * or zero probe timeout occurs on orphaned socket. | 49 | * or zero probe timeout occurs on orphaned socket. |
50 | * | 50 | * |
51 | * Also close if our net namespace is exiting; in that case there is no | ||
52 | * hope of ever communicating again since all netns interfaces are already | ||
53 | * down (or about to be down), and we need to release our dst references, | ||
54 | * which have been moved to the netns loopback interface, so the namespace | ||
55 | * can finish exiting. This condition is only possible if we are a kernel | ||
56 | * socket, as those do not hold references to the namespace. | ||
57 | * | ||
51 | * Criteria is still not confirmed experimentally and may change. | 58 | * Criteria is still not confirmed experimentally and may change. |
52 | * We kill the socket, if: | 59 | * We kill the socket, if: |
53 | * 1. If number of orphaned sockets exceeds an administratively configured | 60 | * 1. If number of orphaned sockets exceeds an administratively configured |
54 | * limit. | 61 | * limit. |
55 | * 2. If we have strong memory pressure. | 62 | * 2. If we have strong memory pressure. |
63 | * 3. If our net namespace is exiting. | ||
56 | */ | 64 | */ |
57 | static int tcp_out_of_resources(struct sock *sk, bool do_reset) | 65 | static int tcp_out_of_resources(struct sock *sk, bool do_reset) |
58 | { | 66 | { |
@@ -81,6 +89,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) | |||
81 | __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); | 89 | __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); |
82 | return 1; | 90 | return 1; |
83 | } | 91 | } |
92 | |||
93 | if (!check_net(sock_net(sk))) { | ||
94 | /* Not possible to send reset; just close */ | ||
95 | tcp_done(sk); | ||
96 | return 1; | ||
97 | } | ||
98 | |||
84 | return 0; | 99 | return 0; |
85 | } | 100 | } |
86 | 101 | ||
@@ -264,6 +279,7 @@ void tcp_delack_timer_handler(struct sock *sk) | |||
264 | icsk->icsk_ack.pingpong = 0; | 279 | icsk->icsk_ack.pingpong = 0; |
265 | icsk->icsk_ack.ato = TCP_ATO_MIN; | 280 | icsk->icsk_ack.ato = TCP_ATO_MIN; |
266 | } | 281 | } |
282 | tcp_mstamp_refresh(tcp_sk(sk)); | ||
267 | tcp_send_ack(sk); | 283 | tcp_send_ack(sk); |
268 | __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); | 284 | __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); |
269 | } | 285 | } |
@@ -632,6 +648,7 @@ static void tcp_keepalive_timer (struct timer_list *t) | |||
632 | goto out; | 648 | goto out; |
633 | } | 649 | } |
634 | 650 | ||
651 | tcp_mstamp_refresh(tp); | ||
635 | if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { | 652 | if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { |
636 | if (tp->linger2 >= 0) { | 653 | if (tp->linger2 >= 0) { |
637 | const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; | 654 | const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e4ff25c947c5..ef45adfc0edb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -2502,9 +2502,9 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, | |||
2502 | * but then block when reading it. Add special case code | 2502 | * but then block when reading it. Add special case code |
2503 | * to work around these arguably broken applications. | 2503 | * to work around these arguably broken applications. |
2504 | */ | 2504 | */ |
2505 | unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | 2505 | __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) |
2506 | { | 2506 | { |
2507 | unsigned int mask = datagram_poll(file, sock, wait); | 2507 | __poll_t mask = datagram_poll(file, sock, wait); |
2508 | struct sock *sk = sock->sk; | 2508 | struct sock *sk = sock->sk; |
2509 | 2509 | ||
2510 | if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) | 2510 | if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) |
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 01801b77bd0d..ea6e6e7df0ee 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c | |||
@@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, | |||
203 | goto out; | 203 | goto out; |
204 | } | 204 | } |
205 | 205 | ||
206 | if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) | ||
207 | goto out; | ||
208 | |||
206 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | 209 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
207 | goto out; | 210 | goto out; |
208 | 211 | ||
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index e50b7fea57ee..bcfc00e88756 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) | |||
23 | return xfrm4_extract_header(skb); | 23 | return xfrm4_extract_header(skb); |
24 | } | 24 | } |
25 | 25 | ||
26 | static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, | ||
27 | struct sk_buff *skb) | ||
28 | { | ||
29 | return dst_input(skb); | ||
30 | } | ||
31 | |||
26 | static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, | 32 | static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, |
27 | struct sk_buff *skb) | 33 | struct sk_buff *skb) |
28 | { | 34 | { |
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, | |||
33 | iph->tos, skb->dev)) | 39 | iph->tos, skb->dev)) |
34 | goto drop; | 40 | goto drop; |
35 | } | 41 | } |
36 | return dst_input(skb); | 42 | |
43 | if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2)) | ||
44 | goto drop; | ||
45 | |||
46 | return 0; | ||
37 | drop: | 47 | drop: |
38 | kfree_skb(skb); | 48 | kfree_skb(skb); |
39 | return NET_RX_DROP; | 49 | return NET_RX_DROP; |
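
Instead of calling dst_input() directly, the finish function now hands the skb to xfrm_trans_queue(), evidently so the receive path is re-entered from a clean context rather than recursively on the same stack when the decapsulated payload is itself IPsec (nested tunnels). A user-space analogue of trading recursion for an explicit work queue:

    #include <stdio.h>
    #include <stdlib.h>

    struct work {
            int layer;
            struct work *next;
    };

    static struct work *queue;

    static void enqueue(int layer)
    {
            struct work *w = malloc(sizeof(*w));

            if (!w)
                    return;
            w->layer = layer;
            w->next = queue;
            queue = w;
    }

    /* Each decapsulated layer is queued, not recursed into, so the
     * stack depth stays constant however deeply packets are nested. */
    static void drain(void)
    {
            while (queue) {
                    struct work *w = queue;

                    queue = w->next;
                    printf("processing layer %d\n", w->layer);
                    if (w->layer < 3)       /* found an inner packet */
                            enqueue(w->layer + 1);
                    free(w);
            }
    }

    int main(void)
    {
            enqueue(0);
            drain();
            return 0;
    }
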
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index e6265e2c274e..20ca486b3cad 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
@@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) | |||
92 | 92 | ||
93 | skb_reset_network_header(skb); | 93 | skb_reset_network_header(skb); |
94 | skb_mac_header_rebuild(skb); | 94 | skb_mac_header_rebuild(skb); |
95 | eth_hdr(skb)->h_proto = skb->protocol; | ||
95 | 96 | ||
96 | err = 0; | 97 | err = 0; |
97 | 98 | ||
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c26f71234b9c..c9441ca45399 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c | |||
@@ -210,7 +210,6 @@ lookup_protocol: | |||
210 | np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; | 210 | np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; |
211 | np->mc_loop = 1; | 211 | np->mc_loop = 1; |
212 | np->pmtudisc = IPV6_PMTUDISC_WANT; | 212 | np->pmtudisc = IPV6_PMTUDISC_WANT; |
213 | np->autoflowlabel = ip6_default_np_autolabel(net); | ||
214 | np->repflow = net->ipv6.sysctl.flowlabel_reflect; | 213 | np->repflow = net->ipv6.sysctl.flowlabel_reflect; |
215 | sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; | 214 | sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; |
216 | 215 | ||
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index a902ff8f59be..1a7f00cd4803 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c | |||
@@ -890,13 +890,12 @@ static int esp6_init_state(struct xfrm_state *x) | |||
890 | x->props.header_len += IPV4_BEET_PHMAXLEN + | 890 | x->props.header_len += IPV4_BEET_PHMAXLEN + |
891 | (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); | 891 | (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); |
892 | break; | 892 | break; |
893 | default: | ||
893 | case XFRM_MODE_TRANSPORT: | 894 | case XFRM_MODE_TRANSPORT: |
894 | break; | 895 | break; |
895 | case XFRM_MODE_TUNNEL: | 896 | case XFRM_MODE_TUNNEL: |
896 | x->props.header_len += sizeof(struct ipv6hdr); | 897 | x->props.header_len += sizeof(struct ipv6hdr); |
897 | break; | 898 | break; |
898 | default: | ||
899 | goto error; | ||
900 | } | 899 | } |
901 | 900 | ||
902 | align = ALIGN(crypto_aead_blocksize(aead), 4); | 901 | align = ALIGN(crypto_aead_blocksize(aead), 4); |
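
Moving the default: label up to share XFRM_MODE_TRANSPORT's arm turns unrecognized modes from a hard init failure into transport-style handling that adds no header room, and it removes the now-unreachable second default. The label-grouping idiom in a standalone sketch (the modes and sizes are illustrative):

    #include <stdio.h>

    enum mode { MODE_TRANSPORT, MODE_TUNNEL, MODE_OTHER };

    static int header_len(enum mode m)
    {
            int len = 0;

            switch (m) {
            default:                /* unknown modes: treat as transport */
            case MODE_TRANSPORT:
                    break;
            case MODE_TUNNEL:
                    len += 40;      /* room for an outer IPv6 header */
                    break;
            }
            return len;
    }

    int main(void)
    {
            printf("%d %d %d\n", header_len(MODE_TRANSPORT),
                   header_len(MODE_TUNNEL), header_len(MODE_OTHER));
            return 0;
    }
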
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 333a478aa161..f52c314d4c97 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c | |||
@@ -60,7 +60,8 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, | |||
60 | int nhoff; | 60 | int nhoff; |
61 | int err; | 61 | int err; |
62 | 62 | ||
63 | skb_pull(skb, offset); | 63 | if (!pskb_pull(skb, offset)) |
64 | return NULL; | ||
64 | 65 | ||
65 | if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) | 66 | if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) |
66 | goto out; | 67 | goto out; |
@@ -148,6 +149,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, | |||
148 | if (!xo) | 149 | if (!xo) |
149 | goto out; | 150 | goto out; |
150 | 151 | ||
152 | if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) | ||
153 | goto out; | ||
154 | |||
151 | seq = xo->seq.low; | 155 | seq = xo->seq.low; |
152 | 156 | ||
153 | x = skb->sp->xvec[skb->sp->len - 1]; | 157 | x = skb->sp->xvec[skb->sp->len - 1]; |
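
Two hardening points in the ESP6 offload path: skb_pull() assumes the bytes are already in the linear area and returns a bad pointer otherwise, whereas pskb_pull() will try to linearize paged data and reports failure with NULL, which the GRO path must check; and the segment path, like the UDP one above, now refuses skbs not marked SKB_GSO_ESP. A checked-pull sketch on a plain buffer:

    #include <stddef.h>
    #include <stdio.h>

    struct buf {
            unsigned char *data;
            size_t len;
    };

    /* Checked pull: advance past n bytes or refuse outright; an
     * unchecked variant would hand back a pointer past the end. */
    static unsigned char *buf_pull(struct buf *b, size_t n)
    {
            if (b->len < n)
                    return NULL;
            b->data += n;
            b->len -= n;
            return b->data;
    }

    int main(void)
    {
            unsigned char raw[8] = "payload";
            struct buf b = { raw, sizeof(raw) };

            printf("%p\n", (void *)buf_pull(&b, 4));   /* ok */
            printf("%p\n", (void *)buf_pull(&b, 16));  /* NULL */
            return 0;
    }
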
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 83bd75713535..bc68eb661970 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c | |||
@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, | |||
925 | sr_phdr->segments[0] = **addr_p; | 925 | sr_phdr->segments[0] = **addr_p; |
926 | *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left]; | 926 | *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left]; |
927 | 927 | ||
928 | if (sr_ihdr->hdrlen > hops * 2) { | ||
929 | int tlvs_offset, tlvs_length; | ||
930 | |||
931 | tlvs_offset = (1 + hops * 2) << 3; | ||
932 | tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3; | ||
933 | memcpy((char *)sr_phdr + tlvs_offset, | ||
934 | (char *)sr_ihdr + tlvs_offset, tlvs_length); | ||
935 | } | ||
936 | |||
928 | #ifdef CONFIG_IPV6_SEG6_HMAC | 937 | #ifdef CONFIG_IPV6_SEG6_HMAC |
929 | if (sr_has_hmac(sr_phdr)) { | 938 | if (sr_has_hmac(sr_phdr)) { |
930 | struct net *net = NULL; | 939 | struct net *net = NULL; |
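
The arithmetic in the new hunk follows the routing-header layout: hdrlen counts 8-byte units after the first 8 bytes, and each IPv6 segment is 16 bytes, i.e. two units, so a header carrying only segments has hdrlen == hops * 2, and anything beyond that is TLV data the push must copy as well. Worked through for hops = 2 and hdrlen = 5: the TLVs begin at (1 + 2*2) * 8 = 40 bytes into the header and run (5 - 4) * 8 = 8 bytes. The same computation, standalone:

    #include <stdio.h>

    int main(void)
    {
            int hops = 2, hdrlen = 5;       /* hdrlen in 8-byte units */
            int tlvs_offset = (1 + hops * 2) << 3;
            int tlvs_length = (hdrlen - hops * 2) << 3;

            if (hdrlen > hops * 2)
                    printf("TLVs at byte %d, %d bytes long\n",
                           tlvs_offset, tlvs_length);
            return 0;
    }
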
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f5285f4e1d08..217683d40f12 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c | |||
@@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net, | |||
640 | if (!(fn->fn_flags & RTN_RTINFO)) { | 640 | if (!(fn->fn_flags & RTN_RTINFO)) { |
641 | RCU_INIT_POINTER(fn->leaf, NULL); | 641 | RCU_INIT_POINTER(fn->leaf, NULL); |
642 | rt6_release(leaf); | 642 | rt6_release(leaf); |
643 | /* remove null_entry in the root node */ | ||
644 | } else if (fn->fn_flags & RTN_TL_ROOT && | ||
645 | rcu_access_pointer(fn->leaf) == | ||
646 | net->ipv6.ip6_null_entry) { | ||
647 | RCU_INIT_POINTER(fn->leaf, NULL); | ||
643 | } | 648 | } |
644 | 649 | ||
645 | return fn; | 650 | return fn; |
@@ -1221,8 +1226,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, | |||
1221 | } | 1226 | } |
1222 | 1227 | ||
1223 | if (!rcu_access_pointer(fn->leaf)) { | 1228 | if (!rcu_access_pointer(fn->leaf)) { |
1224 | atomic_inc(&rt->rt6i_ref); | 1229 | if (fn->fn_flags & RTN_TL_ROOT) { |
1225 | rcu_assign_pointer(fn->leaf, rt); | 1230 | /* put back null_entry for root node */ |
1231 | rcu_assign_pointer(fn->leaf, | ||
1232 | info->nl_net->ipv6.ip6_null_entry); | ||
1233 | } else { | ||
1234 | atomic_inc(&rt->rt6i_ref); | ||
1235 | rcu_assign_pointer(fn->leaf, rt); | ||
1236 | } | ||
1226 | } | 1237 | } |
1227 | fn = sn; | 1238 | fn = sn; |
1228 | } | 1239 | } |
@@ -1241,23 +1252,28 @@ out: | |||
1241 | * If fib6_add_1 has cleared the old leaf pointer in the | 1252 | * If fib6_add_1 has cleared the old leaf pointer in the |
1242 | * super-tree leaf node we have to find a new one for it. | 1253 | * super-tree leaf node we have to find a new one for it. |
1243 | */ | 1254 | */ |
1244 | struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, | 1255 | if (pn != fn) { |
1245 | lockdep_is_held(&table->tb6_lock)); | 1256 | struct rt6_info *pn_leaf = |
1246 | if (pn != fn && pn_leaf == rt) { | 1257 | rcu_dereference_protected(pn->leaf, |
1247 | pn_leaf = NULL; | 1258 | lockdep_is_held(&table->tb6_lock)); |
1248 | RCU_INIT_POINTER(pn->leaf, NULL); | 1259 | if (pn_leaf == rt) { |
1249 | atomic_dec(&rt->rt6i_ref); | 1260 | pn_leaf = NULL; |
1250 | } | 1261 | RCU_INIT_POINTER(pn->leaf, NULL); |
1251 | if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { | 1262 | atomic_dec(&rt->rt6i_ref); |
1252 | pn_leaf = fib6_find_prefix(info->nl_net, table, pn); | ||
1253 | #if RT6_DEBUG >= 2 | ||
1254 | if (!pn_leaf) { | ||
1255 | WARN_ON(!pn_leaf); | ||
1256 | pn_leaf = info->nl_net->ipv6.ip6_null_entry; | ||
1257 | } | 1263 | } |
1264 | if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { | ||
1265 | pn_leaf = fib6_find_prefix(info->nl_net, table, | ||
1266 | pn); | ||
1267 | #if RT6_DEBUG >= 2 | ||
1268 | if (!pn_leaf) { | ||
1269 | WARN_ON(!pn_leaf); | ||
1270 | pn_leaf = | ||
1271 | info->nl_net->ipv6.ip6_null_entry; | ||
1272 | } | ||
1258 | #endif | 1273 | #endif |
1259 | atomic_inc(&pn_leaf->rt6i_ref); | 1274 | atomic_inc(&pn_leaf->rt6i_ref); |
1260 | rcu_assign_pointer(pn->leaf, pn_leaf); | 1275 | rcu_assign_pointer(pn->leaf, pn_leaf); |
1276 | } | ||
1261 | } | 1277 | } |
1262 | #endif | 1278 | #endif |
1263 | goto failure; | 1279 | goto failure; |
@@ -1265,13 +1281,17 @@ out: | |||
1265 | return err; | 1281 | return err; |
1266 | 1282 | ||
1267 | failure: | 1283 | failure: |
1268 | /* fn->leaf could be NULL if fn is an intermediate node and we | 1284 | /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if: |
1269 | * failed to add the new route to it in both subtree creation | 1285 | * 1. fn is an intermediate node and we failed to add the new |
1270 | * failure and fib6_add_rt2node() failure case. | 1286 | * route to it in both subtree creation failure and fib6_add_rt2node() |
1271 | * In both cases, fib6_repair_tree() should be called to fix | 1287 | * failure case. |
1272 | * fn->leaf. | 1288 | * 2. fn is the root node in the table and we fail to add the first |
1289 | * default route to it. | ||
1273 | */ | 1290 | */ |
1274 | if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) | 1291 | if (fn && |
1292 | (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) || | ||
1293 | (fn->fn_flags & RTN_TL_ROOT && | ||
1294 | !rcu_access_pointer(fn->leaf)))) | ||
1275 | fib6_repair_tree(info->nl_net, table, fn); | 1295 | fib6_repair_tree(info->nl_net, table, fn); |
1276 | /* Always release dst as dst->__refcnt is guaranteed | 1296 | /* Always release dst as dst->__refcnt is guaranteed |
1277 | * to be taken before entering this function | 1297 | * to be taken before entering this function |
@@ -1526,6 +1546,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net, | |||
1526 | struct fib6_walker *w; | 1546 | struct fib6_walker *w; |
1527 | int iter = 0; | 1547 | int iter = 0; |
1528 | 1548 | ||
1549 | /* Set fn->leaf to null_entry for root node. */ | ||
1550 | if (fn->fn_flags & RTN_TL_ROOT) { | ||
1551 | rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry); | ||
1552 | return fn; | ||
1553 | } | ||
1554 | |||
1529 | for (;;) { | 1555 | for (;;) { |
1530 | struct fib6_node *fn_r = rcu_dereference_protected(fn->right, | 1556 | struct fib6_node *fn_r = rcu_dereference_protected(fn->right, |
1531 | lockdep_is_held(&table->tb6_lock)); | 1557 | lockdep_is_held(&table->tb6_lock)); |
@@ -1680,10 +1706,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, | |||
1680 | } | 1706 | } |
1681 | read_unlock(&net->ipv6.fib6_walker_lock); | 1707 | read_unlock(&net->ipv6.fib6_walker_lock); |
1682 | 1708 | ||
1683 | /* If it was last route, expunge its radix tree node */ | 1709 | /* If it was last route, call fib6_repair_tree() to: |
1710 | * 1. For root node, put back null_entry as how the table was created. | ||
1711 | * 2. For other nodes, expunge its radix tree node. | ||
1712 | */ | ||
1684 | if (!rcu_access_pointer(fn->leaf)) { | 1713 | if (!rcu_access_pointer(fn->leaf)) { |
1685 | fn->fn_flags &= ~RTN_RTINFO; | 1714 | if (!(fn->fn_flags & RTN_TL_ROOT)) { |
1686 | net->ipv6.rt6_stats->fib_route_nodes--; | 1715 | fn->fn_flags &= ~RTN_RTINFO; |
1716 | net->ipv6.rt6_stats->fib_route_nodes--; | ||
1717 | } | ||
1687 | fn = fib6_repair_tree(net, table, fn); | 1718 | fn = fib6_repair_tree(net, table, fn); |
1688 | } | 1719 | } |
1689 | 1720 | ||
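
All four ip6_fib.c hunks defend one invariant: a table's root node is created pointing at ip6_null_entry, and it must return to that state, never to a bare NULL leaf, when its last route is removed or when the very first insertion fails. A toy version of the empty-means-sentinel pattern:

    #include <stdio.h>

    struct route {
            const char *prefix;
    };

    static struct route null_entry = { "::/0 (drop)" };

    struct node {
            struct route *leaf;     /* never NULL for the root */
    };

    static void del_last_route(struct node *root)
    {
            /* Put the sentinel back instead of leaving NULL, so a
             * concurrent lookup always finds something to return. */
            root->leaf = &null_entry;
    }

    int main(void)
    {
            struct route r = { "2001:db8::/32" };
            struct node root = { &r };

            del_last_route(&root);
            printf("root leaf: %s\n", root.leaf->prefix);
            return 0;
    }
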
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4cfd8e0696fe..873549228ccb 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c | |||
@@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, | |||
337 | 337 | ||
338 | nt->dev = dev; | 338 | nt->dev = dev; |
339 | nt->net = dev_net(dev); | 339 | nt->net = dev_net(dev); |
340 | ip6gre_tnl_link_config(nt, 1); | ||
341 | 340 | ||
342 | if (register_netdevice(dev) < 0) | 341 | if (register_netdevice(dev) < 0) |
343 | goto failed_free; | 342 | goto failed_free; |
344 | 343 | ||
344 | ip6gre_tnl_link_config(nt, 1); | ||
345 | |||
345 | /* Can use a lockless transmit, unless we generate output sequences */ | 346 | /* Can use a lockless transmit, unless we generate output sequences */ |
346 | if (!(nt->parms.o_flags & TUNNEL_SEQ)) | 347 | if (!(nt->parms.o_flags & TUNNEL_SEQ)) |
347 | dev->features |= NETIF_F_LLTX; | 348 | dev->features |= NETIF_F_LLTX; |
@@ -1014,6 +1015,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev) | |||
1014 | eth_random_addr(dev->perm_addr); | 1015 | eth_random_addr(dev->perm_addr); |
1015 | } | 1016 | } |
1016 | 1017 | ||
1018 | #define GRE6_FEATURES (NETIF_F_SG | \ | ||
1019 | NETIF_F_FRAGLIST | \ | ||
1020 | NETIF_F_HIGHDMA | \ | ||
1021 | NETIF_F_HW_CSUM) | ||
1022 | |||
1023 | static void ip6gre_tnl_init_features(struct net_device *dev) | ||
1024 | { | ||
1025 | struct ip6_tnl *nt = netdev_priv(dev); | ||
1026 | |||
1027 | dev->features |= GRE6_FEATURES; | ||
1028 | dev->hw_features |= GRE6_FEATURES; | ||
1029 | |||
1030 | if (!(nt->parms.o_flags & TUNNEL_SEQ)) { | ||
1031 | /* TCP offload with GRE SEQ is not supported, nor | ||
1032 | * can we support 2 levels of outer headers requiring | ||
1033 | * an update. | ||
1034 | */ | ||
1035 | if (!(nt->parms.o_flags & TUNNEL_CSUM) || | ||
1036 | nt->encap.type == TUNNEL_ENCAP_NONE) { | ||
1037 | dev->features |= NETIF_F_GSO_SOFTWARE; | ||
1038 | dev->hw_features |= NETIF_F_GSO_SOFTWARE; | ||
1039 | } | ||
1040 | |||
1041 | /* Can use a lockless transmit, unless we generate | ||
1042 | * output sequences | ||
1043 | */ | ||
1044 | dev->features |= NETIF_F_LLTX; | ||
1045 | } | ||
1046 | } | ||
1047 | |||
1017 | static int ip6gre_tunnel_init_common(struct net_device *dev) | 1048 | static int ip6gre_tunnel_init_common(struct net_device *dev) |
1018 | { | 1049 | { |
1019 | struct ip6_tnl *tunnel; | 1050 | struct ip6_tnl *tunnel; |
@@ -1048,6 +1079,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) | |||
1048 | if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) | 1079 | if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) |
1049 | dev->mtu -= 8; | 1080 | dev->mtu -= 8; |
1050 | 1081 | ||
1082 | ip6gre_tnl_init_features(dev); | ||
1083 | |||
1051 | return 0; | 1084 | return 0; |
1052 | } | 1085 | } |
1053 | 1086 | ||
@@ -1271,7 +1304,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], | |||
1271 | 1304 | ||
1272 | static int ip6gre_tap_init(struct net_device *dev) | 1305 | static int ip6gre_tap_init(struct net_device *dev) |
1273 | { | 1306 | { |
1274 | struct ip6_tnl *tunnel; | ||
1275 | int ret; | 1307 | int ret; |
1276 | 1308 | ||
1277 | ret = ip6gre_tunnel_init_common(dev); | 1309 | ret = ip6gre_tunnel_init_common(dev); |
@@ -1280,10 +1312,6 @@ static int ip6gre_tap_init(struct net_device *dev) | |||
1280 | 1312 | ||
1281 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; | 1313 | dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; |
1282 | 1314 | ||
1283 | tunnel = netdev_priv(dev); | ||
1284 | |||
1285 | ip6gre_tnl_link_config(tunnel, 1); | ||
1286 | |||
1287 | return 0; | 1315 | return 0; |
1288 | } | 1316 | } |
1289 | 1317 | ||
@@ -1298,16 +1326,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { | |||
1298 | .ndo_get_iflink = ip6_tnl_get_iflink, | 1326 | .ndo_get_iflink = ip6_tnl_get_iflink, |
1299 | }; | 1327 | }; |
1300 | 1328 | ||
1301 | #define GRE6_FEATURES (NETIF_F_SG | \ | ||
1302 | NETIF_F_FRAGLIST | \ | ||
1303 | NETIF_F_HIGHDMA | \ | ||
1304 | NETIF_F_HW_CSUM) | ||
1305 | |||
1306 | static void ip6gre_tap_setup(struct net_device *dev) | 1329 | static void ip6gre_tap_setup(struct net_device *dev) |
1307 | { | 1330 | { |
1308 | 1331 | ||
1309 | ether_setup(dev); | 1332 | ether_setup(dev); |
1310 | 1333 | ||
1334 | dev->max_mtu = 0; | ||
1311 | dev->netdev_ops = &ip6gre_tap_netdev_ops; | 1335 | dev->netdev_ops = &ip6gre_tap_netdev_ops; |
1312 | dev->needs_free_netdev = true; | 1336 | dev->needs_free_netdev = true; |
1313 | dev->priv_destructor = ip6gre_dev_free; | 1337 | dev->priv_destructor = ip6gre_dev_free; |
@@ -1380,32 +1404,16 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, | |||
1380 | 1404 | ||
1381 | nt->dev = dev; | 1405 | nt->dev = dev; |
1382 | nt->net = dev_net(dev); | 1406 | nt->net = dev_net(dev); |
1383 | ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); | ||
1384 | |||
1385 | dev->features |= GRE6_FEATURES; | ||
1386 | dev->hw_features |= GRE6_FEATURES; | ||
1387 | |||
1388 | if (!(nt->parms.o_flags & TUNNEL_SEQ)) { | ||
1389 | /* TCP offload with GRE SEQ is not supported, nor | ||
1390 | * can we support 2 levels of outer headers requiring | ||
1391 | * an update. | ||
1392 | */ | ||
1393 | if (!(nt->parms.o_flags & TUNNEL_CSUM) || | ||
1394 | (nt->encap.type == TUNNEL_ENCAP_NONE)) { | ||
1395 | dev->features |= NETIF_F_GSO_SOFTWARE; | ||
1396 | dev->hw_features |= NETIF_F_GSO_SOFTWARE; | ||
1397 | } | ||
1398 | |||
1399 | /* Can use a lockless transmit, unless we generate | ||
1400 | * output sequences | ||
1401 | */ | ||
1402 | dev->features |= NETIF_F_LLTX; | ||
1403 | } | ||
1404 | 1407 | ||
1405 | err = register_netdevice(dev); | 1408 | err = register_netdevice(dev); |
1406 | if (err) | 1409 | if (err) |
1407 | goto out; | 1410 | goto out; |
1408 | 1411 | ||
1412 | ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); | ||
1413 | |||
1414 | if (tb[IFLA_MTU]) | ||
1415 | ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); | ||
1416 | |||
1409 | dev_hold(dev); | 1417 | dev_hold(dev); |
1410 | ip6gre_tunnel_link(ign, nt); | 1418 | ip6gre_tunnel_link(ign, nt); |
1411 | 1419 | ||
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5110a418cc4d..3763dc01e374 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) | |||
166 | !(IP6CB(skb)->flags & IP6SKB_REROUTED)); | 166 | !(IP6CB(skb)->flags & IP6SKB_REROUTED)); |
167 | } | 167 | } |
168 | 168 | ||
169 | bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) | ||
170 | { | ||
171 | if (!np->autoflowlabel_set) | ||
172 | return ip6_default_np_autolabel(net); | ||
173 | else | ||
174 | return np->autoflowlabel; | ||
175 | } | ||
176 | |||
169 | /* | 177 | /* |
170 | * xmit an sk_buff (used by TCP, SCTP and DCCP) | 178 | * xmit an sk_buff (used by TCP, SCTP and DCCP) |
171 | * Note : socket lock is not held for SYNACK packets, but might be modified | 179 | * Note : socket lock is not held for SYNACK packets, but might be modified |
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, | |||
230 | hlimit = ip6_dst_hoplimit(dst); | 238 | hlimit = ip6_dst_hoplimit(dst); |
231 | 239 | ||
232 | ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, | 240 | ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, |
233 | np->autoflowlabel, fl6)); | 241 | ip6_autoflowlabel(net, np), fl6)); |
234 | 242 | ||
235 | hdr->payload_len = htons(seg_len); | 243 | hdr->payload_len = htons(seg_len); |
236 | hdr->nexthdr = proto; | 244 | hdr->nexthdr = proto; |
@@ -1198,14 +1206,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, | |||
1198 | v6_cork->tclass = ipc6->tclass; | 1206 | v6_cork->tclass = ipc6->tclass; |
1199 | if (rt->dst.flags & DST_XFRM_TUNNEL) | 1207 | if (rt->dst.flags & DST_XFRM_TUNNEL) |
1200 | mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? | 1208 | mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? |
1201 | rt->dst.dev->mtu : dst_mtu(&rt->dst); | 1209 | READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); |
1202 | else | 1210 | else |
1203 | mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? | 1211 | mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? |
1204 | rt->dst.dev->mtu : dst_mtu(rt->dst.path); | 1212 | READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path); |
1205 | if (np->frag_size < mtu) { | 1213 | if (np->frag_size < mtu) { |
1206 | if (np->frag_size) | 1214 | if (np->frag_size) |
1207 | mtu = np->frag_size; | 1215 | mtu = np->frag_size; |
1208 | } | 1216 | } |
1217 | if (mtu < IPV6_MIN_MTU) | ||
1218 | return -EINVAL; | ||
1209 | cork->base.fragsize = mtu; | 1219 | cork->base.fragsize = mtu; |
1210 | if (dst_allfrag(rt->dst.path)) | 1220 | if (dst_allfrag(rt->dst.path)) |
1211 | cork->base.flags |= IPCORK_ALLFRAG; | 1221 | cork->base.flags |= IPCORK_ALLFRAG; |
@@ -1626,7 +1636,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, | |||
1626 | 1636 | ||
1627 | ip6_flow_hdr(hdr, v6_cork->tclass, | 1637 | ip6_flow_hdr(hdr, v6_cork->tclass, |
1628 | ip6_make_flowlabel(net, skb, fl6->flowlabel, | 1638 | ip6_make_flowlabel(net, skb, fl6->flowlabel, |
1629 | np->autoflowlabel, fl6)); | 1639 | ip6_autoflowlabel(net, np), fl6)); |
1630 | hdr->hop_limit = v6_cork->hop_limit; | 1640 | hdr->hop_limit = v6_cork->hop_limit; |
1631 | hdr->nexthdr = proto; | 1641 | hdr->nexthdr = proto; |
1632 | hdr->saddr = fl6->saddr; | 1642 | hdr->saddr = fl6->saddr; |
@@ -1725,11 +1735,13 @@ struct sk_buff *ip6_make_skb(struct sock *sk, | |||
1725 | cork.base.flags = 0; | 1735 | cork.base.flags = 0; |
1726 | cork.base.addr = 0; | 1736 | cork.base.addr = 0; |
1727 | cork.base.opt = NULL; | 1737 | cork.base.opt = NULL; |
1738 | cork.base.dst = NULL; | ||
1728 | v6_cork.opt = NULL; | 1739 | v6_cork.opt = NULL; |
1729 | err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); | 1740 | err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); |
1730 | if (err) | 1741 | if (err) { |
1742 | ip6_cork_release(&cork, &v6_cork); | ||
1731 | return ERR_PTR(err); | 1743 | return ERR_PTR(err); |
1732 | 1744 | } | |
1733 | if (ipc6->dontfrag < 0) | 1745 | if (ipc6->dontfrag < 0) |
1734 | ipc6->dontfrag = inet6_sk(sk)->dontfrag; | 1746 | ipc6->dontfrag = inet6_sk(sk)->dontfrag; |
1735 | 1747 | ||
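
ip6_autoflowlabel() exists because a lone boolean cannot distinguish "the application set IPV6_AUTOFLOWLABEL to 0" from "the application never touched it"; recording the write in autoflowlabel_set lets untouched sockets keep following the per-namespace sysctl even when it changes after the socket was created. The other two hunks are independent fixes: ip6_setup_cork() now rejects an MTU below IPV6_MIN_MTU instead of corking with it, and ip6_make_skb() releases the cork on setup failure so the held dst is not leaked. The explicit-choice-else-default pattern, reduced to a sketch:

    #include <stdbool.h>
    #include <stdio.h>

    static bool net_default = true;    /* stands in for the sysctl */

    struct sockopts {
            bool autolabel;            /* the value the user wrote */
            bool autolabel_set;        /* did the user write at all? */
    };

    static bool effective_autolabel(const struct sockopts *o)
    {
            return o->autolabel_set ? o->autolabel : net_default;
    }

    int main(void)
    {
            struct sockopts o = { 0 };

            printf("%d\n", effective_autolabel(&o));  /* 1: follows sysctl */
            o.autolabel = false;
            o.autolabel_set = true;
            printf("%d\n", effective_autolabel(&o));  /* 0: explicit off */
            return 0;
    }
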
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3d3092adf1d2..1ee5584c3555 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c | |||
@@ -642,8 +642,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
642 | if (rel_info > dst_mtu(skb_dst(skb2))) | 642 | if (rel_info > dst_mtu(skb_dst(skb2))) |
643 | goto out; | 643 | goto out; |
644 | 644 | ||
645 | skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, | 645 | skb_dst_update_pmtu(skb2, rel_info); |
646 | rel_info); | ||
647 | } | 646 | } |
648 | 647 | ||
649 | icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); | 648 | icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); |
@@ -904,7 +903,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, | |||
904 | if (t->parms.collect_md) { | 903 | if (t->parms.collect_md) { |
905 | tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); | 904 | tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); |
906 | if (!tun_dst) | 905 | if (!tun_dst) |
907 | return 0; | 906 | goto drop; |
908 | } | 907 | } |
909 | ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, | 908 | ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, |
910 | log_ecn_error); | 909 | log_ecn_error); |
@@ -1074,10 +1073,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, | |||
1074 | memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); | 1073 | memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); |
1075 | neigh_release(neigh); | 1074 | neigh_release(neigh); |
1076 | } | 1075 | } |
1077 | } else if (!(t->parms.flags & | 1076 | } else if (t->parms.proto != 0 && !(t->parms.flags & |
1078 | (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { | 1077 | (IP6_TNL_F_USE_ORIG_TCLASS | |
1079 | /* enable the cache only only if the routing decision does | 1078 | IP6_TNL_F_USE_ORIG_FWMARK))) { |
1080 | * not depend on the current inner header value | 1079 | /* enable the cache only if neither the outer protocol nor the |
1080 | * routing decision depends on the current inner header value | ||
1081 | */ | 1081 | */ |
1082 | use_cache = true; | 1082 | use_cache = true; |
1083 | } | 1083 | } |
@@ -1123,10 +1123,14 @@ route_lookup: | |||
1123 | max_headroom += 8; | 1123 | max_headroom += 8; |
1124 | mtu -= 8; | 1124 | mtu -= 8; |
1125 | } | 1125 | } |
1126 | if (mtu < IPV6_MIN_MTU) | 1126 | if (skb->protocol == htons(ETH_P_IPV6)) { |
1127 | mtu = IPV6_MIN_MTU; | 1127 | if (mtu < IPV6_MIN_MTU) |
1128 | if (skb_dst(skb) && !t->parms.collect_md) | 1128 | mtu = IPV6_MIN_MTU; |
1129 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | 1129 | } else if (mtu < 576) { |
1130 | mtu = 576; | ||
1131 | } | ||
1132 | |||
1133 | skb_dst_update_pmtu(skb, mtu); | ||
1130 | if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { | 1134 | if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { |
1131 | *pmtu = mtu; | 1135 | *pmtu = mtu; |
1132 | err = -EMSGSIZE; | 1136 | err = -EMSGSIZE; |
@@ -1671,11 +1675,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) | |||
1671 | { | 1675 | { |
1672 | struct ip6_tnl *tnl = netdev_priv(dev); | 1676 | struct ip6_tnl *tnl = netdev_priv(dev); |
1673 | 1677 | ||
1674 | if (tnl->parms.proto == IPPROTO_IPIP) { | 1678 | if (tnl->parms.proto == IPPROTO_IPV6) { |
1675 | if (new_mtu < ETH_MIN_MTU) | 1679 | if (new_mtu < IPV6_MIN_MTU) |
1676 | return -EINVAL; | 1680 | return -EINVAL; |
1677 | } else { | 1681 | } else { |
1678 | if (new_mtu < IPV6_MIN_MTU) | 1682 | if (new_mtu < ETH_MIN_MTU) |
1679 | return -EINVAL; | 1683 | return -EINVAL; |
1680 | } | 1684 | } |
1681 | if (new_mtu > 0xFFF8 - dev->hard_header_len) | 1685 | if (new_mtu > 0xFFF8 - dev->hard_header_len) |
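
Three related MTU fixes here: the collect_md decap error path now drops the skb instead of leaking it, the clamp before skb_dst_update_pmtu() picks its floor by inner protocol (IPV6_MIN_MTU for IPv6, 576 for IPv4), and ip6_tnl_change_mtu() had its protocol test inverted, so it is ip6ip6 tunnels (parms.proto == IPPROTO_IPV6) that are held to IPV6_MIN_MTU. The clamp as a pure function (576 is the classic IPv4 minimum; the macro name is illustrative):

    #include <stdio.h>

    #define IPV6_MIN_MTU 1280
    #define IPV4_MIN_MTU 576

    static unsigned int clamp_pmtu(unsigned int mtu, int inner_is_ipv6)
    {
            unsigned int floor = inner_is_ipv6 ? IPV6_MIN_MTU
                                               : IPV4_MIN_MTU;

            return mtu < floor ? floor : mtu;
    }

    int main(void)
    {
            printf("%u %u\n", clamp_pmtu(500, 1), clamp_pmtu(500, 0));
            return 0;
    }
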
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index dbb74f3c57a7..8c184f84f353 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c | |||
@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) | |||
483 | 483 | ||
484 | mtu = dst_mtu(dst); | 484 | mtu = dst_mtu(dst); |
485 | if (!skb->ignore_df && skb->len > mtu) { | 485 | if (!skb->ignore_df && skb->len > mtu) { |
486 | skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); | 486 | skb_dst_update_pmtu(skb, mtu); |
487 | 487 | ||
488 | if (skb->protocol == htons(ETH_P_IPV6)) { | 488 | if (skb->protocol == htons(ETH_P_IPV6)) { |
489 | if (mtu < IPV6_MIN_MTU) | 489 | if (mtu < IPV6_MIN_MTU) |
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b9404feabd78..e8ffb5b5d84e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c | |||
@@ -886,6 +886,7 @@ pref_skip_coa: | |||
886 | break; | 886 | break; |
887 | case IPV6_AUTOFLOWLABEL: | 887 | case IPV6_AUTOFLOWLABEL: |
888 | np->autoflowlabel = valbool; | 888 | np->autoflowlabel = valbool; |
889 | np->autoflowlabel_set = 1; | ||
889 | retv = 0; | 890 | retv = 0; |
890 | break; | 891 | break; |
891 | case IPV6_RECVFRAGSIZE: | 892 | case IPV6_RECVFRAGSIZE: |
@@ -1335,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, | |||
1335 | break; | 1336 | break; |
1336 | 1337 | ||
1337 | case IPV6_AUTOFLOWLABEL: | 1338 | case IPV6_AUTOFLOWLABEL: |
1338 | val = np->autoflowlabel; | 1339 | val = ip6_autoflowlabel(sock_net(sk), np); |
1339 | break; | 1340 | break; |
1340 | 1341 | ||
1341 | case IPV6_RECVFRAGSIZE: | 1342 | case IPV6_RECVFRAGSIZE: |
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index fc6d7d143f2c..844642682b83 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c | |||
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) | |||
1682 | } | 1682 | } |
1683 | 1683 | ||
1684 | static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, | 1684 | static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, |
1685 | int type, struct mld2_grec **ppgr) | 1685 | int type, struct mld2_grec **ppgr, unsigned int mtu) |
1686 | { | 1686 | { |
1687 | struct net_device *dev = pmc->idev->dev; | ||
1688 | struct mld2_report *pmr; | 1687 | struct mld2_report *pmr; |
1689 | struct mld2_grec *pgr; | 1688 | struct mld2_grec *pgr; |
1690 | 1689 | ||
1691 | if (!skb) | 1690 | if (!skb) { |
1692 | skb = mld_newpack(pmc->idev, dev->mtu); | 1691 | skb = mld_newpack(pmc->idev, mtu); |
1693 | if (!skb) | 1692 | if (!skb) |
1694 | return NULL; | 1693 | return NULL; |
1694 | } | ||
1695 | pgr = skb_put(skb, sizeof(struct mld2_grec)); | 1695 | pgr = skb_put(skb, sizeof(struct mld2_grec)); |
1696 | pgr->grec_type = type; | 1696 | pgr->grec_type = type; |
1697 | pgr->grec_auxwords = 0; | 1697 | pgr->grec_auxwords = 0; |
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, | |||
1714 | struct mld2_grec *pgr = NULL; | 1714 | struct mld2_grec *pgr = NULL; |
1715 | struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; | 1715 | struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; |
1716 | int scount, stotal, first, isquery, truncate; | 1716 | int scount, stotal, first, isquery, truncate; |
1717 | unsigned int mtu; | ||
1717 | 1718 | ||
1718 | if (pmc->mca_flags & MAF_NOREPORT) | 1719 | if (pmc->mca_flags & MAF_NOREPORT) |
1719 | return skb; | 1720 | return skb; |
1720 | 1721 | ||
1722 | mtu = READ_ONCE(dev->mtu); | ||
1723 | if (mtu < IPV6_MIN_MTU) | ||
1724 | return skb; | ||
1725 | |||
1721 | isquery = type == MLD2_MODE_IS_INCLUDE || | 1726 | isquery = type == MLD2_MODE_IS_INCLUDE || |
1722 | type == MLD2_MODE_IS_EXCLUDE; | 1727 | type == MLD2_MODE_IS_EXCLUDE; |
1723 | truncate = type == MLD2_MODE_IS_EXCLUDE || | 1728 | truncate = type == MLD2_MODE_IS_EXCLUDE || |
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, | |||
1738 | AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { | 1743 | AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { |
1739 | if (skb) | 1744 | if (skb) |
1740 | mld_sendpack(skb); | 1745 | mld_sendpack(skb); |
1741 | skb = mld_newpack(idev, dev->mtu); | 1746 | skb = mld_newpack(idev, mtu); |
1742 | } | 1747 | } |
1743 | } | 1748 | } |
1744 | first = 1; | 1749 | first = 1; |
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, | |||
1774 | pgr->grec_nsrcs = htons(scount); | 1779 | pgr->grec_nsrcs = htons(scount); |
1775 | if (skb) | 1780 | if (skb) |
1776 | mld_sendpack(skb); | 1781 | mld_sendpack(skb); |
1777 | skb = mld_newpack(idev, dev->mtu); | 1782 | skb = mld_newpack(idev, mtu); |
1778 | first = 1; | 1783 | first = 1; |
1779 | scount = 0; | 1784 | scount = 0; |
1780 | } | 1785 | } |
1781 | if (first) { | 1786 | if (first) { |
1782 | skb = add_grhead(skb, pmc, type, &pgr); | 1787 | skb = add_grhead(skb, pmc, type, &pgr, mtu); |
1783 | first = 0; | 1788 | first = 0; |
1784 | } | 1789 | } |
1785 | if (!skb) | 1790 | if (!skb) |
@@ -1814,7 +1819,7 @@ empty_source: | |||
1814 | mld_sendpack(skb); | 1819 | mld_sendpack(skb); |
1815 | skb = NULL; /* add_grhead will get a new one */ | 1820 | skb = NULL; /* add_grhead will get a new one */ |
1816 | } | 1821 | } |
1817 | skb = add_grhead(skb, pmc, type, &pgr); | 1822 | skb = add_grhead(skb, pmc, type, &pgr, mtu); |
1818 | } | 1823 | } |
1819 | } | 1824 | } |
1820 | if (pgr) | 1825 | if (pgr) |
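
READ_ONCE(dev->mtu) takes a single snapshot that is then threaded through add_grec()/add_grhead()/mld_newpack(), so the size checks and the eventual allocation cannot disagree if the device MTU changes mid-report, and the IPV6_MIN_MTU test rejects a shrunken device up front. The snapshot-once idiom against a concurrently updated value, with C11 atomics standing in for READ_ONCE():

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned int dev_mtu = 1500;   /* writers elsewhere */

    static void build_report(void)
    {
            /* One snapshot; every decision below uses the same value
             * even if dev_mtu is rewritten halfway through. */
            unsigned int mtu =
                    atomic_load_explicit(&dev_mtu, memory_order_relaxed);

            if (mtu < 1280)         /* IPV6_MIN_MTU */
                    return;
            printf("sizing packet for %u bytes\n", mtu);
    }

    int main(void)
    {
            build_report();
            return 0;
    }
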
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f06e25065a34..66a8c69a3db4 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -282,12 +282,7 @@ ip6t_do_table(struct sk_buff *skb, | |||
282 | 282 | ||
283 | local_bh_disable(); | 283 | local_bh_disable(); |
284 | addend = xt_write_recseq_begin(); | 284 | addend = xt_write_recseq_begin(); |
285 | private = table->private; | 285 | private = READ_ONCE(table->private); /* Address dependency. */ |
286 | /* | ||
287 | * Ensure we load private-> members after we've fetched the base | ||
288 | * pointer. | ||
289 | */ | ||
290 | smp_read_barrier_depends(); | ||
291 | cpu = smp_processor_id(); | 286 | cpu = smp_processor_id(); |
292 | table_base = private->entries; | 287 | table_base = private->entries; |
293 | jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; | 288 | jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; |
@@ -458,7 +453,6 @@ mark_source_chains(const struct xt_table_info *newinfo, | |||
458 | if (!xt_find_jump_offset(offsets, newpos, | 453 | if (!xt_find_jump_offset(offsets, newpos, |
459 | newinfo->number)) | 454 | newinfo->number)) |
460 | return 0; | 455 | return 0; |
461 | e = entry0 + newpos; | ||
462 | } else { | 456 | } else { |
463 | /* ... this is a fallthru */ | 457 | /* ... this is a fallthru */ |
464 | newpos = pos + e->next_offset; | 458 | newpos = pos + e->next_offset; |
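
The ip6t_do_table() change is part of the tree-wide retirement of smp_read_barrier_depends(): as the new comment says, READ_ONCE() itself now carries the address dependency, ordering the load of table->private before the loads through it (private->entries and friends). In portable C11 the closest spelling is an acquire load of the published pointer:

    #include <stdatomic.h>
    #include <stdio.h>

    struct table_info {
            int entries;
    };

    static _Atomic(struct table_info *) table_private;

    static void writer(struct table_info *info)
    {
            info->entries = 42;
            /* Publish: the release pairs with the reader's acquire. */
            atomic_store_explicit(&table_private, info,
                                  memory_order_release);
    }

    static void reader(void)
    {
            struct table_info *p =
                    atomic_load_explicit(&table_private,
                                         memory_order_acquire);

            if (p)  /* ->entries is ordered after the pointer load */
                    printf("%d entries\n", p->entries);
    }

    int main(void)
    {
            static struct table_info info;

            writer(&info);
            reader();
            return 0;
    }
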
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 2b1a15846f9a..92c0047e7e33 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c | |||
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) | |||
33 | 33 | ||
34 | if (range->flags & NF_NAT_RANGE_MAP_IPS) | 34 | if (range->flags & NF_NAT_RANGE_MAP_IPS) |
35 | return -EINVAL; | 35 | return -EINVAL; |
36 | return 0; | 36 | return nf_ct_netns_get(par->net, par->family); |
37 | } | ||
38 | |||
39 | static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par) | ||
40 | { | ||
41 | nf_ct_netns_put(par->net, par->family); | ||
37 | } | 42 | } |
38 | 43 | ||
39 | static struct xt_target masquerade_tg6_reg __read_mostly = { | 44 | static struct xt_target masquerade_tg6_reg __read_mostly = { |
40 | .name = "MASQUERADE", | 45 | .name = "MASQUERADE", |
41 | .family = NFPROTO_IPV6, | 46 | .family = NFPROTO_IPV6, |
42 | .checkentry = masquerade_tg6_checkentry, | 47 | .checkentry = masquerade_tg6_checkentry, |
48 | .destroy = masquerade_tg6_destroy, | ||
43 | .target = masquerade_tg6, | 49 | .target = masquerade_tg6, |
44 | .targetsize = sizeof(struct nf_nat_range), | 50 | .targetsize = sizeof(struct nf_nat_range), |
45 | .table = "nat", | 51 | .table = "nat", |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7a8d1500d374..0458b761f3c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, | |||
2336 | } | 2336 | } |
2337 | 2337 | ||
2338 | rt->dst.flags |= DST_HOST; | 2338 | rt->dst.flags |= DST_HOST; |
2339 | rt->dst.input = ip6_input; | ||
2339 | rt->dst.output = ip6_output; | 2340 | rt->dst.output = ip6_output; |
2340 | rt->rt6i_gateway = fl6->daddr; | 2341 | rt->rt6i_gateway = fl6->daddr; |
2341 | rt->rt6i_dst.addr = fl6->daddr; | 2342 | rt->rt6i_dst.addr = fl6->daddr; |
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, | |||
4297 | if (!ipv6_addr_any(&fl6.saddr)) | 4298 | if (!ipv6_addr_any(&fl6.saddr)) |
4298 | flags |= RT6_LOOKUP_F_HAS_SADDR; | 4299 | flags |= RT6_LOOKUP_F_HAS_SADDR; |
4299 | 4300 | ||
4300 | if (!fibmatch) | 4301 | dst = ip6_route_input_lookup(net, dev, &fl6, flags); |
4301 | dst = ip6_route_input_lookup(net, dev, &fl6, flags); | ||
4302 | else | ||
4303 | dst = ip6_route_lookup(net, &fl6, 0); | ||
4304 | 4302 | ||
4305 | rcu_read_unlock(); | 4303 | rcu_read_unlock(); |
4306 | } else { | 4304 | } else { |
4307 | fl6.flowi6_oif = oif; | 4305 | fl6.flowi6_oif = oif; |
4308 | 4306 | ||
4309 | if (!fibmatch) | 4307 | dst = ip6_route_output(net, NULL, &fl6); |
4310 | dst = ip6_route_output(net, NULL, &fl6); | ||
4311 | else | ||
4312 | dst = ip6_route_lookup(net, &fl6, 0); | ||
4313 | } | 4308 | } |
4314 | 4309 | ||
4315 | 4310 | ||
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, | |||
4326 | goto errout; | 4321 | goto errout; |
4327 | } | 4322 | } |
4328 | 4323 | ||
4324 | if (fibmatch && rt->dst.from) { | ||
4325 | struct rt6_info *ort = container_of(rt->dst.from, | ||
4326 | struct rt6_info, dst); | ||
4327 | |||
4328 | dst_hold(&ort->dst); | ||
4329 | ip6_rt_put(rt); | ||
4330 | rt = ort; | ||
4331 | } | ||
4332 | |||
4329 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 4333 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); |
4330 | if (!skb) { | 4334 | if (!skb) { |
4331 | ip6_rt_put(rt); | 4335 | ip6_rt_put(rt); |
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d60ddcb0bfe2..3873d3877135 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c | |||
@@ -934,8 +934,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, | |||
934 | df = 0; | 934 | df = 0; |
935 | } | 935 | } |
936 | 936 | ||
937 | if (tunnel->parms.iph.daddr && skb_dst(skb)) | 937 | if (tunnel->parms.iph.daddr) |
938 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | 938 | skb_dst_update_pmtu(skb, mtu); |
939 | 939 | ||
940 | if (skb->len > mtu && !skb_is_gso(skb)) { | 940 | if (skb->len > mtu && !skb_is_gso(skb)) { |
941 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 941 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
@@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, | |||
1098 | ipip6_tunnel_link(sitn, t); | 1098 | ipip6_tunnel_link(sitn, t); |
1099 | t->parms.iph.ttl = p->iph.ttl; | 1099 | t->parms.iph.ttl = p->iph.ttl; |
1100 | t->parms.iph.tos = p->iph.tos; | 1100 | t->parms.iph.tos = p->iph.tos; |
1101 | t->parms.iph.frag_off = p->iph.frag_off; | ||
1101 | if (t->parms.link != p->link || t->fwmark != fwmark) { | 1102 | if (t->parms.link != p->link || t->fwmark != fwmark) { |
1102 | t->parms.link = p->link; | 1103 | t->parms.link = p->link; |
1103 | t->fwmark = fwmark; | 1104 | t->fwmark = fwmark; |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6bb98c93edfe..7178476b3d2f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, | |||
994 | req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, | 994 | req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, |
995 | tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, | 995 | tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, |
996 | req->ts_recent, sk->sk_bound_dev_if, | 996 | req->ts_recent, sk->sk_bound_dev_if, |
997 | tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), | 997 | tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr), |
998 | 0, 0); | 998 | 0, 0); |
999 | } | 999 | } |
1000 | 1000 | ||
@@ -1454,7 +1454,6 @@ process: | |||
1454 | struct sock *nsk; | 1454 | struct sock *nsk; |
1455 | 1455 | ||
1456 | sk = req->rsk_listener; | 1456 | sk = req->rsk_listener; |
1457 | tcp_v6_fill_cb(skb, hdr, th); | ||
1458 | if (tcp_v6_inbound_md5_hash(sk, skb)) { | 1457 | if (tcp_v6_inbound_md5_hash(sk, skb)) { |
1459 | sk_drops_add(sk, skb); | 1458 | sk_drops_add(sk, skb); |
1460 | reqsk_put(req); | 1459 | reqsk_put(req); |
@@ -1467,8 +1466,12 @@ process: | |||
1467 | sock_hold(sk); | 1466 | sock_hold(sk); |
1468 | refcounted = true; | 1467 | refcounted = true; |
1469 | nsk = NULL; | 1468 | nsk = NULL; |
1470 | if (!tcp_filter(sk, skb)) | 1469 | if (!tcp_filter(sk, skb)) { |
1470 | th = (const struct tcphdr *)skb->data; | ||
1471 | hdr = ipv6_hdr(skb); | ||
1472 | tcp_v6_fill_cb(skb, hdr, th); | ||
1471 | nsk = tcp_check_req(sk, skb, req, false); | 1473 | nsk = tcp_check_req(sk, skb, req, false); |
1474 | } | ||
1472 | if (!nsk) { | 1475 | if (!nsk) { |
1473 | reqsk_put(req); | 1476 | reqsk_put(req); |
1474 | goto discard_and_relse; | 1477 | goto discard_and_relse; |
@@ -1492,8 +1495,6 @@ process: | |||
1492 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) | 1495 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) |
1493 | goto discard_and_relse; | 1496 | goto discard_and_relse; |
1494 | 1497 | ||
1495 | tcp_v6_fill_cb(skb, hdr, th); | ||
1496 | |||
1497 | if (tcp_v6_inbound_md5_hash(sk, skb)) | 1498 | if (tcp_v6_inbound_md5_hash(sk, skb)) |
1498 | goto discard_and_relse; | 1499 | goto discard_and_relse; |
1499 | 1500 | ||
@@ -1501,6 +1502,7 @@ process: | |||
1501 | goto discard_and_relse; | 1502 | goto discard_and_relse; |
1502 | th = (const struct tcphdr *)skb->data; | 1503 | th = (const struct tcphdr *)skb->data; |
1503 | hdr = ipv6_hdr(skb); | 1504 | hdr = ipv6_hdr(skb); |
1505 | tcp_v6_fill_cb(skb, hdr, th); | ||
1504 | 1506 | ||
1505 | skb->dev = NULL; | 1507 | skb->dev = NULL; |
1506 | 1508 | ||
@@ -1590,7 +1592,6 @@ do_time_wait: | |||
1590 | tcp_v6_timewait_ack(sk, skb); | 1592 | tcp_v6_timewait_ack(sk, skb); |
1591 | break; | 1593 | break; |
1592 | case TCP_TW_RST: | 1594 | case TCP_TW_RST: |
1593 | tcp_v6_restore_cb(skb); | ||
1594 | tcp_v6_send_reset(sk, skb); | 1595 | tcp_v6_send_reset(sk, skb); |
1595 | inet_twsk_deschedule_put(inet_twsk(sk)); | 1596 | inet_twsk_deschedule_put(inet_twsk(sk)); |
1596 | goto discard_it; | 1597 | goto discard_it; |
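
The tcp_v6_fill_cb() moves all follow one rule: tcp_filter() can run a BPF program that reshapes or reallocates the skb head, so th and hdr must be re-derived from skb->data afterwards, and the control block may only be filled in from the refreshed pointers; filling it earlier would also overwrite the IP6CB data sharing skb->cb that xfrm6_policy_check() still needs. The dangling-pointer hazard in miniature:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            char *buf = malloc(16);
            char *hdr;

            if (!buf)
                    return 1;
            strcpy(buf, "tcphdr");
            hdr = buf;                    /* pointer into the buffer */

            buf = realloc(buf, 4096);     /* a filter may do this */
            if (!buf)
                    return 1;
            hdr = buf;                    /* re-derive; old hdr may dangle */
            printf("%s\n", hdr);
            free(buf);
            return 0;
    }
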
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d883c9204c01..278e49cd67d4 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c | |||
@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, | |||
46 | { | 46 | { |
47 | struct tcphdr *th; | 47 | struct tcphdr *th; |
48 | 48 | ||
49 | if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) | ||
50 | return ERR_PTR(-EINVAL); | ||
51 | |||
49 | if (!pskb_may_pull(skb, sizeof(*th))) | 52 | if (!pskb_may_pull(skb, sizeof(*th))) |
50 | return ERR_PTR(-EINVAL); | 53 | return ERR_PTR(-EINVAL); |
51 | 54 | ||
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a0f89ad76f9d..2a04dc9c781b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c | |||
@@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, | |||
42 | const struct ipv6hdr *ipv6h; | 42 | const struct ipv6hdr *ipv6h; |
43 | struct udphdr *uh; | 43 | struct udphdr *uh; |
44 | 44 | ||
45 | if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) | ||
46 | goto out; | ||
47 | |||
45 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | 48 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
46 | goto out; | 49 | goto out; |
47 | 50 | ||
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index fe04e23af986..841f4a07438e 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c | |||
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, | |||
32 | } | 32 | } |
33 | EXPORT_SYMBOL(xfrm6_rcv_spi); | 33 | EXPORT_SYMBOL(xfrm6_rcv_spi); |
34 | 34 | ||
35 | static int xfrm6_transport_finish2(struct net *net, struct sock *sk, | ||
36 | struct sk_buff *skb) | ||
37 | { | ||
38 | if (xfrm_trans_queue(skb, ip6_rcv_finish)) | ||
39 | __kfree_skb(skb); | ||
40 | return -1; | ||
41 | } | ||
42 | |||
35 | int xfrm6_transport_finish(struct sk_buff *skb, int async) | 43 | int xfrm6_transport_finish(struct sk_buff *skb, int async) |
36 | { | 44 | { |
37 | struct xfrm_offload *xo = xfrm_offload(skb); | 45 | struct xfrm_offload *xo = xfrm_offload(skb); |
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) | |||
56 | 64 | ||
57 | NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, | 65 | NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, |
58 | dev_net(skb->dev), NULL, skb, skb->dev, NULL, | 66 | dev_net(skb->dev), NULL, skb, skb->dev, NULL, |
59 | ip6_rcv_finish); | 67 | xfrm6_transport_finish2); |
60 | return -1; | 68 | return -1; |
61 | } | 69 | } |
62 | 70 | ||
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 02556e356f87..dc93002ff9d1 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c | |||
@@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) | |||
92 | 92 | ||
93 | skb_reset_network_header(skb); | 93 | skb_reset_network_header(skb); |
94 | skb_mac_header_rebuild(skb); | 94 | skb_mac_header_rebuild(skb); |
95 | eth_hdr(skb)->h_proto = skb->protocol; | ||
95 | 96 | ||
96 | err = 0; | 97 | err = 0; |
97 | 98 | ||
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 148533169b1d..64331158d693 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c | |||
@@ -1474,7 +1474,7 @@ done: | |||
1474 | return copied; | 1474 | return copied; |
1475 | } | 1475 | } |
1476 | 1476 | ||
1477 | static inline unsigned int iucv_accept_poll(struct sock *parent) | 1477 | static inline __poll_t iucv_accept_poll(struct sock *parent) |
1478 | { | 1478 | { |
1479 | struct iucv_sock *isk, *n; | 1479 | struct iucv_sock *isk, *n; |
1480 | struct sock *sk; | 1480 | struct sock *sk; |
@@ -1489,11 +1489,11 @@ static inline unsigned int iucv_accept_poll(struct sock *parent) | |||
1489 | return 0; | 1489 | return 0; |
1490 | } | 1490 | } |
1491 | 1491 | ||
1492 | unsigned int iucv_sock_poll(struct file *file, struct socket *sock, | 1492 | __poll_t iucv_sock_poll(struct file *file, struct socket *sock, |
1493 | poll_table *wait) | 1493 | poll_table *wait) |
1494 | { | 1494 | { |
1495 | struct sock *sk = sock->sk; | 1495 | struct sock *sk = sock->sk; |
1496 | unsigned int mask = 0; | 1496 | __poll_t mask = 0; |
1497 | 1497 | ||
1498 | sock_poll_wait(file, sk_sleep(sk), wait); | 1498 | sock_poll_wait(file, sk_sleep(sk), wait); |
1499 | 1499 | ||
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 0b750a22c4b9..4a8d407f8902 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c | |||
@@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock, | |||
1387 | if (!csk) | 1387 | if (!csk) |
1388 | return -EINVAL; | 1388 | return -EINVAL; |
1389 | 1389 | ||
1390 | /* We must prevent loops or risk deadlock ! */ | 1390 | /* Only allow TCP sockets to be attached for now */ |
1391 | if (csk->sk_family == PF_KCM) | 1391 | if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || |
1392 | csk->sk_protocol != IPPROTO_TCP) | ||
1393 | return -EOPNOTSUPP; | ||
1394 | |||
1395 | /* Don't allow listeners or closed sockets */ | ||
1396 | if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) | ||
1392 | return -EOPNOTSUPP; | 1397 | return -EOPNOTSUPP; |
1393 | 1398 | ||
1394 | psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); | 1399 | psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); |
@@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, | |||
1405 | return err; | 1410 | return err; |
1406 | } | 1411 | } |
1407 | 1412 | ||
1408 | sock_hold(csk); | ||
1409 | |||
1410 | write_lock_bh(&csk->sk_callback_lock); | 1413 | write_lock_bh(&csk->sk_callback_lock); |
1414 | |||
1415 | /* Check if sk_user_data is already used by KCM or someone else. | ||
1416 | * Must be done under lock to prevent race conditions. | ||
1417 | */ | ||
1418 | if (csk->sk_user_data) { | ||
1419 | write_unlock_bh(&csk->sk_callback_lock); | ||
1420 | strp_done(&psock->strp); | ||
1421 | kmem_cache_free(kcm_psockp, psock); | ||
1422 | return -EALREADY; | ||
1423 | } | ||
1424 | |||
1411 | psock->save_data_ready = csk->sk_data_ready; | 1425 | psock->save_data_ready = csk->sk_data_ready; |
1412 | psock->save_write_space = csk->sk_write_space; | 1426 | psock->save_write_space = csk->sk_write_space; |
1413 | psock->save_state_change = csk->sk_state_change; | 1427 | psock->save_state_change = csk->sk_state_change; |
@@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock, | |||
1415 | csk->sk_data_ready = psock_data_ready; | 1429 | csk->sk_data_ready = psock_data_ready; |
1416 | csk->sk_write_space = psock_write_space; | 1430 | csk->sk_write_space = psock_write_space; |
1417 | csk->sk_state_change = psock_state_change; | 1431 | csk->sk_state_change = psock_state_change; |
1432 | |||
1418 | write_unlock_bh(&csk->sk_callback_lock); | 1433 | write_unlock_bh(&csk->sk_callback_lock); |
1419 | 1434 | ||
1435 | sock_hold(csk); | ||
1436 | |||
1420 | /* Finished initialization, now add the psock to the MUX. */ | 1437 | /* Finished initialization, now add the psock to the MUX. */ |
1421 | spin_lock_bh(&mux->lock); | 1438 | spin_lock_bh(&mux->lock); |
1422 | head = &mux->psocks; | 1439 | head = &mux->psocks; |
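
As the new comment notes, the sk_user_data test must happen inside sk_callback_lock, making the attach a classic test-and-set under the writers' lock: two racing attaches (or KCM racing another sk_user_data user) would otherwise both observe NULL and both install their callbacks. Reduced to a sketch with a pthread mutex standing in for sk_callback_lock:

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t cb_lock = PTHREAD_MUTEX_INITIALIZER;
    static void *sk_user_data;

    static int claim(void *owner)
    {
            int err = 0;

            pthread_mutex_lock(&cb_lock);
            if (sk_user_data)               /* someone attached first */
                    err = -EALREADY;
            else
                    sk_user_data = owner;   /* test and set, atomically */
            pthread_mutex_unlock(&cb_lock);
            return err;
    }

    int main(void)
    {
            int a, b;

            printf("%d %d\n", claim(&a), claim(&b));  /* 0, -EALREADY */
            return 0;
    }
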
@@ -1625,60 +1642,30 @@ static struct proto kcm_proto = { | |||
1625 | }; | 1642 | }; |
1626 | 1643 | ||
1627 | /* Clone a kcm socket. */ | 1644 | /* Clone a kcm socket. */ |
1628 | static int kcm_clone(struct socket *osock, struct kcm_clone *info, | 1645 | static struct file *kcm_clone(struct socket *osock) |
1629 | struct socket **newsockp) | ||
1630 | { | 1646 | { |
1631 | struct socket *newsock; | 1647 | struct socket *newsock; |
1632 | struct sock *newsk; | 1648 | struct sock *newsk; |
1633 | struct file *newfile; | ||
1634 | int err, newfd; | ||
1635 | 1649 | ||
1636 | err = -ENFILE; | ||
1637 | newsock = sock_alloc(); | 1650 | newsock = sock_alloc(); |
1638 | if (!newsock) | 1651 | if (!newsock) |
1639 | goto out; | 1652 | return ERR_PTR(-ENFILE); |
1640 | 1653 | ||
1641 | newsock->type = osock->type; | 1654 | newsock->type = osock->type; |
1642 | newsock->ops = osock->ops; | 1655 | newsock->ops = osock->ops; |
1643 | 1656 | ||
1644 | __module_get(newsock->ops->owner); | 1657 | __module_get(newsock->ops->owner); |
1645 | 1658 | ||
1646 | newfd = get_unused_fd_flags(0); | ||
1647 | if (unlikely(newfd < 0)) { | ||
1648 | err = newfd; | ||
1649 | goto out_fd_fail; | ||
1650 | } | ||
1651 | |||
1652 | newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); | ||
1653 | if (IS_ERR(newfile)) { | ||
1654 | err = PTR_ERR(newfile); | ||
1655 | goto out_sock_alloc_fail; | ||
1656 | } | ||
1657 | |||
1658 | newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, | 1659 | newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, |
1659 | &kcm_proto, true); | 1660 | &kcm_proto, true); |
1660 | if (!newsk) { | 1661 | if (!newsk) { |
1661 | err = -ENOMEM; | 1662 | sock_release(newsock); |
1662 | goto out_sk_alloc_fail; | 1663 | return ERR_PTR(-ENOMEM); |
1663 | } | 1664 | } |
1664 | |||
1665 | sock_init_data(newsock, newsk); | 1665 | sock_init_data(newsock, newsk); |
1666 | init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); | 1666 | init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); |
1667 | 1667 | ||
1668 | fd_install(newfd, newfile); | 1668 | return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); |
1669 | *newsockp = newsock; | ||
1670 | info->fd = newfd; | ||
1671 | |||
1672 | return 0; | ||
1673 | |||
1674 | out_sk_alloc_fail: | ||
1675 | fput(newfile); | ||
1676 | out_sock_alloc_fail: | ||
1677 | put_unused_fd(newfd); | ||
1678 | out_fd_fail: | ||
1679 | sock_release(newsock); | ||
1680 | out: | ||
1681 | return err; | ||
1682 | } | 1669 | } |
1683 | 1670 | ||
1684 | static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | 1671 | static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) |
@@ -1708,17 +1695,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1708 | } | 1695 | } |
1709 | case SIOCKCMCLONE: { | 1696 | case SIOCKCMCLONE: { |
1710 | struct kcm_clone info; | 1697 | struct kcm_clone info; |
1711 | struct socket *newsock = NULL; | 1698 | struct file *file; |
1712 | |||
1713 | err = kcm_clone(sock, &info, &newsock); | ||
1714 | if (!err) { | ||
1715 | if (copy_to_user((void __user *)arg, &info, | ||
1716 | sizeof(info))) { | ||
1717 | err = -EFAULT; | ||
1718 | sys_close(info.fd); | ||
1719 | } | ||
1720 | } | ||
1721 | 1699 | ||
1700 | info.fd = get_unused_fd_flags(0); | ||
1701 | if (unlikely(info.fd < 0)) | ||
1702 | return info.fd; | ||
1703 | |||
1704 | file = kcm_clone(sock); | ||
1705 | if (IS_ERR(file)) { | ||
1706 | put_unused_fd(info.fd); | ||
1707 | return PTR_ERR(file); | ||
1708 | } | ||
1709 | if (copy_to_user((void __user *)arg, &info, | ||
1710 | sizeof(info))) { | ||
1711 | put_unused_fd(info.fd); | ||
1712 | fput(file); | ||
1713 | return -EFAULT; | ||
1714 | } | ||
1715 | fd_install(info.fd, file); | ||
1716 | err = 0; | ||
1722 | break; | 1717 | break; |
1723 | } | 1718 | } |
1724 | default: | 1719 | default: |
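
The reworked SIOCKCMCLONE sequence is file-descriptor hygiene: reserve the descriptor number, create the file, copy the number out to user space, and only then fd_install() the file. fd_install() is the one irrevocable step, so keeping it last means every failure (no free fd, no file, EFAULT on the copy) can be unwound with put_unused_fd()/fput(), and the process never sees a descriptor for a socket it was not told about. The publish-last shape in user-space terms:

    #include <stdio.h>
    #include <stdlib.h>

    static int *published;   /* visible to "everyone else" */

    static int setup_and_publish(void)
    {
            int *obj = malloc(sizeof(*obj));

            if (!obj)
                    return -1;
            *obj = 42;
            /* ... any further fallible step: free(obj) and bail ... */

            published = obj;   /* irrevocable publication, kept last */
            return 0;
    }

    int main(void)
    {
            if (setup_and_publish() == 0)
                    printf("%d\n", *published);
            free(published);
            return 0;
    }
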
diff --git a/net/key/af_key.c b/net/key/af_key.c index 3dffb892d52c..7e2e7188e7f4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c | |||
@@ -401,6 +401,11 @@ static int verify_address_len(const void *p) | |||
401 | #endif | 401 | #endif |
402 | int len; | 402 | int len; |
403 | 403 | ||
404 | if (sp->sadb_address_len < | ||
405 | DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family), | ||
406 | sizeof(uint64_t))) | ||
407 | return -EINVAL; | ||
408 | |||
404 | switch (addr->sa_family) { | 409 | switch (addr->sa_family) { |
405 | case AF_INET: | 410 | case AF_INET: |
406 | len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); | 411 | len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); |
@@ -511,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void * | |||
511 | uint16_t ext_type; | 516 | uint16_t ext_type; |
512 | int ext_len; | 517 | int ext_len; |
513 | 518 | ||
519 | if (len < sizeof(*ehdr)) | ||
520 | return -EINVAL; | ||
521 | |||
514 | ext_len = ehdr->sadb_ext_len; | 522 | ext_len = ehdr->sadb_ext_len; |
515 | ext_len *= sizeof(uint64_t); | 523 | ext_len *= sizeof(uint64_t); |
516 | ext_type = ehdr->sadb_ext_type; | 524 | ext_type = ehdr->sadb_ext_type; |
@@ -2194,8 +2202,10 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev | |||
2194 | return PTR_ERR(out_skb); | 2202 | return PTR_ERR(out_skb); |
2195 | 2203 | ||
2196 | err = pfkey_xfrm_policy2msg(out_skb, xp, dir); | 2204 | err = pfkey_xfrm_policy2msg(out_skb, xp, dir); |
2197 | if (err < 0) | 2205 | if (err < 0) { |
2206 | kfree_skb(out_skb); | ||
2198 | return err; | 2207 | return err; |
2208 | } | ||
2199 | 2209 | ||
2200 | out_hdr = (struct sadb_msg *) out_skb->data; | 2210 | out_hdr = (struct sadb_msg *) out_skb->data; |
2201 | out_hdr->sadb_msg_version = PF_KEY_V2; | 2211 | out_hdr->sadb_msg_version = PF_KEY_V2; |
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 41f5e48f8021..1621b6ab17ba 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c | |||
@@ -291,13 +291,14 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, | |||
291 | int i; | 291 | int i; |
292 | 292 | ||
293 | mutex_lock(&sta->ampdu_mlme.mtx); | 293 | mutex_lock(&sta->ampdu_mlme.mtx); |
294 | for (i = 0; i < IEEE80211_NUM_TIDS; i++) { | 294 | for (i = 0; i < IEEE80211_NUM_TIDS; i++) |
295 | ___ieee80211_stop_tx_ba_session(sta, i, reason); | ||
296 | ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, | 295 | ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, |
297 | WLAN_REASON_QSTA_LEAVE_QBSS, | 296 | WLAN_REASON_QSTA_LEAVE_QBSS, |
298 | reason != AGG_STOP_DESTROY_STA && | 297 | reason != AGG_STOP_DESTROY_STA && |
299 | reason != AGG_STOP_PEER_REQUEST); | 298 | reason != AGG_STOP_PEER_REQUEST); |
300 | } | 299 | |
300 | for (i = 0; i < IEEE80211_NUM_TIDS; i++) | ||
301 | ___ieee80211_stop_tx_ba_session(sta, i, reason); | ||
301 | mutex_unlock(&sta->ampdu_mlme.mtx); | 302 | mutex_unlock(&sta->ampdu_mlme.mtx); |
302 | 303 | ||
303 | /* stopping might queue the work again - so cancel only afterwards */ | 304 | /* stopping might queue the work again - so cancel only afterwards */ |
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 4f7826d7b47c..4394463a0c2e 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c | |||
@@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, | |||
797 | struct mesh_path *mpath; | 797 | struct mesh_path *mpath; |
798 | u8 ttl, flags, hopcount; | 798 | u8 ttl, flags, hopcount; |
799 | const u8 *orig_addr; | 799 | const u8 *orig_addr; |
800 | u32 orig_sn, metric, metric_txsta, interval; | 800 | u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval; |
801 | bool root_is_gate; | 801 | bool root_is_gate; |
802 | 802 | ||
803 | ttl = rann->rann_ttl; | 803 | ttl = rann->rann_ttl; |
@@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, | |||
808 | interval = le32_to_cpu(rann->rann_interval); | 808 | interval = le32_to_cpu(rann->rann_interval); |
809 | hopcount = rann->rann_hopcount; | 809 | hopcount = rann->rann_hopcount; |
810 | hopcount++; | 810 | hopcount++; |
811 | metric = le32_to_cpu(rann->rann_metric); | 811 | orig_metric = le32_to_cpu(rann->rann_metric); |
812 | 812 | ||
813 | /* Ignore our own RANNs */ | 813 | /* Ignore our own RANNs */ |
814 | if (ether_addr_equal(orig_addr, sdata->vif.addr)) | 814 | if (ether_addr_equal(orig_addr, sdata->vif.addr)) |
@@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, | |||
825 | return; | 825 | return; |
826 | } | 826 | } |
827 | 827 | ||
828 | metric_txsta = airtime_link_metric_get(local, sta); | 828 | last_hop_metric = airtime_link_metric_get(local, sta); |
829 | new_metric = orig_metric + last_hop_metric; | ||
830 | if (new_metric < orig_metric) | ||
831 | new_metric = MAX_METRIC; | ||
829 | 832 | ||
830 | mpath = mesh_path_lookup(sdata, orig_addr); | 833 | mpath = mesh_path_lookup(sdata, orig_addr); |
831 | if (!mpath) { | 834 | if (!mpath) { |
@@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, | |||
838 | } | 841 | } |
839 | 842 | ||
840 | if (!(SN_LT(mpath->sn, orig_sn)) && | 843 | if (!(SN_LT(mpath->sn, orig_sn)) && |
841 | !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { | 844 | !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) { |
842 | rcu_read_unlock(); | 845 | rcu_read_unlock(); |
843 | return; | 846 | return; |
844 | } | 847 | } |
@@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, | |||
856 | } | 859 | } |
857 | 860 | ||
858 | mpath->sn = orig_sn; | 861 | mpath->sn = orig_sn; |
859 | mpath->rann_metric = metric + metric_txsta; | 862 | mpath->rann_metric = new_metric; |
860 | mpath->is_root = true; | 863 | mpath->is_root = true; |
861 | /* Recording RANNs sender address to send individually | 864 | /* Recording RANNs sender address to send individually |
862 | * addressed PREQs destined for root mesh STA */ | 865 | * addressed PREQs destined for root mesh STA */ |
@@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, | |||
876 | mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, | 879 | mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, |
877 | orig_sn, 0, NULL, 0, broadcast_addr, | 880 | orig_sn, 0, NULL, 0, broadcast_addr, |
878 | hopcount, ttl, interval, | 881 | hopcount, ttl, interval, |
879 | metric + metric_txsta, 0, sdata); | 882 | new_metric, 0, sdata); |
880 | } | 883 | } |
881 | 884 | ||
882 | rcu_read_unlock(); | 885 | rcu_read_unlock(); |
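The hwmp_rann_frame_process() hunks compute the propagated metric once, as new_metric, and saturate it on unsigned wraparound: u32 addition wraps modulo 2^32, so a wrapped sum is strictly smaller than either operand, which is exactly what the `new_metric < orig_metric` test detects. A self-contained sketch of the same saturating add (MAX_METRIC stands in for the mesh code's cap):

#include <stdint.h>
#include <stdio.h>

#define MAX_METRIC 0xffffffffu	/* assumed cap, mirroring mesh_hwmp */

/* Saturating u32 add: a wrapped sum compares below its operands. */
static uint32_t metric_add(uint32_t orig, uint32_t last_hop)
{
	uint32_t sum = orig + last_hop;

	return sum < orig ? MAX_METRIC : sum;
}

int main(void)
{
	printf("%u\n", (unsigned)metric_add(10, 20));		  /* 30 */
	printf("%u\n", (unsigned)metric_add(0xfffffff0u, 0x20u)); /* cap */
	return 0;
}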
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 04460440d731..c244691deab9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c | |||
@@ -895,7 +895,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, | |||
895 | struct ieee80211_hdr_3addr *nullfunc; | 895 | struct ieee80211_hdr_3addr *nullfunc; |
896 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | 896 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
897 | 897 | ||
898 | skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); | 898 | skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true); |
899 | if (!skb) | 899 | if (!skb) |
900 | return; | 900 | return; |
901 | 901 | ||
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 70e9d2ca8bbe..4daafb07602f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c | |||
@@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) | |||
3632 | } | 3632 | } |
3633 | return true; | 3633 | return true; |
3634 | case NL80211_IFTYPE_MESH_POINT: | 3634 | case NL80211_IFTYPE_MESH_POINT: |
3635 | if (ether_addr_equal(sdata->vif.addr, hdr->addr2)) | ||
3636 | return false; | ||
3635 | if (multicast) | 3637 | if (multicast) |
3636 | return true; | 3638 | return true; |
3637 | return ether_addr_equal(sdata->vif.addr, hdr->addr1); | 3639 | return ether_addr_equal(sdata->vif.addr, hdr->addr1); |
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7b8154474b9e..3160954fc406 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c | |||
@@ -4438,13 +4438,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, | |||
4438 | EXPORT_SYMBOL(ieee80211_pspoll_get); | 4438 | EXPORT_SYMBOL(ieee80211_pspoll_get); |
4439 | 4439 | ||
4440 | struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, | 4440 | struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, |
4441 | struct ieee80211_vif *vif) | 4441 | struct ieee80211_vif *vif, |
4442 | bool qos_ok) | ||
4442 | { | 4443 | { |
4443 | struct ieee80211_hdr_3addr *nullfunc; | 4444 | struct ieee80211_hdr_3addr *nullfunc; |
4444 | struct ieee80211_sub_if_data *sdata; | 4445 | struct ieee80211_sub_if_data *sdata; |
4445 | struct ieee80211_if_managed *ifmgd; | 4446 | struct ieee80211_if_managed *ifmgd; |
4446 | struct ieee80211_local *local; | 4447 | struct ieee80211_local *local; |
4447 | struct sk_buff *skb; | 4448 | struct sk_buff *skb; |
4449 | bool qos = false; | ||
4448 | 4450 | ||
4449 | if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) | 4451 | if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) |
4450 | return NULL; | 4452 | return NULL; |
@@ -4453,7 +4455,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, | |||
4453 | ifmgd = &sdata->u.mgd; | 4455 | ifmgd = &sdata->u.mgd; |
4454 | local = sdata->local; | 4456 | local = sdata->local; |
4455 | 4457 | ||
4456 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); | 4458 | if (qos_ok) { |
4459 | struct sta_info *sta; | ||
4460 | |||
4461 | rcu_read_lock(); | ||
4462 | sta = sta_info_get(sdata, ifmgd->bssid); | ||
4463 | qos = sta && sta->sta.wme; | ||
4464 | rcu_read_unlock(); | ||
4465 | } | ||
4466 | |||
4467 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + | ||
4468 | sizeof(*nullfunc) + 2); | ||
4457 | if (!skb) | 4469 | if (!skb) |
4458 | return NULL; | 4470 | return NULL; |
4459 | 4471 | ||
@@ -4463,6 +4475,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, | |||
4463 | nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | | 4475 | nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | |
4464 | IEEE80211_STYPE_NULLFUNC | | 4476 | IEEE80211_STYPE_NULLFUNC | |
4465 | IEEE80211_FCTL_TODS); | 4477 | IEEE80211_FCTL_TODS); |
4478 | if (qos) { | ||
4479 | __le16 qos = cpu_to_le16(7); | ||
4480 | |||
4481 | BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC | | ||
4482 | IEEE80211_STYPE_NULLFUNC) != | ||
4483 | IEEE80211_STYPE_QOS_NULLFUNC); | ||
4484 | nullfunc->frame_control |= | ||
4485 | cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC); | ||
4486 | skb->priority = 7; | ||
4487 | skb_set_queue_mapping(skb, IEEE80211_AC_VO); | ||
4488 | skb_put_data(skb, &qos, sizeof(qos)); | ||
4489 | } | ||
4490 | |||
4466 | memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); | 4491 | memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); |
4467 | memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); | 4492 | memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); |
4468 | memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); | 4493 | memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); |
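ieee80211_nullfunc_get() can now emit a QoS Null frame: two extra bytes are reserved for the QoS control field, the subtype is upgraded, priority is pinned to 7 on the voice queue, and the 2-byte QoS control is appended. The upgrade works with a plain OR because the QoS Null subtype bits are a superset of the plain Null subtype, which is what the BUILD_BUG_ON asserts. A runnable check of that bit arithmetic, with the constant values copied from <linux/ieee80211.h>:

#include <assert.h>
#include <stdint.h>

#define IEEE80211_FTYPE_DATA		0x0008
#define IEEE80211_STYPE_NULLFUNC	0x0040
#define IEEE80211_STYPE_QOS_NULLFUNC	0x00c0
#define IEEE80211_FCTL_TODS		0x0100

int main(void)
{
	uint16_t fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
		      IEEE80211_FCTL_TODS;

	/* OR-ing in the QoS subtype is enough: 0x0040 | 0x00c0 == 0x00c0,
	 * the relation the hunk's BUILD_BUG_ON pins down. */
	fc |= IEEE80211_STYPE_QOS_NULLFUNC;
	assert(fc == (IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC |
		      IEEE80211_FCTL_TODS));
	return 0;
}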
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 9ee71cb276d7..fbaf3bd05b2e 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -1636,17 +1636,14 @@ static int | |||
1636 | ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) | 1636 | ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) |
1637 | { | 1637 | { |
1638 | struct msghdr msg = {NULL,}; | 1638 | struct msghdr msg = {NULL,}; |
1639 | struct kvec iov; | 1639 | struct kvec iov = {buffer, buflen}; |
1640 | int len; | 1640 | int len; |
1641 | 1641 | ||
1642 | EnterFunction(7); | 1642 | EnterFunction(7); |
1643 | 1643 | ||
1644 | /* Receive a packet */ | 1644 | /* Receive a packet */ |
1645 | iov.iov_base = buffer; | 1645 | iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen); |
1646 | iov.iov_len = (size_t)buflen; | 1646 | len = sock_recvmsg(sock, &msg, MSG_DONTWAIT); |
1647 | |||
1648 | len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT); | ||
1649 | |||
1650 | if (len < 0) | 1647 | if (len < 0) |
1651 | return len; | 1648 | return len; |
1652 | 1649 | ||
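ip_vs_receive() now fills msg.msg_iter itself and calls sock_recvmsg() rather than going through kernel_recvmsg(), and the kvec is initialized at declaration. A minimal sketch of the pattern (kernel-only code, not runnable standalone; READ | ITER_KVEC matches the iov_iter_kvec() signature of this kernel series):

/* Assumes a connected struct socket *sock and a kernel buffer. */
static int recv_into_buffer(struct socket *sock, char *buf, size_t len)
{
	struct msghdr msg = { NULL, };
	struct kvec iov = { buf, len };

	/* Attach the kernel buffer to the message's iterator ... */
	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
	/* ... then receive without the userspace-iovec detour. */
	return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
}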
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 85f643c1e227..4efaa3066c78 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -1044,7 +1044,7 @@ static void gc_worker(struct work_struct *work) | |||
1044 | * we will just continue with next hash slot. | 1044 | * we will just continue with next hash slot. |
1045 | */ | 1045 | */ |
1046 | rcu_read_unlock(); | 1046 | rcu_read_unlock(); |
1047 | cond_resched_rcu_qs(); | 1047 | cond_resched(); |
1048 | } while (++buckets < goal); | 1048 | } while (++buckets < goal); |
1049 | 1049 | ||
1050 | if (gc_work->exiting) | 1050 | if (gc_work->exiting) |
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index cf1bf2605c10..dc6347342e34 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c | |||
@@ -103,7 +103,6 @@ struct bitstr { | |||
103 | #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} | 103 | #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} |
104 | #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} | 104 | #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} |
105 | #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} | 105 | #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} |
106 | #define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND) | ||
107 | static unsigned int get_len(struct bitstr *bs); | 106 | static unsigned int get_len(struct bitstr *bs); |
108 | static unsigned int get_bit(struct bitstr *bs); | 107 | static unsigned int get_bit(struct bitstr *bs); |
109 | static unsigned int get_bits(struct bitstr *bs, unsigned int b); | 108 | static unsigned int get_bits(struct bitstr *bs, unsigned int b); |
@@ -165,6 +164,19 @@ static unsigned int get_len(struct bitstr *bs) | |||
165 | return v; | 164 | return v; |
166 | } | 165 | } |
167 | 166 | ||
167 | static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits) | ||
168 | { | ||
169 | bits += bs->bit; | ||
170 | bytes += bits / BITS_PER_BYTE; | ||
171 | if (bits % BITS_PER_BYTE > 0) | ||
172 | bytes++; | ||
173 | |||
174 | if (bs->cur + bytes > bs->end) | ||
175 | return 1; | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
168 | /****************************************************************************/ | 180 | /****************************************************************************/ |
169 | static unsigned int get_bit(struct bitstr *bs) | 181 | static unsigned int get_bit(struct bitstr *bs) |
170 | { | 182 | { |
@@ -279,8 +291,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f, | |||
279 | PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); | 291 | PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); |
280 | 292 | ||
281 | INC_BIT(bs); | 293 | INC_BIT(bs); |
282 | 294 | if (nf_h323_error_boundary(bs, 0, 0)) | |
283 | CHECK_BOUND(bs, 0); | 295 | return H323_ERROR_BOUND; |
284 | return H323_ERROR_NONE; | 296 | return H323_ERROR_NONE; |
285 | } | 297 | } |
286 | 298 | ||
@@ -293,11 +305,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f, | |||
293 | PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); | 305 | PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); |
294 | 306 | ||
295 | BYTE_ALIGN(bs); | 307 | BYTE_ALIGN(bs); |
296 | CHECK_BOUND(bs, 1); | 308 | if (nf_h323_error_boundary(bs, 1, 0)) |
309 | return H323_ERROR_BOUND; | ||
310 | |||
297 | len = *bs->cur++; | 311 | len = *bs->cur++; |
298 | bs->cur += len; | 312 | bs->cur += len; |
313 | if (nf_h323_error_boundary(bs, 0, 0)) | ||
314 | return H323_ERROR_BOUND; | ||
299 | 315 | ||
300 | CHECK_BOUND(bs, 0); | ||
301 | return H323_ERROR_NONE; | 316 | return H323_ERROR_NONE; |
302 | } | 317 | } |
303 | 318 | ||
@@ -319,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, | |||
319 | bs->cur += 2; | 334 | bs->cur += 2; |
320 | break; | 335 | break; |
321 | case CONS: /* 64K < Range < 4G */ | 336 | case CONS: /* 64K < Range < 4G */ |
337 | if (nf_h323_error_boundary(bs, 0, 2)) | ||
338 | return H323_ERROR_BOUND; | ||
322 | len = get_bits(bs, 2) + 1; | 339 | len = get_bits(bs, 2) + 1; |
323 | BYTE_ALIGN(bs); | 340 | BYTE_ALIGN(bs); |
324 | if (base && (f->attr & DECODE)) { /* timeToLive */ | 341 | if (base && (f->attr & DECODE)) { /* timeToLive */ |
@@ -330,7 +347,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, | |||
330 | break; | 347 | break; |
331 | case UNCO: | 348 | case UNCO: |
332 | BYTE_ALIGN(bs); | 349 | BYTE_ALIGN(bs); |
333 | CHECK_BOUND(bs, 2); | 350 | if (nf_h323_error_boundary(bs, 2, 0)) |
351 | return H323_ERROR_BOUND; | ||
334 | len = get_len(bs); | 352 | len = get_len(bs); |
335 | bs->cur += len; | 353 | bs->cur += len; |
336 | break; | 354 | break; |
@@ -341,7 +359,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, | |||
341 | 359 | ||
342 | PRINT("\n"); | 360 | PRINT("\n"); |
343 | 361 | ||
344 | CHECK_BOUND(bs, 0); | 362 | if (nf_h323_error_boundary(bs, 0, 0)) |
363 | return H323_ERROR_BOUND; | ||
345 | return H323_ERROR_NONE; | 364 | return H323_ERROR_NONE; |
346 | } | 365 | } |
347 | 366 | ||
@@ -357,7 +376,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f, | |||
357 | INC_BITS(bs, f->sz); | 376 | INC_BITS(bs, f->sz); |
358 | } | 377 | } |
359 | 378 | ||
360 | CHECK_BOUND(bs, 0); | 379 | if (nf_h323_error_boundary(bs, 0, 0)) |
380 | return H323_ERROR_BOUND; | ||
361 | return H323_ERROR_NONE; | 381 | return H323_ERROR_NONE; |
362 | } | 382 | } |
363 | 383 | ||
@@ -375,12 +395,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, | |||
375 | len = f->lb; | 395 | len = f->lb; |
376 | break; | 396 | break; |
377 | case WORD: /* 2-byte length */ | 397 | case WORD: /* 2-byte length */ |
378 | CHECK_BOUND(bs, 2); | 398 | if (nf_h323_error_boundary(bs, 2, 0)) |
399 | return H323_ERROR_BOUND; | ||
379 | len = (*bs->cur++) << 8; | 400 | len = (*bs->cur++) << 8; |
380 | len += (*bs->cur++) + f->lb; | 401 | len += (*bs->cur++) + f->lb; |
381 | break; | 402 | break; |
382 | case SEMI: | 403 | case SEMI: |
383 | CHECK_BOUND(bs, 2); | 404 | if (nf_h323_error_boundary(bs, 2, 0)) |
405 | return H323_ERROR_BOUND; | ||
384 | len = get_len(bs); | 406 | len = get_len(bs); |
385 | break; | 407 | break; |
386 | default: | 408 | default: |
@@ -391,7 +413,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, | |||
391 | bs->cur += len >> 3; | 413 | bs->cur += len >> 3; |
392 | bs->bit = len & 7; | 414 | bs->bit = len & 7; |
393 | 415 | ||
394 | CHECK_BOUND(bs, 0); | 416 | if (nf_h323_error_boundary(bs, 0, 0)) |
417 | return H323_ERROR_BOUND; | ||
395 | return H323_ERROR_NONE; | 418 | return H323_ERROR_NONE; |
396 | } | 419 | } |
397 | 420 | ||
@@ -404,12 +427,15 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f, | |||
404 | PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); | 427 | PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); |
405 | 428 | ||
406 | /* 2 <= Range <= 255 */ | 429 | /* 2 <= Range <= 255 */ |
430 | if (nf_h323_error_boundary(bs, 0, f->sz)) | ||
431 | return H323_ERROR_BOUND; | ||
407 | len = get_bits(bs, f->sz) + f->lb; | 432 | len = get_bits(bs, f->sz) + f->lb; |
408 | 433 | ||
409 | BYTE_ALIGN(bs); | 434 | BYTE_ALIGN(bs); |
410 | INC_BITS(bs, (len << 2)); | 435 | INC_BITS(bs, (len << 2)); |
411 | 436 | ||
412 | CHECK_BOUND(bs, 0); | 437 | if (nf_h323_error_boundary(bs, 0, 0)) |
438 | return H323_ERROR_BOUND; | ||
413 | return H323_ERROR_NONE; | 439 | return H323_ERROR_NONE; |
414 | } | 440 | } |
415 | 441 | ||
@@ -440,15 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, | |||
440 | break; | 466 | break; |
441 | case BYTE: /* Range == 256 */ | 467 | case BYTE: /* Range == 256 */ |
442 | BYTE_ALIGN(bs); | 468 | BYTE_ALIGN(bs); |
443 | CHECK_BOUND(bs, 1); | 469 | if (nf_h323_error_boundary(bs, 1, 0)) |
470 | return H323_ERROR_BOUND; | ||
444 | len = (*bs->cur++) + f->lb; | 471 | len = (*bs->cur++) + f->lb; |
445 | break; | 472 | break; |
446 | case SEMI: | 473 | case SEMI: |
447 | BYTE_ALIGN(bs); | 474 | BYTE_ALIGN(bs); |
448 | CHECK_BOUND(bs, 2); | 475 | if (nf_h323_error_boundary(bs, 2, 0)) |
476 | return H323_ERROR_BOUND; | ||
449 | len = get_len(bs) + f->lb; | 477 | len = get_len(bs) + f->lb; |
450 | break; | 478 | break; |
451 | default: /* 2 <= Range <= 255 */ | 479 | default: /* 2 <= Range <= 255 */ |
480 | if (nf_h323_error_boundary(bs, 0, f->sz)) | ||
481 | return H323_ERROR_BOUND; | ||
452 | len = get_bits(bs, f->sz) + f->lb; | 482 | len = get_bits(bs, f->sz) + f->lb; |
453 | BYTE_ALIGN(bs); | 483 | BYTE_ALIGN(bs); |
454 | break; | 484 | break; |
@@ -458,7 +488,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, | |||
458 | 488 | ||
459 | PRINT("\n"); | 489 | PRINT("\n"); |
460 | 490 | ||
461 | CHECK_BOUND(bs, 0); | 491 | if (nf_h323_error_boundary(bs, 0, 0)) |
492 | return H323_ERROR_BOUND; | ||
462 | return H323_ERROR_NONE; | 493 | return H323_ERROR_NONE; |
463 | } | 494 | } |
464 | 495 | ||
@@ -473,10 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, | |||
473 | switch (f->sz) { | 504 | switch (f->sz) { |
474 | case BYTE: /* Range == 256 */ | 505 | case BYTE: /* Range == 256 */ |
475 | BYTE_ALIGN(bs); | 506 | BYTE_ALIGN(bs); |
476 | CHECK_BOUND(bs, 1); | 507 | if (nf_h323_error_boundary(bs, 1, 0)) |
508 | return H323_ERROR_BOUND; | ||
477 | len = (*bs->cur++) + f->lb; | 509 | len = (*bs->cur++) + f->lb; |
478 | break; | 510 | break; |
479 | default: /* 2 <= Range <= 255 */ | 511 | default: /* 2 <= Range <= 255 */ |
512 | if (nf_h323_error_boundary(bs, 0, f->sz)) | ||
513 | return H323_ERROR_BOUND; | ||
480 | len = get_bits(bs, f->sz) + f->lb; | 514 | len = get_bits(bs, f->sz) + f->lb; |
481 | BYTE_ALIGN(bs); | 515 | BYTE_ALIGN(bs); |
482 | break; | 516 | break; |
@@ -484,7 +518,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, | |||
484 | 518 | ||
485 | bs->cur += len << 1; | 519 | bs->cur += len << 1; |
486 | 520 | ||
487 | CHECK_BOUND(bs, 0); | 521 | if (nf_h323_error_boundary(bs, 0, 0)) |
522 | return H323_ERROR_BOUND; | ||
488 | return H323_ERROR_NONE; | 523 | return H323_ERROR_NONE; |
489 | } | 524 | } |
490 | 525 | ||
@@ -503,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, | |||
503 | base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; | 538 | base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; |
504 | 539 | ||
505 | /* Extensible? */ | 540 | /* Extensible? */ |
541 | if (nf_h323_error_boundary(bs, 0, 1)) | ||
542 | return H323_ERROR_BOUND; | ||
506 | ext = (f->attr & EXT) ? get_bit(bs) : 0; | 543 | ext = (f->attr & EXT) ? get_bit(bs) : 0; |
507 | 544 | ||
508 | /* Get fields bitmap */ | 545 | /* Get fields bitmap */ |
546 | if (nf_h323_error_boundary(bs, 0, f->sz)) | ||
547 | return H323_ERROR_BOUND; | ||
509 | bmp = get_bitmap(bs, f->sz); | 548 | bmp = get_bitmap(bs, f->sz); |
510 | if (base) | 549 | if (base) |
511 | *(unsigned int *)base = bmp; | 550 | *(unsigned int *)base = bmp; |
@@ -525,9 +564,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, | |||
525 | 564 | ||
526 | /* Decode */ | 565 | /* Decode */ |
527 | if (son->attr & OPEN) { /* Open field */ | 566 | if (son->attr & OPEN) { /* Open field */ |
528 | CHECK_BOUND(bs, 2); | 567 | if (nf_h323_error_boundary(bs, 2, 0)) |
568 | return H323_ERROR_BOUND; | ||
529 | len = get_len(bs); | 569 | len = get_len(bs); |
530 | CHECK_BOUND(bs, len); | 570 | if (nf_h323_error_boundary(bs, len, 0)) |
571 | return H323_ERROR_BOUND; | ||
531 | if (!base || !(son->attr & DECODE)) { | 572 | if (!base || !(son->attr & DECODE)) { |
532 | PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, | 573 | PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, |
533 | " ", son->name); | 574 | " ", son->name); |
@@ -555,8 +596,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, | |||
555 | return H323_ERROR_NONE; | 596 | return H323_ERROR_NONE; |
556 | 597 | ||
557 | /* Get the extension bitmap */ | 598 | /* Get the extension bitmap */ |
599 | if (nf_h323_error_boundary(bs, 0, 7)) | ||
600 | return H323_ERROR_BOUND; | ||
558 | bmp2_len = get_bits(bs, 7) + 1; | 601 | bmp2_len = get_bits(bs, 7) + 1; |
559 | CHECK_BOUND(bs, (bmp2_len + 7) >> 3); | 602 | if (nf_h323_error_boundary(bs, 0, bmp2_len)) |
603 | return H323_ERROR_BOUND; | ||
560 | bmp2 = get_bitmap(bs, bmp2_len); | 604 | bmp2 = get_bitmap(bs, bmp2_len); |
561 | bmp |= bmp2 >> f->sz; | 605 | bmp |= bmp2 >> f->sz; |
562 | if (base) | 606 | if (base) |
@@ -567,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, | |||
567 | for (opt = 0; opt < bmp2_len; opt++, i++, son++) { | 611 | for (opt = 0; opt < bmp2_len; opt++, i++, son++) { |
568 | /* Check Range */ | 612 | /* Check Range */ |
569 | if (i >= f->ub) { /* Newer Version? */ | 613 | if (i >= f->ub) { /* Newer Version? */ |
570 | CHECK_BOUND(bs, 2); | 614 | if (nf_h323_error_boundary(bs, 2, 0)) |
615 | return H323_ERROR_BOUND; | ||
571 | len = get_len(bs); | 616 | len = get_len(bs); |
572 | CHECK_BOUND(bs, len); | 617 | if (nf_h323_error_boundary(bs, len, 0)) |
618 | return H323_ERROR_BOUND; | ||
573 | bs->cur += len; | 619 | bs->cur += len; |
574 | continue; | 620 | continue; |
575 | } | 621 | } |
@@ -583,9 +629,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, | |||
583 | if (!((0x80000000 >> opt) & bmp2)) /* Not present */ | 629 | if (!((0x80000000 >> opt) & bmp2)) /* Not present */ |
584 | continue; | 630 | continue; |
585 | 631 | ||
586 | CHECK_BOUND(bs, 2); | 632 | if (nf_h323_error_boundary(bs, 2, 0)) |
633 | return H323_ERROR_BOUND; | ||
587 | len = get_len(bs); | 634 | len = get_len(bs); |
588 | CHECK_BOUND(bs, len); | 635 | if (nf_h323_error_boundary(bs, len, 0)) |
636 | return H323_ERROR_BOUND; | ||
589 | if (!base || !(son->attr & DECODE)) { | 637 | if (!base || !(son->attr & DECODE)) { |
590 | PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", | 638 | PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", |
591 | son->name); | 639 | son->name); |
@@ -623,22 +671,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, | |||
623 | switch (f->sz) { | 671 | switch (f->sz) { |
624 | case BYTE: | 672 | case BYTE: |
625 | BYTE_ALIGN(bs); | 673 | BYTE_ALIGN(bs); |
626 | CHECK_BOUND(bs, 1); | 674 | if (nf_h323_error_boundary(bs, 1, 0)) |
675 | return H323_ERROR_BOUND; | ||
627 | count = *bs->cur++; | 676 | count = *bs->cur++; |
628 | break; | 677 | break; |
629 | case WORD: | 678 | case WORD: |
630 | BYTE_ALIGN(bs); | 679 | BYTE_ALIGN(bs); |
631 | CHECK_BOUND(bs, 2); | 680 | if (nf_h323_error_boundary(bs, 2, 0)) |
681 | return H323_ERROR_BOUND; | ||
632 | count = *bs->cur++; | 682 | count = *bs->cur++; |
633 | count <<= 8; | 683 | count <<= 8; |
634 | count += *bs->cur++; | 684 | count += *bs->cur++; |
635 | break; | 685 | break; |
636 | case SEMI: | 686 | case SEMI: |
637 | BYTE_ALIGN(bs); | 687 | BYTE_ALIGN(bs); |
638 | CHECK_BOUND(bs, 2); | 688 | if (nf_h323_error_boundary(bs, 2, 0)) |
689 | return H323_ERROR_BOUND; | ||
639 | count = get_len(bs); | 690 | count = get_len(bs); |
640 | break; | 691 | break; |
641 | default: | 692 | default: |
693 | if (nf_h323_error_boundary(bs, 0, f->sz)) | ||
694 | return H323_ERROR_BOUND; | ||
642 | count = get_bits(bs, f->sz); | 695 | count = get_bits(bs, f->sz); |
643 | break; | 696 | break; |
644 | } | 697 | } |
@@ -658,8 +711,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, | |||
658 | for (i = 0; i < count; i++) { | 711 | for (i = 0; i < count; i++) { |
659 | if (son->attr & OPEN) { | 712 | if (son->attr & OPEN) { |
660 | BYTE_ALIGN(bs); | 713 | BYTE_ALIGN(bs); |
714 | if (nf_h323_error_boundary(bs, 2, 0)) | ||
715 | return H323_ERROR_BOUND; | ||
661 | len = get_len(bs); | 716 | len = get_len(bs); |
662 | CHECK_BOUND(bs, len); | 717 | if (nf_h323_error_boundary(bs, len, 0)) |
718 | return H323_ERROR_BOUND; | ||
663 | if (!base || !(son->attr & DECODE)) { | 719 | if (!base || !(son->attr & DECODE)) { |
664 | PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, | 720 | PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, |
665 | " ", son->name); | 721 | " ", son->name); |
@@ -710,11 +766,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, | |||
710 | base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; | 766 | base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; |
711 | 767 | ||
712 | /* Decode the choice index number */ | 768 | /* Decode the choice index number */ |
769 | if (nf_h323_error_boundary(bs, 0, 1)) | ||
770 | return H323_ERROR_BOUND; | ||
713 | if ((f->attr & EXT) && get_bit(bs)) { | 771 | if ((f->attr & EXT) && get_bit(bs)) { |
714 | ext = 1; | 772 | ext = 1; |
773 | if (nf_h323_error_boundary(bs, 0, 7)) | ||
774 | return H323_ERROR_BOUND; | ||
715 | type = get_bits(bs, 7) + f->lb; | 775 | type = get_bits(bs, 7) + f->lb; |
716 | } else { | 776 | } else { |
717 | ext = 0; | 777 | ext = 0; |
778 | if (nf_h323_error_boundary(bs, 0, f->sz)) | ||
779 | return H323_ERROR_BOUND; | ||
718 | type = get_bits(bs, f->sz); | 780 | type = get_bits(bs, f->sz); |
719 | if (type >= f->lb) | 781 | if (type >= f->lb) |
720 | return H323_ERROR_RANGE; | 782 | return H323_ERROR_RANGE; |
@@ -727,8 +789,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, | |||
727 | /* Check Range */ | 789 | /* Check Range */ |
728 | if (type >= f->ub) { /* Newer version? */ | 790 | if (type >= f->ub) { /* Newer version? */ |
729 | BYTE_ALIGN(bs); | 791 | BYTE_ALIGN(bs); |
792 | if (nf_h323_error_boundary(bs, 2, 0)) | ||
793 | return H323_ERROR_BOUND; | ||
730 | len = get_len(bs); | 794 | len = get_len(bs); |
731 | CHECK_BOUND(bs, len); | 795 | if (nf_h323_error_boundary(bs, len, 0)) |
796 | return H323_ERROR_BOUND; | ||
732 | bs->cur += len; | 797 | bs->cur += len; |
733 | return H323_ERROR_NONE; | 798 | return H323_ERROR_NONE; |
734 | } | 799 | } |
@@ -742,8 +807,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, | |||
742 | 807 | ||
743 | if (ext || (son->attr & OPEN)) { | 808 | if (ext || (son->attr & OPEN)) { |
744 | BYTE_ALIGN(bs); | 809 | BYTE_ALIGN(bs); |
810 | if (nf_h323_error_boundary(bs, 2, 0)) | ||
811 | return H323_ERROR_BOUND; | ||
745 | len = get_len(bs); | 812 | len = get_len(bs); |
746 | CHECK_BOUND(bs, len); | 813 | if (nf_h323_error_boundary(bs, len, 0)) |
814 | return H323_ERROR_BOUND; | ||
747 | if (!base || !(son->attr & DECODE)) { | 815 | if (!base || !(son->attr & DECODE)) { |
748 | PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", | 816 | PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", |
749 | son->name); | 817 | son->name); |
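The removed CHECK_BOUND macro only counted whole bytes; nf_h323_error_boundary() also folds in the decoder's sub-byte bit offset and rounds any partial byte up before the end-pointer comparison, so bit-granular reads like get_bits() can be checked too. A runnable sketch of just that rounding step:

#include <stddef.h>
#include <stdio.h>

#define BITS_PER_BYTE 8

/* Same rounding as nf_h323_error_boundary(): fold the cursor's bit
 * offset into the request, then round partial bits up to a byte. */
static size_t bytes_needed(size_t cur_bit, size_t bytes, size_t bits)
{
	bits += cur_bit;
	bytes += bits / BITS_PER_BYTE;
	if (bits % BITS_PER_BYTE > 0)
		bytes++;
	return bytes;
}

int main(void)
{
	/* Cursor at bit 6, caller wants 7 more bits: 13 bits total,
	 * i.e. one full byte plus 5 bits, so 2 bytes must remain. */
	printf("%zu\n", bytes_needed(6, 0, 7));	/* prints 2 */
	return 0;
}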
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59c08997bfdf..382d49792f42 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -45,7 +45,6 @@ | |||
45 | #include <net/netfilter/nf_conntrack_zones.h> | 45 | #include <net/netfilter/nf_conntrack_zones.h> |
46 | #include <net/netfilter/nf_conntrack_timestamp.h> | 46 | #include <net/netfilter/nf_conntrack_timestamp.h> |
47 | #include <net/netfilter/nf_conntrack_labels.h> | 47 | #include <net/netfilter/nf_conntrack_labels.h> |
48 | #include <net/netfilter/nf_conntrack_seqadj.h> | ||
49 | #include <net/netfilter/nf_conntrack_synproxy.h> | 48 | #include <net/netfilter/nf_conntrack_synproxy.h> |
50 | #ifdef CONFIG_NF_NAT_NEEDED | 49 | #ifdef CONFIG_NF_NAT_NEEDED |
51 | #include <net/netfilter/nf_nat_core.h> | 50 | #include <net/netfilter/nf_nat_core.h> |
@@ -1566,9 +1565,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct, | |||
1566 | static int ctnetlink_change_timeout(struct nf_conn *ct, | 1565 | static int ctnetlink_change_timeout(struct nf_conn *ct, |
1567 | const struct nlattr * const cda[]) | 1566 | const struct nlattr * const cda[]) |
1568 | { | 1567 | { |
1569 | u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); | 1568 | u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; |
1570 | 1569 | ||
1571 | ct->timeout = nfct_time_stamp + timeout * HZ; | 1570 | if (timeout > INT_MAX) |
1571 | timeout = INT_MAX; | ||
1572 | ct->timeout = nfct_time_stamp + (u32)timeout; | ||
1572 | 1573 | ||
1573 | if (test_bit(IPS_DYING_BIT, &ct->status)) | 1574 | if (test_bit(IPS_DYING_BIT, &ct->status)) |
1574 | return -ETIME; | 1575 | return -ETIME; |
@@ -1768,6 +1769,7 @@ ctnetlink_create_conntrack(struct net *net, | |||
1768 | int err = -EINVAL; | 1769 | int err = -EINVAL; |
1769 | struct nf_conntrack_helper *helper; | 1770 | struct nf_conntrack_helper *helper; |
1770 | struct nf_conn_tstamp *tstamp; | 1771 | struct nf_conn_tstamp *tstamp; |
1772 | u64 timeout; | ||
1771 | 1773 | ||
1772 | ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); | 1774 | ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); |
1773 | if (IS_ERR(ct)) | 1775 | if (IS_ERR(ct)) |
@@ -1776,7 +1778,10 @@ ctnetlink_create_conntrack(struct net *net, | |||
1776 | if (!cda[CTA_TIMEOUT]) | 1778 | if (!cda[CTA_TIMEOUT]) |
1777 | goto err1; | 1779 | goto err1; |
1778 | 1780 | ||
1779 | ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; | 1781 | timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; |
1782 | if (timeout > INT_MAX) | ||
1783 | timeout = INT_MAX; | ||
1784 | ct->timeout = (u32)timeout + nfct_time_stamp; | ||
1780 | 1785 | ||
1781 | rcu_read_lock(); | 1786 | rcu_read_lock(); |
1782 | if (cda[CTA_HELP]) { | 1787 | if (cda[CTA_HELP]) { |
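Both ctnetlink timeout paths now widen the CTA_TIMEOUT attribute to u64 before multiplying by HZ, clamp the product at INT_MAX, and only then truncate into the u32 ct->timeout; done in 32-bit arithmetic, a large timeout times HZ can wrap to a near-zero value. A runnable sketch of the widen-clamp-truncate ordering (the HZ value is an assumption):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#define HZ 250	/* assumed config; any HZ shows the same wrap */

/* Widen, clamp, then truncate, in that order. */
static uint32_t timeout_jiffies(uint32_t seconds)
{
	uint64_t t = (uint64_t)seconds * HZ;

	if (t > INT_MAX)
		t = INT_MAX;
	return (uint32_t)t;
}

int main(void)
{
	/* 32-bit 0xffffffff * 250 would wrap to a tiny number; the
	 * clamp pins it at INT_MAX instead. */
	printf("%u\n", (unsigned)timeout_jiffies(0xffffffffu));
	return 0;
}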
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b12fc07111d0..37ef35b861f2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c | |||
@@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct, | |||
1039 | IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && | 1039 | IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && |
1040 | timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) | 1040 | timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) |
1041 | timeout = timeouts[TCP_CONNTRACK_UNACK]; | 1041 | timeout = timeouts[TCP_CONNTRACK_UNACK]; |
1042 | else if (ct->proto.tcp.last_win == 0 && | ||
1043 | timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) | ||
1044 | timeout = timeouts[TCP_CONNTRACK_RETRANS]; | ||
1042 | else | 1045 | else |
1043 | timeout = timeouts[new_state]; | 1046 | timeout = timeouts[new_state]; |
1044 | spin_unlock_bh(&ct->lock); | 1047 | spin_unlock_bh(&ct->lock); |
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d8327b43e4dc..07bd4138c84e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c | |||
@@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, | |||
2072 | continue; | 2072 | continue; |
2073 | 2073 | ||
2074 | list_for_each_entry_rcu(chain, &table->chains, list) { | 2074 | list_for_each_entry_rcu(chain, &table->chains, list) { |
2075 | if (ctx && ctx->chain[0] && | 2075 | if (ctx && ctx->chain && |
2076 | strcmp(ctx->chain, chain->name) != 0) | 2076 | strcmp(ctx->chain, chain->name) != 0) |
2077 | continue; | 2077 | continue; |
2078 | 2078 | ||
@@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) | |||
4665 | { | 4665 | { |
4666 | struct nft_obj_filter *filter = cb->data; | 4666 | struct nft_obj_filter *filter = cb->data; |
4667 | 4667 | ||
4668 | kfree(filter->table); | 4668 | if (filter) { |
4669 | kfree(filter); | 4669 | kfree(filter->table); |
4670 | kfree(filter); | ||
4671 | } | ||
4670 | 4672 | ||
4671 | return 0; | 4673 | return 0; |
4672 | } | 4674 | } |
@@ -5847,6 +5849,12 @@ static int __net_init nf_tables_init_net(struct net *net) | |||
5847 | return 0; | 5849 | return 0; |
5848 | } | 5850 | } |
5849 | 5851 | ||
5852 | static void __net_exit nf_tables_exit_net(struct net *net) | ||
5853 | { | ||
5854 | WARN_ON_ONCE(!list_empty(&net->nft.af_info)); | ||
5855 | WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); | ||
5856 | } | ||
5857 | |||
5850 | int __nft_release_basechain(struct nft_ctx *ctx) | 5858 | int __nft_release_basechain(struct nft_ctx *ctx) |
5851 | { | 5859 | { |
5852 | struct nft_rule *rule, *nr; | 5860 | struct nft_rule *rule, *nr; |
@@ -5917,6 +5925,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) | |||
5917 | 5925 | ||
5918 | static struct pernet_operations nf_tables_net_ops = { | 5926 | static struct pernet_operations nf_tables_net_ops = { |
5919 | .init = nf_tables_init_net, | 5927 | .init = nf_tables_init_net, |
5928 | .exit = nf_tables_exit_net, | ||
5920 | }; | 5929 | }; |
5921 | 5930 | ||
5922 | static int __init nf_tables_module_init(void) | 5931 | static int __init nf_tables_module_init(void) |
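nf_tables_exit_net() asserts that the per-netns lists are already drained when the namespace is dismantled, turning a silent leak into a one-shot warning; nfnetlink_log, nfnetlink_queue and x_tables below gain the same kind of check. A sketch of the pernet_operations pattern, with illustrative field names rather than the real nf_tables ones:

/* Field and function names here are illustrative only. */
static int __net_init example_net_init(struct net *net)
{
	INIT_LIST_HEAD(&net->example.objects);
	return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
	/* Warns once if cleanup left objects behind, rather than
	 * letting them dangle after the netns memory is freed. */
	WARN_ON_ONCE(!list_empty(&net->example.objects));
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};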
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 41628b393673..d33ce6d5ebce 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/types.h> | 17 | #include <linux/types.h> |
18 | #include <linux/list.h> | 18 | #include <linux/list.h> |
19 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
20 | #include <linux/capability.h> | ||
20 | #include <net/netlink.h> | 21 | #include <net/netlink.h> |
21 | #include <net/sock.h> | 22 | #include <net/sock.h> |
22 | 23 | ||
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, | |||
407 | struct nfnl_cthelper *nlcth; | 408 | struct nfnl_cthelper *nlcth; |
408 | int ret = 0; | 409 | int ret = 0; |
409 | 410 | ||
411 | if (!capable(CAP_NET_ADMIN)) | ||
412 | return -EPERM; | ||
413 | |||
410 | if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) | 414 | if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) |
411 | return -EINVAL; | 415 | return -EINVAL; |
412 | 416 | ||
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, | |||
611 | struct nfnl_cthelper *nlcth; | 615 | struct nfnl_cthelper *nlcth; |
612 | bool tuple_set = false; | 616 | bool tuple_set = false; |
613 | 617 | ||
618 | if (!capable(CAP_NET_ADMIN)) | ||
619 | return -EPERM; | ||
620 | |||
614 | if (nlh->nlmsg_flags & NLM_F_DUMP) { | 621 | if (nlh->nlmsg_flags & NLM_F_DUMP) { |
615 | struct netlink_dump_control c = { | 622 | struct netlink_dump_control c = { |
616 | .dump = nfnl_cthelper_dump_table, | 623 | .dump = nfnl_cthelper_dump_table, |
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, | |||
678 | struct nfnl_cthelper *nlcth, *n; | 685 | struct nfnl_cthelper *nlcth, *n; |
679 | int j = 0, ret; | 686 | int j = 0, ret; |
680 | 687 | ||
688 | if (!capable(CAP_NET_ADMIN)) | ||
689 | return -EPERM; | ||
690 | |||
681 | if (tb[NFCTH_NAME]) | 691 | if (tb[NFCTH_NAME]) |
682 | helper_name = nla_data(tb[NFCTH_NAME]); | 692 | helper_name = nla_data(tb[NFCTH_NAME]); |
683 | 693 | ||
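Each nfnl_cthelper handler now opens with a capable(CAP_NET_ADMIN) check, and xt_osf below gets the same treatment. Note it is capable(), not ns_capable(): the helper list is shared across net namespaces, so holding CAP_NET_ADMIN only in a child user namespace must not suffice. A sketch of the gate, with a deliberately simplified handler signature:

/* Handler shape simplified; the real nfnetlink callbacks take more
 * arguments (attribute table, extack, ...). */
static int example_nl_handler(struct net *net, struct sk_buff *skb,
			      const struct nlmsghdr *nlh)
{
	if (!capable(CAP_NET_ADMIN))	/* global, not per-namespace */
		return -EPERM;

	/* ... parse attributes and mutate the shared table ... */
	return 0;
}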
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e5afab86381c..e955bec0acc6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c | |||
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net) | |||
1093 | 1093 | ||
1094 | static void __net_exit nfnl_log_net_exit(struct net *net) | 1094 | static void __net_exit nfnl_log_net_exit(struct net *net) |
1095 | { | 1095 | { |
1096 | struct nfnl_log_net *log = nfnl_log_pernet(net); | ||
1097 | unsigned int i; | ||
1098 | |||
1096 | #ifdef CONFIG_PROC_FS | 1099 | #ifdef CONFIG_PROC_FS |
1097 | remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); | 1100 | remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); |
1098 | #endif | 1101 | #endif |
1099 | nf_log_unset(net, &nfulnl_logger); | 1102 | nf_log_unset(net, &nfulnl_logger); |
1103 | for (i = 0; i < INSTANCE_BUCKETS; i++) | ||
1104 | WARN_ON_ONCE(!hlist_empty(&log->instance_table[i])); | ||
1100 | } | 1105 | } |
1101 | 1106 | ||
1102 | static struct pernet_operations nfnl_log_net_ops = { | 1107 | static struct pernet_operations nfnl_log_net_ops = { |
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index a16356cacec3..c09b36755ed7 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c | |||
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net) | |||
1512 | 1512 | ||
1513 | static void __net_exit nfnl_queue_net_exit(struct net *net) | 1513 | static void __net_exit nfnl_queue_net_exit(struct net *net) |
1514 | { | 1514 | { |
1515 | struct nfnl_queue_net *q = nfnl_queue_pernet(net); | ||
1516 | unsigned int i; | ||
1517 | |||
1515 | nf_unregister_queue_handler(net); | 1518 | nf_unregister_queue_handler(net); |
1516 | #ifdef CONFIG_PROC_FS | 1519 | #ifdef CONFIG_PROC_FS |
1517 | remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); | 1520 | remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); |
1518 | #endif | 1521 | #endif |
1522 | for (i = 0; i < INSTANCE_BUCKETS; i++) | ||
1523 | WARN_ON_ONCE(!hlist_empty(&q->instance_table[i])); | ||
1519 | } | 1524 | } |
1520 | 1525 | ||
1521 | static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) | 1526 | static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) |
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index a0a93d987a3b..47ec1046ad11 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c | |||
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { | |||
214 | [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, | 214 | [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, |
215 | [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, | 215 | [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, |
216 | [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, | 216 | [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, |
217 | [NFTA_EXTHDR_OP] = { .type = NLA_U32 }, | ||
218 | [NFTA_EXTHDR_SREG] = { .type = NLA_U32 }, | ||
217 | }; | 219 | }; |
218 | 220 | ||
219 | static int nft_exthdr_init(const struct nft_ctx *ctx, | 221 | static int nft_exthdr_init(const struct nft_ctx *ctx, |
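The nft_exthdr policy table gains entries for NFTA_EXTHDR_OP and NFTA_EXTHDR_SREG; an attribute missing from an nla_policy gets no type or length validation even though the init path reads it, so every attribute the code consumes should be listed. A sketch of the completed-table shape, with placeholder enum names:

/* Placeholder attribute enum, mirroring the nft_exthdr layout. */
enum {
	EXAMPLE_ATTR_UNSPEC,
	EXAMPLE_ATTR_OFFSET,
	EXAMPLE_ATTR_LEN,
	EXAMPLE_ATTR_FLAGS,
	EXAMPLE_ATTR_OP,
	EXAMPLE_ATTR_SREG,
	__EXAMPLE_ATTR_MAX
};
#define EXAMPLE_ATTR_MAX (__EXAMPLE_ATTR_MAX - 1)

/* One entry per attribute the init path reads, so nla_parse()
 * rejects malformed lengths before any nla_get_*() runs. */
static const struct nla_policy example_policy[EXAMPLE_ATTR_MAX + 1] = {
	[EXAMPLE_ATTR_OFFSET]	= { .type = NLA_U32 },
	[EXAMPLE_ATTR_LEN]	= { .type = NLA_U32 },
	[EXAMPLE_ATTR_FLAGS]	= { .type = NLA_U32 },
	[EXAMPLE_ATTR_OP]	= { .type = NLA_U32 },
	[EXAMPLE_ATTR_SREG]	= { .type = NLA_U32 },
};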
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a77dd514297c..55802e97f906 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c | |||
@@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net) | |||
1729 | return 0; | 1729 | return 0; |
1730 | } | 1730 | } |
1731 | 1731 | ||
1732 | static void __net_exit xt_net_exit(struct net *net) | ||
1733 | { | ||
1734 | int i; | ||
1735 | |||
1736 | for (i = 0; i < NFPROTO_NUMPROTO; i++) | ||
1737 | WARN_ON_ONCE(!list_empty(&net->xt.tables[i])); | ||
1738 | } | ||
1739 | |||
1732 | static struct pernet_operations xt_net_ops = { | 1740 | static struct pernet_operations xt_net_ops = { |
1733 | .init = xt_net_init, | 1741 | .init = xt_net_init, |
1742 | .exit = xt_net_exit, | ||
1734 | }; | 1743 | }; |
1735 | 1744 | ||
1736 | static int __init xt_init(void) | 1745 | static int __init xt_init(void) |
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 041da0d9c06f..06b090d8e901 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c | |||
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, | |||
27 | { | 27 | { |
28 | struct sock_fprog_kern program; | 28 | struct sock_fprog_kern program; |
29 | 29 | ||
30 | if (len > XT_BPF_MAX_NUM_INSTR) | ||
31 | return -EINVAL; | ||
32 | |||
30 | program.len = len; | 33 | program.len = len; |
31 | program.filter = insns; | 34 | program.filter = insns; |
32 | 35 | ||
@@ -52,18 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) | |||
52 | 55 | ||
53 | static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) | 56 | static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) |
54 | { | 57 | { |
55 | mm_segment_t oldfs = get_fs(); | 58 | if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX) |
56 | int retval, fd; | 59 | return -EINVAL; |
57 | 60 | ||
58 | set_fs(KERNEL_DS); | 61 | *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER); |
59 | fd = bpf_obj_get_user(path, 0); | 62 | return PTR_ERR_OR_ZERO(*ret); |
60 | set_fs(oldfs); | ||
61 | if (fd < 0) | ||
62 | return fd; | ||
63 | |||
64 | retval = __bpf_mt_check_fd(fd, ret); | ||
65 | sys_close(fd); | ||
66 | return retval; | ||
67 | } | 63 | } |
68 | 64 | ||
69 | static int bpf_mt_check(const struct xt_mtchk_param *par) | 65 | static int bpf_mt_check(const struct xt_mtchk_param *par) |
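__bpf_mt_check_path() drops the set_fs(KERNEL_DS)/bpf_obj_get_user()/sys_close() detour in favor of the in-kernel bpf_prog_get_type_path() helper, and first rejects paths that are not NUL-terminated within XT_BPF_PATH_MAX: strnlen() returning the maximum means no terminator was found inside the buffer. A runnable sketch of that length check (the buffer size is a stand-in):

#include <stdio.h>
#include <string.h>

#define PATH_MAX_LEN 512	/* stands in for XT_BPF_PATH_MAX */

/* strnlen() == max means no NUL inside the buffer: reject. */
static int path_ok(const char *path)
{
	return strnlen(path, PATH_MAX_LEN) != PATH_MAX_LEN;
}

int main(void)
{
	char unterminated[PATH_MAX_LEN];

	memset(unterminated, 'a', sizeof(unterminated));
	printf("%d %d\n", path_ok("/sys/fs/bpf/prog"),	/* 1 */
	       path_ok(unterminated));			/* 0 */
	return 0;
}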
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 36e14b1f061d..a34f314a8c23 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
21 | 21 | ||
22 | #include <linux/capability.h> | ||
22 | #include <linux/if.h> | 23 | #include <linux/if.h> |
23 | #include <linux/inetdevice.h> | 24 | #include <linux/inetdevice.h> |
24 | #include <linux/ip.h> | 25 | #include <linux/ip.h> |
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl, | |||
70 | struct xt_osf_finger *kf = NULL, *sf; | 71 | struct xt_osf_finger *kf = NULL, *sf; |
71 | int err = 0; | 72 | int err = 0; |
72 | 73 | ||
74 | if (!capable(CAP_NET_ADMIN)) | ||
75 | return -EPERM; | ||
76 | |||
73 | if (!osf_attrs[OSF_ATTR_FINGER]) | 77 | if (!osf_attrs[OSF_ATTR_FINGER]) |
74 | return -EINVAL; | 78 | return -EINVAL; |
75 | 79 | ||
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, | |||
115 | struct xt_osf_finger *sf; | 119 | struct xt_osf_finger *sf; |
116 | int err = -ENOENT; | 120 | int err = -ENOENT; |
117 | 121 | ||
122 | if (!capable(CAP_NET_ADMIN)) | ||
123 | return -EPERM; | ||
124 | |||
118 | if (!osf_attrs[OSF_ATTR_FINGER]) | 125 | if (!osf_attrs[OSF_ATTR_FINGER]) |
119 | return -EINVAL; | 126 | return -EINVAL; |
120 | 127 | ||
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b9e0ee4e22f5..84a4e4c3be4b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, | |||
253 | struct sock *sk = skb->sk; | 253 | struct sock *sk = skb->sk; |
254 | int ret = -ENOMEM; | 254 | int ret = -ENOMEM; |
255 | 255 | ||
256 | if (!net_eq(dev_net(dev), sock_net(sk))) | ||
257 | return 0; | ||
258 | |||
256 | dev_hold(dev); | 259 | dev_hold(dev); |
257 | 260 | ||
258 | if (is_vmalloc_addr(skb->head)) | 261 | if (is_vmalloc_addr(skb->head)) |
@@ -2381,13 +2384,14 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, | |||
2381 | struct nlmsghdr *, | 2384 | struct nlmsghdr *, |
2382 | struct netlink_ext_ack *)) | 2385 | struct netlink_ext_ack *)) |
2383 | { | 2386 | { |
2384 | struct netlink_ext_ack extack = {}; | 2387 | struct netlink_ext_ack extack; |
2385 | struct nlmsghdr *nlh; | 2388 | struct nlmsghdr *nlh; |
2386 | int err; | 2389 | int err; |
2387 | 2390 | ||
2388 | while (skb->len >= nlmsg_total_size(0)) { | 2391 | while (skb->len >= nlmsg_total_size(0)) { |
2389 | int msglen; | 2392 | int msglen; |
2390 | 2393 | ||
2394 | memset(&extack, 0, sizeof(extack)); | ||
2391 | nlh = nlmsg_hdr(skb); | 2395 | nlh = nlmsg_hdr(skb); |
2392 | err = 0; | 2396 | err = 0; |
2393 | 2397 | ||
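Two independent af_netlink.c fixes: tapped skbs are no longer delivered across network namespaces, and the extack in netlink_rcv_skb() is cleared per message instead of being zero-initialized once, since one skb can carry several netlink messages and a later message would otherwise inherit the previous one's extended-ack state. A small sketch of the per-iteration reset pattern:

#include <string.h>

/* Stands in for struct netlink_ext_ack. */
struct diag {
	const char *msg;
	unsigned int cookie;
};

void process_all(int nmsgs)
{
	struct diag d;	/* reused, so no one-time "= {}" initializer */
	int i;

	for (i = 0; i < nmsgs; i++) {
		memset(&d, 0, sizeof(d));	/* fresh state per message */
		/* ... dispatch message i; the handler may fill d ... */
	}
}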
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index fb7afcaa3004..985909f105eb 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c | |||
@@ -531,7 +531,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, | |||
531 | return 0; | 531 | return 0; |
532 | } | 532 | } |
533 | 533 | ||
534 | static inline unsigned int llcp_accept_poll(struct sock *parent) | 534 | static inline __poll_t llcp_accept_poll(struct sock *parent) |
535 | { | 535 | { |
536 | struct nfc_llcp_sock *llcp_sock, *parent_sock; | 536 | struct nfc_llcp_sock *llcp_sock, *parent_sock; |
537 | struct sock *sk; | 537 | struct sock *sk; |
@@ -549,11 +549,11 @@ static inline unsigned int llcp_accept_poll(struct sock *parent) | |||
549 | return 0; | 549 | return 0; |
550 | } | 550 | } |
551 | 551 | ||
552 | static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, | 552 | static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, |
553 | poll_table *wait) | 553 | poll_table *wait) |
554 | { | 554 | { |
555 | struct sock *sk = sock->sk; | 555 | struct sock *sk = sock->sk; |
556 | unsigned int mask = 0; | 556 | __poll_t mask = 0; |
557 | 557 | ||
558 | pr_debug("%p\n", sk); | 558 | pr_debug("%p\n", sk); |
559 | 559 | ||
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 8d104c1db628..a66f102c6c01 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c | |||
@@ -305,7 +305,7 @@ static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file, | |||
305 | return 0; | 305 | return 0; |
306 | } | 306 | } |
307 | 307 | ||
308 | static unsigned int nci_uart_tty_poll(struct tty_struct *tty, | 308 | static __poll_t nci_uart_tty_poll(struct tty_struct *tty, |
309 | struct file *filp, poll_table *wait) | 309 | struct file *filp, poll_table *wait) |
310 | { | 310 | { |
311 | return 0; | 311 | return 0; |
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 99cfafc2a139..ef38e5aecd28 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c | |||
@@ -308,7 +308,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, | |||
308 | const struct dp_upcall_info *upcall_info, | 308 | const struct dp_upcall_info *upcall_info, |
309 | uint32_t cutlen) | 309 | uint32_t cutlen) |
310 | { | 310 | { |
311 | unsigned short gso_type = skb_shinfo(skb)->gso_type; | 311 | unsigned int gso_type = skb_shinfo(skb)->gso_type; |
312 | struct sw_flow_key later_key; | 312 | struct sw_flow_key later_key; |
313 | struct sk_buff *segs, *nskb; | 313 | struct sk_buff *segs, *nskb; |
314 | int err; | 314 | int err; |
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dbe2379329c5..f039064ce922 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c | |||
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) | |||
579 | return -EINVAL; | 579 | return -EINVAL; |
580 | 580 | ||
581 | skb_reset_network_header(skb); | 581 | skb_reset_network_header(skb); |
582 | key->eth.type = skb->protocol; | ||
582 | } else { | 583 | } else { |
583 | eth = eth_hdr(skb); | 584 | eth = eth_hdr(skb); |
584 | ether_addr_copy(key->eth.src, eth->h_source); | 585 | ether_addr_copy(key->eth.src, eth->h_source); |
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) | |||
592 | if (unlikely(parse_vlan(skb, key))) | 593 | if (unlikely(parse_vlan(skb, key))) |
593 | return -ENOMEM; | 594 | return -ENOMEM; |
594 | 595 | ||
595 | skb->protocol = parse_ethertype(skb); | 596 | key->eth.type = parse_ethertype(skb); |
596 | if (unlikely(skb->protocol == htons(0))) | 597 | if (unlikely(key->eth.type == htons(0))) |
597 | return -ENOMEM; | 598 | return -ENOMEM; |
598 | 599 | ||
600 | /* Multiple tagged packets need to retain TPID to satisfy | ||
601 | * skb_vlan_pop(), which will later shift the ethertype into | ||
602 | * skb->protocol. | ||
603 | */ | ||
604 | if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT)) | ||
605 | skb->protocol = key->eth.cvlan.tpid; | ||
606 | else | ||
607 | skb->protocol = key->eth.type; | ||
608 | |||
599 | skb_reset_network_header(skb); | 609 | skb_reset_network_header(skb); |
600 | __skb_push(skb, skb->data - skb_mac_header(skb)); | 610 | __skb_push(skb, skb->data - skb_mac_header(skb)); |
601 | } | 611 | } |
602 | skb_reset_mac_len(skb); | 612 | skb_reset_mac_len(skb); |
603 | key->eth.type = skb->protocol; | ||
604 | 613 | ||
605 | /* Network layer. */ | 614 | /* Network layer. */ |
606 | if (key->eth.type == htons(ETH_P_IP)) { | 615 | if (key->eth.type == htons(ETH_P_IP)) { |
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dc424798ba6f..f143908b651d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c | |||
@@ -49,7 +49,6 @@ | |||
49 | #include <net/mpls.h> | 49 | #include <net/mpls.h> |
50 | #include <net/vxlan.h> | 50 | #include <net/vxlan.h> |
51 | #include <net/tun_proto.h> | 51 | #include <net/tun_proto.h> |
52 | #include <net/erspan.h> | ||
53 | 52 | ||
54 | #include "flow_netlink.h" | 53 | #include "flow_netlink.h" |
55 | 54 | ||
@@ -334,8 +333,7 @@ size_t ovs_tun_key_attr_size(void) | |||
334 | * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. | 333 | * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. |
335 | */ | 334 | */ |
336 | + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ | 335 | + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ |
337 | + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */ | 336 | + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ |
338 | + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ | ||
339 | } | 337 | } |
340 | 338 | ||
341 | static size_t ovs_nsh_key_attr_size(void) | 339 | static size_t ovs_nsh_key_attr_size(void) |
@@ -402,7 +400,6 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] | |||
402 | .next = ovs_vxlan_ext_key_lens }, | 400 | .next = ovs_vxlan_ext_key_lens }, |
403 | [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, | 401 | [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, |
404 | [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, | 402 | [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, |
405 | [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) }, | ||
406 | }; | 403 | }; |
407 | 404 | ||
408 | static const struct ovs_len_tbl | 405 | static const struct ovs_len_tbl |
@@ -634,33 +631,6 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, | |||
634 | return 0; | 631 | return 0; |
635 | } | 632 | } |
636 | 633 | ||
637 | static int erspan_tun_opt_from_nlattr(const struct nlattr *attr, | ||
638 | struct sw_flow_match *match, bool is_mask, | ||
639 | bool log) | ||
640 | { | ||
641 | unsigned long opt_key_offset; | ||
642 | struct erspan_metadata opts; | ||
643 | |||
644 | BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); | ||
645 | |||
646 | memset(&opts, 0, sizeof(opts)); | ||
647 | opts.index = nla_get_be32(attr); | ||
648 | |||
649 | /* Index has only 20-bit */ | ||
650 | if (ntohl(opts.index) & ~INDEX_MASK) { | ||
651 | OVS_NLERR(log, "ERSPAN index number %x too large.", | ||
652 | ntohl(opts.index)); | ||
653 | return -EINVAL; | ||
654 | } | ||
655 | |||
656 | SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask); | ||
657 | opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); | ||
658 | SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), | ||
659 | is_mask); | ||
660 | |||
661 | return 0; | ||
662 | } | ||
663 | |||
664 | static int ip_tun_from_nlattr(const struct nlattr *attr, | 634 | static int ip_tun_from_nlattr(const struct nlattr *attr, |
665 | struct sw_flow_match *match, bool is_mask, | 635 | struct sw_flow_match *match, bool is_mask, |
666 | bool log) | 636 | bool log) |
@@ -768,19 +738,6 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, | |||
768 | break; | 738 | break; |
769 | case OVS_TUNNEL_KEY_ATTR_PAD: | 739 | case OVS_TUNNEL_KEY_ATTR_PAD: |
770 | break; | 740 | break; |
771 | case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: | ||
772 | if (opts_type) { | ||
773 | OVS_NLERR(log, "Multiple metadata blocks provided"); | ||
774 | return -EINVAL; | ||
775 | } | ||
776 | |||
777 | err = erspan_tun_opt_from_nlattr(a, match, is_mask, log); | ||
778 | if (err) | ||
779 | return err; | ||
780 | |||
781 | tun_flags |= TUNNEL_ERSPAN_OPT; | ||
782 | opts_type = type; | ||
783 | break; | ||
784 | default: | 741 | default: |
785 | OVS_NLERR(log, "Unknown IP tunnel attribute %d", | 742 | OVS_NLERR(log, "Unknown IP tunnel attribute %d", |
786 | type); | 743 | type); |
@@ -905,10 +862,6 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, | |||
905 | else if (output->tun_flags & TUNNEL_VXLAN_OPT && | 862 | else if (output->tun_flags & TUNNEL_VXLAN_OPT && |
906 | vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) | 863 | vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) |
907 | return -EMSGSIZE; | 864 | return -EMSGSIZE; |
908 | else if (output->tun_flags & TUNNEL_ERSPAN_OPT && | ||
909 | nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, | ||
910 | ((struct erspan_metadata *)tun_opts)->index)) | ||
911 | return -EMSGSIZE; | ||
912 | } | 865 | } |
913 | 866 | ||
914 | return 0; | 867 | return 0; |
@@ -2241,14 +2194,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) | |||
2241 | 2194 | ||
2242 | #define MAX_ACTIONS_BUFSIZE (32 * 1024) | 2195 | #define MAX_ACTIONS_BUFSIZE (32 * 1024) |
2243 | 2196 | ||
2244 | static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) | 2197 | static struct sw_flow_actions *nla_alloc_flow_actions(int size) |
2245 | { | 2198 | { |
2246 | struct sw_flow_actions *sfa; | 2199 | struct sw_flow_actions *sfa; |
2247 | 2200 | ||
2248 | if (size > MAX_ACTIONS_BUFSIZE) { | 2201 | WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); |
2249 | OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); | ||
2250 | return ERR_PTR(-EINVAL); | ||
2251 | } | ||
2252 | 2202 | ||
2253 | sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); | 2203 | sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); |
2254 | if (!sfa) | 2204 | if (!sfa) |
@@ -2321,12 +2271,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, | |||
2321 | new_acts_size = ksize(*sfa) * 2; | 2271 | new_acts_size = ksize(*sfa) * 2; |
2322 | 2272 | ||
2323 | if (new_acts_size > MAX_ACTIONS_BUFSIZE) { | 2273 | if (new_acts_size > MAX_ACTIONS_BUFSIZE) { |
2324 | if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) | 2274 | if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { |
2275 | OVS_NLERR(log, "Flow action size exceeds max %u", | ||
2276 | MAX_ACTIONS_BUFSIZE); | ||
2325 | return ERR_PTR(-EMSGSIZE); | 2277 | return ERR_PTR(-EMSGSIZE); |
2278 | } | ||
2326 | new_acts_size = MAX_ACTIONS_BUFSIZE; | 2279 | new_acts_size = MAX_ACTIONS_BUFSIZE; |
2327 | } | 2280 | } |
2328 | 2281 | ||
2329 | acts = nla_alloc_flow_actions(new_acts_size, log); | 2282 | acts = nla_alloc_flow_actions(new_acts_size); |
2330 | if (IS_ERR(acts)) | 2283 | if (IS_ERR(acts)) |
2331 | return (void *)acts; | 2284 | return (void *)acts; |
2332 | 2285 | ||
@@ -2533,8 +2486,6 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, | |||
2533 | break; | 2486 | break; |
2534 | case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: | 2487 | case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: |
2535 | break; | 2488 | break; |
2536 | case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: | ||
2537 | break; | ||
2538 | } | 2489 | } |
2539 | }; | 2490 | }; |
2540 | 2491 | ||
@@ -3059,7 +3010,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, | |||
3059 | { | 3010 | { |
3060 | int err; | 3011 | int err; |
3061 | 3012 | ||
3062 | *sfa = nla_alloc_flow_actions(nla_len(attr), log); | 3013 | *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); |
3063 | if (IS_ERR(*sfa)) | 3014 | if (IS_ERR(*sfa)) |
3064 | return PTR_ERR(*sfa); | 3015 | return PTR_ERR(*sfa); |
3065 | 3016 | ||
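The flow-actions allocator no longer fails hard on a large declared size: the initial allocation is clamped with min(), and the user-visible -EMSGSIZE moves into reserve_sfa_size(), where the doubling growth path hits the cap. A runnable, simplified sketch of clamp-then-grow (the kernel compares the remaining room at the cap against the request; this condenses that check):

#include <stdio.h>

#define MAX_BUF (32 * 1024)	/* mirrors MAX_ACTIONS_BUFSIZE */

/* Double on demand, cap at MAX_BUF, and fail only when the request
 * cannot fit even at the cap (-EMSGSIZE in the hunk). */
static int grow(int cur, int need)
{
	int next = cur * 2;

	if (next > MAX_BUF) {
		if (need > MAX_BUF)
			return -1;
		next = MAX_BUF;
	}
	return next;
}

int main(void)
{
	printf("%d\n", grow(8 * 1024, 12 * 1024));	/* 16384 */
	printf("%d\n", grow(24 * 1024, 48 * 1024));	/* -1 */
	return 0;
}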
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 737092ca9b4e..3b4d6a3cf190 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -1687,7 +1687,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) | |||
1687 | atomic_long_set(&rollover->num, 0); | 1687 | atomic_long_set(&rollover->num, 0); |
1688 | atomic_long_set(&rollover->num_huge, 0); | 1688 | atomic_long_set(&rollover->num_huge, 0); |
1689 | atomic_long_set(&rollover->num_failed, 0); | 1689 | atomic_long_set(&rollover->num_failed, 0); |
1690 | po->rollover = rollover; | ||
1691 | } | 1690 | } |
1692 | 1691 | ||
1693 | if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { | 1692 | if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { |
@@ -1745,6 +1744,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) | |||
1745 | if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { | 1744 | if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { |
1746 | __dev_remove_pack(&po->prot_hook); | 1745 | __dev_remove_pack(&po->prot_hook); |
1747 | po->fanout = match; | 1746 | po->fanout = match; |
1747 | po->rollover = rollover; | ||
1748 | rollover = NULL; | ||
1748 | refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); | 1749 | refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); |
1749 | __fanout_link(sk, po); | 1750 | __fanout_link(sk, po); |
1750 | err = 0; | 1751 | err = 0; |
@@ -1758,10 +1759,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) | |||
1758 | } | 1759 | } |
1759 | 1760 | ||
1760 | out: | 1761 | out: |
1761 | if (err && rollover) { | 1762 | kfree(rollover); |
1762 | kfree_rcu(rollover, rcu); | ||
1763 | po->rollover = NULL; | ||
1764 | } | ||
1765 | mutex_unlock(&fanout_mutex); | 1763 | mutex_unlock(&fanout_mutex); |
1766 | return err; | 1764 | return err; |
1767 | } | 1765 | } |
@@ -1785,11 +1783,6 @@ static struct packet_fanout *fanout_release(struct sock *sk) | |||
1785 | list_del(&f->list); | 1783 | list_del(&f->list); |
1786 | else | 1784 | else |
1787 | f = NULL; | 1785 | f = NULL; |
1788 | |||
1789 | if (po->rollover) { | ||
1790 | kfree_rcu(po->rollover, rcu); | ||
1791 | po->rollover = NULL; | ||
1792 | } | ||
1793 | } | 1786 | } |
1794 | mutex_unlock(&fanout_mutex); | 1787 | mutex_unlock(&fanout_mutex); |
1795 | 1788 | ||
@@ -3029,6 +3022,7 @@ static int packet_release(struct socket *sock) | |||
3029 | synchronize_net(); | 3022 | synchronize_net(); |
3030 | 3023 | ||
3031 | if (f) { | 3024 | if (f) { |
3025 | kfree(po->rollover); | ||
3032 | fanout_release_data(f); | 3026 | fanout_release_data(f); |
3033 | kfree(f); | 3027 | kfree(f); |
3034 | } | 3028 | } |
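The af_packet hunks above rework the rollover object's lifetime: fanout_add() keeps the new allocation in a local variable, transfers it to po->rollover only on the success path, and frees whatever it still owns with a plain kfree(), while the final free moves to packet_release(). A userspace sketch of that ownership-transfer shape (the structs and the failure injection are simplified stand-ins):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct rollover { long num; };
struct pkt_sock { struct rollover *rollover; };

static int fanout_add(struct pkt_sock *po, int want_rollover, int fail)
{
	struct rollover *rollover = NULL;
	int err = 0;

	if (want_rollover) {
		rollover = calloc(1, sizeof(*rollover));
		if (!rollover)
			return -ENOMEM;
	}

	if (fail) {			/* e.g. no compatible fanout group */
		err = -EINVAL;
		goto out;
	}

	po->rollover = rollover;	/* ownership moves to the socket */
	rollover = NULL;
out:
	free(rollover);			/* no-op once ownership moved */
	return err;
}

int main(void)
{
	struct pkt_sock po = { NULL };

	printf("error path:   %d, rollover=%p\n",
	       fanout_add(&po, 1, 1), (void *)po.rollover);
	printf("success path: %d, rollover=%p\n",
	       fanout_add(&po, 1, 0), (void *)po.rollover);
	free(po.rollover);		/* packet_release() in the kernel */
	return 0;
}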
@@ -3097,6 +3091,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, | |||
3097 | if (need_rehook) { | 3091 | if (need_rehook) { |
3098 | if (po->running) { | 3092 | if (po->running) { |
3099 | rcu_read_unlock(); | 3093 | rcu_read_unlock(); |
3094 | /* prevents packet_notifier() from calling | ||
3095 | * register_prot_hook() | ||
3096 | */ | ||
3097 | po->num = 0; | ||
3100 | __unregister_prot_hook(sk, true); | 3098 | __unregister_prot_hook(sk, true); |
3101 | rcu_read_lock(); | 3099 | rcu_read_lock(); |
3102 | dev_curr = po->prot_hook.dev; | 3100 | dev_curr = po->prot_hook.dev; |
@@ -3105,6 +3103,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, | |||
3105 | dev->ifindex); | 3103 | dev->ifindex); |
3106 | } | 3104 | } |
3107 | 3105 | ||
3106 | BUG_ON(po->running); | ||
3108 | po->num = proto; | 3107 | po->num = proto; |
3109 | po->prot_hook.type = proto; | 3108 | po->prot_hook.type = proto; |
3110 | 3109 | ||
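The packet_do_bind() hunk above closes a rebind race: po->num is zeroed before the protocol hook is unregistered and the RCU lock dropped, so packet_notifier() observing the socket in that window sees num == 0 and will not call register_prot_hook(); the new BUG_ON() asserts the hook really stayed off. A single-threaded stand-in for the ordering (the struct and notifier are simplified; the real race involves a concurrent netdev event):

#include <stdbool.h>
#include <stdio.h>

struct psock { int num; bool running; };

static void packet_notifier(struct psock *po)
{
	if (po->num)		/* would re-register the hook */
		po->running = true;
}

int main(void)
{
	struct psock po = { .num = 0x0800, .running = true };

	po.num = 0;		/* published first: blocks re-registration */
	po.running = false;	/* __unregister_prot_hook() */
	packet_notifier(&po);	/* may run here in the unlocked window */
	printf("hook running during rebind window: %d\n", po.running);
	return 0;
}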
@@ -3843,7 +3842,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
3843 | void *data = &val; | 3842 | void *data = &val; |
3844 | union tpacket_stats_u st; | 3843 | union tpacket_stats_u st; |
3845 | struct tpacket_rollover_stats rstats; | 3844 | struct tpacket_rollover_stats rstats; |
3846 | struct packet_rollover *rollover; | ||
3847 | 3845 | ||
3848 | if (level != SOL_PACKET) | 3846 | if (level != SOL_PACKET) |
3849 | return -ENOPROTOOPT; | 3847 | return -ENOPROTOOPT; |
@@ -3922,18 +3920,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
3922 | 0); | 3920 | 0); |
3923 | break; | 3921 | break; |
3924 | case PACKET_ROLLOVER_STATS: | 3922 | case PACKET_ROLLOVER_STATS: |
3925 | rcu_read_lock(); | 3923 | if (!po->rollover) |
3926 | rollover = rcu_dereference(po->rollover); | ||
3927 | if (rollover) { | ||
3928 | rstats.tp_all = atomic_long_read(&rollover->num); | ||
3929 | rstats.tp_huge = atomic_long_read(&rollover->num_huge); | ||
3930 | rstats.tp_failed = atomic_long_read(&rollover->num_failed); | ||
3931 | data = &rstats; | ||
3932 | lv = sizeof(rstats); | ||
3933 | } | ||
3934 | rcu_read_unlock(); | ||
3935 | if (!rollover) | ||
3936 | return -EINVAL; | 3924 | return -EINVAL; |
3925 | rstats.tp_all = atomic_long_read(&po->rollover->num); | ||
3926 | rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); | ||
3927 | rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); | ||
3928 | data = &rstats; | ||
3929 | lv = sizeof(rstats); | ||
3937 | break; | 3930 | break; |
3938 | case PACKET_TX_HAS_OFF: | 3931 | case PACKET_TX_HAS_OFF: |
3939 | val = po->tp_tx_has_off; | 3932 | val = po->tp_tx_has_off; |
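With the rollover freed only in packet_release() above, its lifetime now matches the fanout's, so the PACKET_ROLLOVER_STATS getsockopt can read the counters directly rather than under rcu_read_lock(). A userspace sketch of the simplified read, with C11 atomics standing in for atomic_long_t:

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

struct packet_rollover {
	atomic_long num, num_huge, num_failed;
};

struct rollover_stats { long tp_all, tp_huge, tp_failed; };

static int read_rollover_stats(struct packet_rollover *r,
			       struct rollover_stats *st)
{
	if (!r)
		return -EINVAL;	/* no rollover configured on this socket */
	st->tp_all = atomic_load(&r->num);
	st->tp_huge = atomic_load(&r->num_huge);
	st->tp_failed = atomic_load(&r->num_failed);
	return 0;
}

int main(void)
{
	struct packet_rollover r;
	struct rollover_stats st;

	atomic_store(&r.num, 3);
	atomic_store(&r.num_huge, 1);
	atomic_store(&r.num_failed, 0);
	if (!read_rollover_stats(&r, &st))
		printf("all=%ld huge=%ld failed=%ld\n",
		       st.tp_all, st.tp_huge, st.tp_failed);
	return 0;
}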
@@ -4080,12 +4073,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, | |||
4080 | return 0; | 4073 | return 0; |
4081 | } | 4074 | } |
4082 | 4075 | ||
4083 | static unsigned int packet_poll(struct file *file, struct socket *sock, | 4076 | static __poll_t packet_poll(struct file *file, struct socket *sock, |
4084 | poll_table *wait) | 4077 | poll_table *wait) |
4085 | { | 4078 | { |
4086 | struct sock *sk = sock->sk; | 4079 | struct sock *sk = sock->sk; |
4087 | struct packet_sock *po = pkt_sk(sk); | 4080 | struct packet_sock *po = pkt_sk(sk); |
4088 | unsigned int mask = datagram_poll(file, sock, wait); | 4081 | __poll_t mask = datagram_poll(file, sock, wait); |
4089 | 4082 | ||
4090 | spin_lock_bh(&sk->sk_receive_queue.lock); | 4083 | spin_lock_bh(&sk->sk_receive_queue.lock); |
4091 | if (po->rx_ring.pg_vec) { | 4084 | if (po->rx_ring.pg_vec) { |
diff --git a/net/packet/internal.h b/net/packet/internal.h index 562fbc155006..a1d2b2319ae9 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h | |||
@@ -95,7 +95,6 @@ struct packet_fanout { | |||
95 | 95 | ||
96 | struct packet_rollover { | 96 | struct packet_rollover { |
97 | int sock; | 97 | int sock; |
98 | struct rcu_head rcu; | ||
99 | atomic_long_t num; | 98 | atomic_long_t num; |
100 | atomic_long_t num_huge; | 99 | atomic_long_t num_huge; |
101 | atomic_long_t num_failed; | 100 | atomic_long_t num_failed; |
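The internal.h hunk drops the now-unused rcu_head; the other theme running through af_packet above and phonet, rds, rfkill and rxrpc below is retyping poll handlers from unsigned int to __poll_t, so sparse can flag code that mixes the event mask with plain integers or error codes. A userspace sketch of the idea (the typedef below drops the kernel's __bitwise annotation, which is what gives sparse its leverage):

#include <poll.h>
#include <stdio.h>

typedef unsigned int __poll_t;	/* kernel version is __bitwise */

static __poll_t sock_poll(int readable, int writable)
{
	__poll_t mask = 0;	/* a mask, never an errno: poll can't fail */

	if (readable)
		mask |= POLLIN | POLLRDNORM;
	if (writable)
		mask |= POLLOUT | POLLWRNORM;
	return mask;
}

int main(void)
{
	printf("mask = %#x\n", sock_poll(1, 0));
	return 0;
}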
diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 1b050dd17393..44417480dab7 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c | |||
@@ -341,12 +341,12 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, | |||
341 | return 0; | 341 | return 0; |
342 | } | 342 | } |
343 | 343 | ||
344 | static unsigned int pn_socket_poll(struct file *file, struct socket *sock, | 344 | static __poll_t pn_socket_poll(struct file *file, struct socket *sock, |
345 | poll_table *wait) | 345 | poll_table *wait) |
346 | { | 346 | { |
347 | struct sock *sk = sock->sk; | 347 | struct sock *sk = sock->sk; |
348 | struct pep_sock *pn = pep_sk(sk); | 348 | struct pep_sock *pn = pep_sk(sk); |
349 | unsigned int mask = 0; | 349 | __poll_t mask = 0; |
350 | 350 | ||
351 | poll_wait(file, sk_sleep(sk), wait); | 351 | poll_wait(file, sk_sleep(sk), wait); |
352 | 352 | ||
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index b405f77d664c..88aa8ad0f5b6 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c | |||
@@ -152,12 +152,12 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, | |||
152 | * to send to a congested destination, the system call may still fail (and | 152 | * to send to a congested destination, the system call may still fail (and |
153 | * return ENOBUFS). | 153 | * return ENOBUFS). |
154 | */ | 154 | */ |
155 | static unsigned int rds_poll(struct file *file, struct socket *sock, | 155 | static __poll_t rds_poll(struct file *file, struct socket *sock, |
156 | poll_table *wait) | 156 | poll_table *wait) |
157 | { | 157 | { |
158 | struct sock *sk = sock->sk; | 158 | struct sock *sk = sock->sk; |
159 | struct rds_sock *rs = rds_sk_to_rs(sk); | 159 | struct rds_sock *rs = rds_sk_to_rs(sk); |
160 | unsigned int mask = 0; | 160 | __poll_t mask = 0; |
161 | unsigned long flags; | 161 | unsigned long flags; |
162 | 162 | ||
163 | poll_wait(file, sk_sleep(sk), wait); | 163 | poll_wait(file, sk_sleep(sk), wait); |
diff --git a/net/rds/ib.c b/net/rds/ib.c index 36dd2099048a..b2a5067b4afe 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c | |||
@@ -301,13 +301,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, | |||
301 | memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); | 301 | memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); |
302 | if (rds_conn_state(conn) == RDS_CONN_UP) { | 302 | if (rds_conn_state(conn) == RDS_CONN_UP) { |
303 | struct rds_ib_device *rds_ibdev; | 303 | struct rds_ib_device *rds_ibdev; |
304 | struct rdma_dev_addr *dev_addr; | ||
305 | 304 | ||
306 | ic = conn->c_transport_data; | 305 | ic = conn->c_transport_data; |
307 | dev_addr = &ic->i_cm_id->route.addr.dev_addr; | ||
308 | 306 | ||
309 | rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); | 307 | rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid, |
310 | rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); | 308 | (union ib_gid *)&iinfo->dst_gid); |
311 | 309 | ||
312 | rds_ibdev = ic->rds_ibdev; | 310 | rds_ibdev = ic->rds_ibdev; |
313 | iinfo->max_send_wr = ic->i_send_ring.w_nr; | 311 | iinfo->max_send_wr = ic->i_send_ring.w_nr; |
diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 8886f15abe90..634cfcb7bba6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c | |||
@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, | |||
183 | long i; | 183 | long i; |
184 | int ret; | 184 | int ret; |
185 | 185 | ||
186 | if (rs->rs_bound_addr == 0) { | 186 | if (rs->rs_bound_addr == 0 || !rs->rs_transport) { |
187 | ret = -ENOTCONN; /* XXX not a great errno */ | 187 | ret = -ENOTCONN; /* XXX not a great errno */ |
188 | goto out; | 188 | goto out; |
189 | } | 189 | } |
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) | |||
525 | 525 | ||
526 | local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; | 526 | local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; |
527 | 527 | ||
528 | if (args->nr_local == 0) | ||
529 | return -EINVAL; | ||
530 | |||
528 | /* figure out the number of pages in the vector */ | 531 | /* figure out the number of pages in the vector */ |
529 | for (i = 0; i < args->nr_local; i++) { | 532 | for (i = 0; i < args->nr_local; i++) { |
530 | if (copy_from_user(&vec, &local_vec[i], | 533 | if (copy_from_user(&vec, &local_vec[i], |
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, | |||
874 | err: | 877 | err: |
875 | if (page) | 878 | if (page) |
876 | put_page(page); | 879 | put_page(page); |
880 | rm->atomic.op_active = 0; | ||
877 | kfree(rm->atomic.op_notifier); | 881 | kfree(rm->atomic.op_notifier); |
878 | 882 | ||
879 | return ret; | 883 | return ret; |
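The rds/rdma.c hunks above harden two paths: rds_rdma_extra_size() now rejects a zero-length user iovec up front rather than letting the page-counting loop return an empty result, and the atomic-op error path clears op_active so teardown never processes a half-built operation. A sketch of the first check (the struct and page accounting are stand-ins):

#include <errno.h>
#include <stdio.h>

struct rdma_args { unsigned long nr_local; };

static int rdma_extra_size(const struct rdma_args *args)
{
	unsigned long i, tot_pages = 0;

	if (args->nr_local == 0)
		return -EINVAL;		/* nothing to map: reject early */
	for (i = 0; i < args->nr_local; i++)
		tot_pages += 1;		/* stand-in for per-iovec pages */
	return (int)tot_pages;
}

int main(void)
{
	struct rdma_args bad = { .nr_local = 0 };

	printf("extra size = %d (expect %d)\n", rdma_extra_size(&bad), -EINVAL);
	return 0;
}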
diff --git a/net/rds/send.c b/net/rds/send.c index b52cdc8ae428..f72466c63f0c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c | |||
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes) | |||
1009 | continue; | 1009 | continue; |
1010 | 1010 | ||
1011 | if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { | 1011 | if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { |
1012 | if (cmsg->cmsg_len < | ||
1013 | CMSG_LEN(sizeof(struct rds_rdma_args))) | ||
1014 | return -EINVAL; | ||
1012 | args = CMSG_DATA(cmsg); | 1015 | args = CMSG_DATA(cmsg); |
1013 | *rdma_bytes += args->remote_vec.bytes; | 1016 | *rdma_bytes += args->remote_vec.bytes; |
1014 | } | 1017 | } |
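The rds/send.c check above guards CMSG_DATA(): cmsg_len must cover the rds_rdma_args payload before the kernel reads through it, since userspace controls the length field. A runnable userspace sketch of the same validation (RDS_CMSG_RDMA_ARGS and the args struct are stand-ins; the cmsg machinery is the real <sys/socket.h> one):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

#define RDS_CMSG_RDMA_ARGS 1			/* stand-in value */

struct rds_rdma_args { unsigned long remote_bytes; };	/* stand-in */

int main(void)
{
	union {					/* aligned cmsg buffer */
		char buf[CMSG_SPACE(sizeof(struct rds_rdma_args))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;

	memset(u.buf, 0, sizeof(u.buf));
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = RDS_CMSG_RDMA_ARGS;
	/* a malicious sender can declare less than the full payload */
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct rds_rdma_args) - 4);

	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))) {
		fprintf(stderr, "short cmsg rejected before CMSG_DATA()\n");
		return 1;
	}
	/* only past this check may CMSG_DATA(cmsg) be interpreted */
	return 0;
}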
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 6b7ee71f40c6..ab7356e0ba83 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c | |||
@@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock) | |||
90 | sizeof(val)); | 90 | sizeof(val)); |
91 | } | 91 | } |
92 | 92 | ||
93 | u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) | 93 | u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) |
94 | { | 94 | { |
95 | return tcp_sk(tc->t_sock->sk)->snd_nxt; | 95 | /* seq# of the last byte of data in tcp send buffer */ |
96 | return tcp_sk(tc->t_sock->sk)->write_seq; | ||
96 | } | 97 | } |
97 | 98 | ||
98 | u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) | 99 | u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) |
diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 1aafbf7c3011..864ca7d8f019 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h | |||
@@ -54,7 +54,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); | |||
54 | void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); | 54 | void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); |
55 | void rds_tcp_restore_callbacks(struct socket *sock, | 55 | void rds_tcp_restore_callbacks(struct socket *sock, |
56 | struct rds_tcp_connection *tc); | 56 | struct rds_tcp_connection *tc); |
57 | u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); | 57 | u32 rds_tcp_write_seq(struct rds_tcp_connection *tc); |
58 | u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); | 58 | u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); |
59 | u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); | 59 | u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); |
60 | extern struct rds_transport rds_tcp_transport; | 60 | extern struct rds_transport rds_tcp_transport; |
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index dc860d1bb608..9b76e0fa1722 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c | |||
@@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, | |||
86 | * m_ack_seq is set to the sequence number of the last byte of | 86 | * m_ack_seq is set to the sequence number of the last byte of |
87 | * header and data. see rds_tcp_is_acked(). | 87 | * header and data. see rds_tcp_is_acked(). |
88 | */ | 88 | */ |
89 | tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc); | 89 | tc->t_last_sent_nxt = rds_tcp_write_seq(tc); |
90 | rm->m_ack_seq = tc->t_last_sent_nxt + | 90 | rm->m_ack_seq = tc->t_last_sent_nxt + |
91 | sizeof(struct rds_header) + | 91 | sizeof(struct rds_header) + |
92 | be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; | 92 | be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; |
@@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, | |||
98 | rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; | 98 | rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; |
99 | 99 | ||
100 | rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", | 100 | rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", |
101 | rm, rds_tcp_snd_nxt(tc), | 101 | rm, rds_tcp_write_seq(tc), |
102 | (unsigned long long)rm->m_ack_seq); | 102 | (unsigned long long)rm->m_ack_seq); |
103 | } | 103 | } |
104 | 104 | ||
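The snd_nxt to write_seq switch above matters when data is still sitting in the TCP send queue: snd_nxt is the next byte the stack will transmit, while write_seq is the sequence number just past the last byte queued, which is what m_ack_seq must be derived from for rds_tcp_is_acked() to fire only after the whole message (header plus payload) is acked. A toy version of the bookkeeping (all numbers invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t write_seq = 5000;	/* one past the last queued byte */
	uint32_t hdr_len = 48;		/* stand-in for struct rds_header */
	uint32_t data_len = 1000;	/* payload about to be queued */
	uint64_t m_ack_seq;

	/* last byte of header + data, as rds_tcp_xmit() computes it */
	m_ack_seq = (uint64_t)write_seq + hdr_len + data_len - 1;
	printf("message acked once TCP acks byte %llu\n",
	       (unsigned long long)m_ack_seq);
	return 0;
}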
diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 2064c3a35ef8..124c77e9d058 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c | |||
@@ -1139,10 +1139,10 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) | |||
1139 | return -ENOMEM; | 1139 | return -ENOMEM; |
1140 | } | 1140 | } |
1141 | 1141 | ||
1142 | static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait) | 1142 | static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait) |
1143 | { | 1143 | { |
1144 | struct rfkill_data *data = file->private_data; | 1144 | struct rfkill_data *data = file->private_data; |
1145 | unsigned int res = POLLOUT | POLLWRNORM; | 1145 | __poll_t res = POLLOUT | POLLWRNORM; |
1146 | 1146 | ||
1147 | poll_wait(file, &data->read_wait, wait); | 1147 | poll_wait(file, &data->read_wait, wait); |
1148 | 1148 | ||
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9b5c46b052fd..21ad6a3a465c 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c | |||
@@ -285,6 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, | |||
285 | bool upgrade) | 285 | bool upgrade) |
286 | { | 286 | { |
287 | struct rxrpc_conn_parameters cp; | 287 | struct rxrpc_conn_parameters cp; |
288 | struct rxrpc_call_params p; | ||
288 | struct rxrpc_call *call; | 289 | struct rxrpc_call *call; |
289 | struct rxrpc_sock *rx = rxrpc_sk(sock->sk); | 290 | struct rxrpc_sock *rx = rxrpc_sk(sock->sk); |
290 | int ret; | 291 | int ret; |
@@ -302,6 +303,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, | |||
302 | if (key && !key->payload.data[0]) | 303 | if (key && !key->payload.data[0]) |
303 | key = NULL; /* a no-security key */ | 304 | key = NULL; /* a no-security key */ |
304 | 305 | ||
306 | memset(&p, 0, sizeof(p)); | ||
307 | p.user_call_ID = user_call_ID; | ||
308 | p.tx_total_len = tx_total_len; | ||
309 | |||
305 | memset(&cp, 0, sizeof(cp)); | 310 | memset(&cp, 0, sizeof(cp)); |
306 | cp.local = rx->local; | 311 | cp.local = rx->local; |
307 | cp.key = key; | 312 | cp.key = key; |
@@ -309,8 +314,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, | |||
309 | cp.exclusive = false; | 314 | cp.exclusive = false; |
310 | cp.upgrade = upgrade; | 315 | cp.upgrade = upgrade; |
311 | cp.service_id = srx->srx_service; | 316 | cp.service_id = srx->srx_service; |
312 | call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, | 317 | call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp); |
313 | gfp); | ||
314 | /* The socket has been unlocked. */ | 318 | /* The socket has been unlocked. */ |
315 | if (!IS_ERR(call)) { | 319 | if (!IS_ERR(call)) { |
316 | call->notify_rx = notify_rx; | 320 | call->notify_rx = notify_rx; |
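The rxrpc_kernel_begin_call() hunk above is a parameter-struct refactor: user_call_ID and tx_total_len now travel in a zeroed struct rxrpc_call_params, so the timeout fields added to that struct later in this series reach rxrpc_new_client_call() without another signature change. A sketch of the shape (names mirror the rxrpc ones, but everything here is local):

#include <stdio.h>
#include <string.h>

struct call_params {
	long long tx_total_len;		/* -1 means "not known yet" */
	unsigned long user_call_ID;
	/* new knobs can be appended without touching callers */
};

static void new_client_call(const struct call_params *p)
{
	printf("call %#lx, tx_total_len %lld\n",
	       p->user_call_ID, p->tx_total_len);
}

int main(void)
{
	struct call_params p;

	memset(&p, 0, sizeof(p));	/* unset fields get safe defaults */
	p.user_call_ID = 0x1234;
	p.tx_total_len = -1;
	new_client_call(&p);
	return 0;
}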
@@ -725,12 +729,12 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname, | |||
725 | /* | 729 | /* |
726 | * permit an RxRPC socket to be polled | 730 | * permit an RxRPC socket to be polled |
727 | */ | 731 | */ |
728 | static unsigned int rxrpc_poll(struct file *file, struct socket *sock, | 732 | static __poll_t rxrpc_poll(struct file *file, struct socket *sock, |
729 | poll_table *wait) | 733 | poll_table *wait) |
730 | { | 734 | { |
731 | struct sock *sk = sock->sk; | 735 | struct sock *sk = sock->sk; |
732 | struct rxrpc_sock *rx = rxrpc_sk(sk); | 736 | struct rxrpc_sock *rx = rxrpc_sk(sk); |
733 | unsigned int mask; | 737 | __poll_t mask; |
734 | 738 | ||
735 | sock_poll_wait(file, sk_sleep(sk), wait); | 739 | sock_poll_wait(file, sk_sleep(sk), wait); |
736 | mask = 0; | 740 | mask = 0; |
@@ -856,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk) | |||
856 | static int rxrpc_release_sock(struct sock *sk) | 860 | static int rxrpc_release_sock(struct sock *sk) |
857 | { | 861 | { |
858 | struct rxrpc_sock *rx = rxrpc_sk(sk); | 862 | struct rxrpc_sock *rx = rxrpc_sk(sk); |
863 | struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); | ||
859 | 864 | ||
860 | _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); | 865 | _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); |
861 | 866 | ||
@@ -863,6 +868,19 @@ static int rxrpc_release_sock(struct sock *sk) | |||
863 | sock_orphan(sk); | 868 | sock_orphan(sk); |
864 | sk->sk_shutdown = SHUTDOWN_MASK; | 869 | sk->sk_shutdown = SHUTDOWN_MASK; |
865 | 870 | ||
871 | /* We want to kill off all connections from a service socket | ||
872 | * as fast as possible because we can't share these; client | ||
873 | * sockets, on the other hand, can share an endpoint. | ||
874 | */ | ||
875 | switch (sk->sk_state) { | ||
876 | case RXRPC_SERVER_BOUND: | ||
877 | case RXRPC_SERVER_BOUND2: | ||
878 | case RXRPC_SERVER_LISTENING: | ||
879 | case RXRPC_SERVER_LISTEN_DISABLED: | ||
880 | rx->local->service_closed = true; | ||
881 | break; | ||
882 | } | ||
883 | |||
866 | spin_lock_bh(&sk->sk_receive_queue.lock); | 884 | spin_lock_bh(&sk->sk_receive_queue.lock); |
867 | sk->sk_state = RXRPC_CLOSE; | 885 | sk->sk_state = RXRPC_CLOSE; |
868 | spin_unlock_bh(&sk->sk_receive_queue.lock); | 886 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
@@ -878,6 +896,8 @@ static int rxrpc_release_sock(struct sock *sk) | |||
878 | rxrpc_release_calls_on_socket(rx); | 896 | rxrpc_release_calls_on_socket(rx); |
879 | flush_workqueue(rxrpc_workqueue); | 897 | flush_workqueue(rxrpc_workqueue); |
880 | rxrpc_purge_queue(&sk->sk_receive_queue); | 898 | rxrpc_purge_queue(&sk->sk_receive_queue); |
899 | rxrpc_queue_work(&rxnet->service_conn_reaper); | ||
900 | rxrpc_queue_work(&rxnet->client_conn_reaper); | ||
881 | 901 | ||
882 | rxrpc_put_local(rx->local); | 902 | rxrpc_put_local(rx->local); |
883 | rx->local = NULL; | 903 | rx->local = NULL; |
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b2151993d384..416688381eb7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h | |||
@@ -79,17 +79,20 @@ struct rxrpc_net { | |||
79 | struct list_head conn_proc_list; /* List of conns in this namespace for proc */ | 79 | struct list_head conn_proc_list; /* List of conns in this namespace for proc */ |
80 | struct list_head service_conns; /* Service conns in this namespace */ | 80 | struct list_head service_conns; /* Service conns in this namespace */ |
81 | rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ | 81 | rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ |
82 | struct delayed_work service_conn_reaper; | 82 | struct work_struct service_conn_reaper; |
83 | struct timer_list service_conn_reap_timer; | ||
83 | 84 | ||
84 | unsigned int nr_client_conns; | 85 | unsigned int nr_client_conns; |
85 | unsigned int nr_active_client_conns; | 86 | unsigned int nr_active_client_conns; |
86 | bool kill_all_client_conns; | 87 | bool kill_all_client_conns; |
88 | bool live; | ||
87 | spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ | 89 | spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ |
88 | spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ | 90 | spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ |
89 | struct list_head waiting_client_conns; | 91 | struct list_head waiting_client_conns; |
90 | struct list_head active_client_conns; | 92 | struct list_head active_client_conns; |
91 | struct list_head idle_client_conns; | 93 | struct list_head idle_client_conns; |
92 | struct delayed_work client_conn_reaper; | 94 | struct work_struct client_conn_reaper; |
95 | struct timer_list client_conn_reap_timer; | ||
93 | 96 | ||
94 | struct list_head local_endpoints; | 97 | struct list_head local_endpoints; |
95 | struct mutex local_mutex; /* Lock for ->local_endpoints */ | 98 | struct mutex local_mutex; /* Lock for ->local_endpoints */ |
@@ -265,6 +268,7 @@ struct rxrpc_local { | |||
265 | rwlock_t services_lock; /* lock for services list */ | 268 | rwlock_t services_lock; /* lock for services list */ |
266 | int debug_id; /* debug ID for printks */ | 269 | int debug_id; /* debug ID for printks */ |
267 | bool dead; | 270 | bool dead; |
271 | bool service_closed; /* Service socket closed */ | ||
268 | struct sockaddr_rxrpc srx; /* local address */ | 272 | struct sockaddr_rxrpc srx; /* local address */ |
269 | }; | 273 | }; |
270 | 274 | ||
@@ -338,8 +342,17 @@ enum rxrpc_conn_flag { | |||
338 | RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ | 342 | RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ |
339 | RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ | 343 | RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ |
340 | RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ | 344 | RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ |
345 | RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */ | ||
346 | RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */ | ||
347 | RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */ | ||
348 | RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */ | ||
341 | }; | 349 | }; |
342 | 350 | ||
351 | #define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \ | ||
352 | (1UL << RXRPC_CONN_FINAL_ACK_1) | \ | ||
353 | (1UL << RXRPC_CONN_FINAL_ACK_2) | \ | ||
354 | (1UL << RXRPC_CONN_FINAL_ACK_3)) | ||
355 | |||
343 | /* | 356 | /* |
344 | * Events that can be raised upon a connection. | 357 | * Events that can be raised upon a connection. |
345 | */ | 358 | */ |
@@ -393,6 +406,7 @@ struct rxrpc_connection { | |||
393 | #define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) | 406 | #define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) |
394 | struct list_head waiting_calls; /* Calls waiting for channels */ | 407 | struct list_head waiting_calls; /* Calls waiting for channels */ |
395 | struct rxrpc_channel { | 408 | struct rxrpc_channel { |
409 | unsigned long final_ack_at; /* Time at which to issue final ACK */ | ||
396 | struct rxrpc_call __rcu *call; /* Active call */ | 410 | struct rxrpc_call __rcu *call; /* Active call */ |
397 | u32 call_id; /* ID of current call */ | 411 | u32 call_id; /* ID of current call */ |
398 | u32 call_counter; /* Call ID counter */ | 412 | u32 call_counter; /* Call ID counter */ |
@@ -404,6 +418,7 @@ struct rxrpc_connection { | |||
404 | }; | 418 | }; |
405 | } channels[RXRPC_MAXCALLS]; | 419 | } channels[RXRPC_MAXCALLS]; |
406 | 420 | ||
421 | struct timer_list timer; /* Conn event timer */ | ||
407 | struct work_struct processor; /* connection event processor */ | 422 | struct work_struct processor; /* connection event processor */ |
408 | union { | 423 | union { |
409 | struct rb_node client_node; /* Node in local->client_conns */ | 424 | struct rb_node client_node; /* Node in local->client_conns */ |
@@ -457,9 +472,10 @@ enum rxrpc_call_flag { | |||
457 | enum rxrpc_call_event { | 472 | enum rxrpc_call_event { |
458 | RXRPC_CALL_EV_ACK, /* need to generate ACK */ | 473 | RXRPC_CALL_EV_ACK, /* need to generate ACK */ |
459 | RXRPC_CALL_EV_ABORT, /* need to generate abort */ | 474 | RXRPC_CALL_EV_ABORT, /* need to generate abort */ |
460 | RXRPC_CALL_EV_TIMER, /* Timer expired */ | ||
461 | RXRPC_CALL_EV_RESEND, /* Tx resend required */ | 475 | RXRPC_CALL_EV_RESEND, /* Tx resend required */ |
462 | RXRPC_CALL_EV_PING, /* Ping send required */ | 476 | RXRPC_CALL_EV_PING, /* Ping send required */ |
477 | RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ | ||
478 | RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ | ||
463 | }; | 479 | }; |
464 | 480 | ||
465 | /* | 481 | /* |
@@ -503,10 +519,16 @@ struct rxrpc_call { | |||
503 | struct rxrpc_peer *peer; /* Peer record for remote address */ | 519 | struct rxrpc_peer *peer; /* Peer record for remote address */ |
504 | struct rxrpc_sock __rcu *socket; /* socket responsible */ | 520 | struct rxrpc_sock __rcu *socket; /* socket responsible */ |
505 | struct mutex user_mutex; /* User access mutex */ | 521 | struct mutex user_mutex; /* User access mutex */ |
506 | ktime_t ack_at; /* When deferred ACK needs to happen */ | 522 | unsigned long ack_at; /* When deferred ACK needs to happen */ |
507 | ktime_t resend_at; /* When next resend needs to happen */ | 523 | unsigned long ack_lost_at; /* When ACK is figured as lost */ |
508 | ktime_t ping_at; /* When next to send a ping */ | 524 | unsigned long resend_at; /* When next resend needs to happen */ |
509 | ktime_t expire_at; /* When the call times out */ | 525 | unsigned long ping_at; /* When next to send a ping */ |
526 | unsigned long keepalive_at; /* When next to send a keepalive ping */ | ||
527 | unsigned long expect_rx_by; /* When we expect to get a packet by */ | ||
528 | unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ | ||
529 | unsigned long expect_term_by; /* When we expect call termination by */ | ||
530 | u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ | ||
531 | u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ | ||
510 | struct timer_list timer; /* Combined event timer */ | 532 | struct timer_list timer; /* Combined event timer */ |
511 | struct work_struct processor; /* Event processor */ | 533 | struct work_struct processor; /* Event processor */ |
512 | rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ | 534 | rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ |
@@ -609,6 +631,8 @@ struct rxrpc_call { | |||
609 | ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ | 631 | ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ |
610 | rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ | 632 | rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ |
611 | rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ | 633 | rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ |
634 | rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ | ||
635 | rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ | ||
612 | }; | 636 | }; |
613 | 637 | ||
614 | /* | 638 | /* |
@@ -632,6 +656,35 @@ struct rxrpc_ack_summary { | |||
632 | u8 cumulative_acks; | 656 | u8 cumulative_acks; |
633 | }; | 657 | }; |
634 | 658 | ||
659 | /* | ||
660 | * sendmsg() cmsg-specified parameters. | ||
661 | */ | ||
662 | enum rxrpc_command { | ||
663 | RXRPC_CMD_SEND_DATA, /* send data message */ | ||
664 | RXRPC_CMD_SEND_ABORT, /* request abort generation */ | ||
665 | RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ | ||
666 | RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ | ||
667 | }; | ||
668 | |||
669 | struct rxrpc_call_params { | ||
670 | s64 tx_total_len; /* Total Tx data length (if send data) */ | ||
671 | unsigned long user_call_ID; /* User's call ID */ | ||
672 | struct { | ||
673 | u32 hard; /* Maximum lifetime (sec) */ | ||
674 | u32 idle; /* Max time since last data packet (msec) */ | ||
675 | u32 normal; /* Max time since last call packet (msec) */ | ||
676 | } timeouts; | ||
677 | u8 nr_timeouts; /* Number of timeouts specified */ | ||
678 | }; | ||
679 | |||
680 | struct rxrpc_send_params { | ||
681 | struct rxrpc_call_params call; | ||
682 | u32 abort_code; /* Abort code to Tx (if abort) */ | ||
683 | enum rxrpc_command command : 8; /* The command to implement */ | ||
684 | bool exclusive; /* Shared or exclusive call */ | ||
685 | bool upgrade; /* If the connection is upgradeable */ | ||
686 | }; | ||
687 | |||
635 | #include <trace/events/rxrpc.h> | 688 | #include <trace/events/rxrpc.h> |
636 | 689 | ||
637 | /* | 690 | /* |
@@ -657,12 +710,19 @@ int rxrpc_reject_call(struct rxrpc_sock *); | |||
657 | /* | 710 | /* |
658 | * call_event.c | 711 | * call_event.c |
659 | */ | 712 | */ |
660 | void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); | ||
661 | void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); | ||
662 | void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, | 713 | void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, |
663 | enum rxrpc_propose_ack_trace); | 714 | enum rxrpc_propose_ack_trace); |
664 | void rxrpc_process_call(struct work_struct *); | 715 | void rxrpc_process_call(struct work_struct *); |
665 | 716 | ||
717 | static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call, | ||
718 | unsigned long expire_at, | ||
719 | unsigned long now, | ||
720 | enum rxrpc_timer_trace why) | ||
721 | { | ||
722 | trace_rxrpc_timer(call, why, now); | ||
723 | timer_reduce(&call->timer, expire_at); | ||
724 | } | ||
725 | |||
666 | /* | 726 | /* |
667 | * call_object.c | 727 | * call_object.c |
668 | */ | 728 | */ |
@@ -672,11 +732,11 @@ extern unsigned int rxrpc_max_call_lifetime; | |||
672 | extern struct kmem_cache *rxrpc_call_jar; | 732 | extern struct kmem_cache *rxrpc_call_jar; |
673 | 733 | ||
674 | struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); | 734 | struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); |
675 | struct rxrpc_call *rxrpc_alloc_call(gfp_t); | 735 | struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); |
676 | struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, | 736 | struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, |
677 | struct rxrpc_conn_parameters *, | 737 | struct rxrpc_conn_parameters *, |
678 | struct sockaddr_rxrpc *, | 738 | struct sockaddr_rxrpc *, |
679 | unsigned long, s64, gfp_t); | 739 | struct rxrpc_call_params *, gfp_t); |
680 | int rxrpc_retry_client_call(struct rxrpc_sock *, | 740 | int rxrpc_retry_client_call(struct rxrpc_sock *, |
681 | struct rxrpc_call *, | 741 | struct rxrpc_call *, |
682 | struct rxrpc_conn_parameters *, | 742 | struct rxrpc_conn_parameters *, |
@@ -803,8 +863,8 @@ static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, | |||
803 | */ | 863 | */ |
804 | extern unsigned int rxrpc_max_client_connections; | 864 | extern unsigned int rxrpc_max_client_connections; |
805 | extern unsigned int rxrpc_reap_client_connections; | 865 | extern unsigned int rxrpc_reap_client_connections; |
806 | extern unsigned int rxrpc_conn_idle_client_expiry; | 866 | extern unsigned long rxrpc_conn_idle_client_expiry; |
807 | extern unsigned int rxrpc_conn_idle_client_fast_expiry; | 867 | extern unsigned long rxrpc_conn_idle_client_fast_expiry; |
808 | extern struct idr rxrpc_client_conn_ids; | 868 | extern struct idr rxrpc_client_conn_ids; |
809 | 869 | ||
810 | void rxrpc_destroy_client_conn_ids(void); | 870 | void rxrpc_destroy_client_conn_ids(void); |
@@ -825,6 +885,7 @@ void rxrpc_process_connection(struct work_struct *); | |||
825 | * conn_object.c | 885 | * conn_object.c |
826 | */ | 886 | */ |
827 | extern unsigned int rxrpc_connection_expiry; | 887 | extern unsigned int rxrpc_connection_expiry; |
888 | extern unsigned int rxrpc_closed_conn_expiry; | ||
828 | 889 | ||
829 | struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); | 890 | struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); |
830 | struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, | 891 | struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, |
@@ -861,6 +922,12 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn) | |||
861 | rxrpc_put_service_conn(conn); | 922 | rxrpc_put_service_conn(conn); |
862 | } | 923 | } |
863 | 924 | ||
925 | static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, | ||
926 | unsigned long expire_at) | ||
927 | { | ||
928 | timer_reduce(&conn->timer, expire_at); | ||
929 | } | ||
930 | |||
864 | /* | 931 | /* |
865 | * conn_service.c | 932 | * conn_service.c |
866 | */ | 933 | */ |
@@ -930,13 +997,13 @@ static inline void rxrpc_queue_local(struct rxrpc_local *local) | |||
930 | * misc.c | 997 | * misc.c |
931 | */ | 998 | */ |
932 | extern unsigned int rxrpc_max_backlog __read_mostly; | 999 | extern unsigned int rxrpc_max_backlog __read_mostly; |
933 | extern unsigned int rxrpc_requested_ack_delay; | 1000 | extern unsigned long rxrpc_requested_ack_delay; |
934 | extern unsigned int rxrpc_soft_ack_delay; | 1001 | extern unsigned long rxrpc_soft_ack_delay; |
935 | extern unsigned int rxrpc_idle_ack_delay; | 1002 | extern unsigned long rxrpc_idle_ack_delay; |
936 | extern unsigned int rxrpc_rx_window_size; | 1003 | extern unsigned int rxrpc_rx_window_size; |
937 | extern unsigned int rxrpc_rx_mtu; | 1004 | extern unsigned int rxrpc_rx_mtu; |
938 | extern unsigned int rxrpc_rx_jumbo_max; | 1005 | extern unsigned int rxrpc_rx_jumbo_max; |
939 | extern unsigned int rxrpc_resend_timeout; | 1006 | extern unsigned long rxrpc_resend_timeout; |
940 | 1007 | ||
941 | extern const s8 rxrpc_ack_priority[]; | 1008 | extern const s8 rxrpc_ack_priority[]; |
942 | 1009 | ||
@@ -954,7 +1021,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) | |||
954 | /* | 1021 | /* |
955 | * output.c | 1022 | * output.c |
956 | */ | 1023 | */ |
957 | int rxrpc_send_ack_packet(struct rxrpc_call *, bool); | 1024 | int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *); |
958 | int rxrpc_send_abort_packet(struct rxrpc_call *); | 1025 | int rxrpc_send_abort_packet(struct rxrpc_call *); |
959 | int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); | 1026 | int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); |
960 | void rxrpc_reject_packets(struct rxrpc_local *); | 1027 | void rxrpc_reject_packets(struct rxrpc_local *); |
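The ar-internal.h changes move rxrpc timekeeping from ktime_t to jiffies and add rxrpc_reduce_call_timer()/rxrpc_reduce_conn_timer() around timer_reduce(), which only ever pulls a pending timer earlier; with every deadline parked far in the future until needed, one timer can multiplex all of a call's timeouts. A userspace stand-in for the reduce-only update (it ignores timer_reduce()'s arming of a not-yet-pending timer):

#include <limits.h>
#include <stdio.h>

#define MAX_JIFFY_OFFSET ((LONG_MAX >> 1) - 1)
#define time_before(a, b) ((long)((a) - (b)) < 0)

static unsigned long timer_expires;

static void timer_reduce(unsigned long expire_at)
{
	/* like the kernel helper: only ever move the expiry earlier */
	if (time_before(expire_at, timer_expires))
		timer_expires = expire_at;
}

int main(void)
{
	unsigned long now = 1000;

	timer_expires = now + MAX_JIFFY_OFFSET;	/* parked at "never" */
	timer_reduce(now + 20 * 250);	/* expect_rx_by: 20 s at HZ=250 */
	timer_reduce(now + 2);		/* pending ACK pulls it earlier */
	timer_reduce(now + 500);	/* later deadline: no effect */
	printf("timer fires at jiffy %lu\n", timer_expires);
	return 0;
}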
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index cbd1701e813a..3028298ca561 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c | |||
@@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, | |||
94 | /* Now it gets complicated, because calls get registered with the | 94 | /* Now it gets complicated, because calls get registered with the |
95 | * socket here, particularly if a user ID is preassigned by the user. | 95 | * socket here, particularly if a user ID is preassigned by the user. |
96 | */ | 96 | */ |
97 | call = rxrpc_alloc_call(gfp); | 97 | call = rxrpc_alloc_call(rx, gfp); |
98 | if (!call) | 98 | if (!call) |
99 | return -ENOMEM; | 99 | return -ENOMEM; |
100 | call->flags |= (1 << RXRPC_CALL_IS_SERVICE); | 100 | call->flags |= (1 << RXRPC_CALL_IS_SERVICE); |
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3574508baf9a..ad2ab1103189 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c | |||
@@ -22,80 +22,6 @@ | |||
22 | #include "ar-internal.h" | 22 | #include "ar-internal.h" |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * Set the timer | ||
26 | */ | ||
27 | void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, | ||
28 | ktime_t now) | ||
29 | { | ||
30 | unsigned long t_j, now_j = jiffies; | ||
31 | ktime_t t; | ||
32 | bool queue = false; | ||
33 | |||
34 | if (call->state < RXRPC_CALL_COMPLETE) { | ||
35 | t = call->expire_at; | ||
36 | if (!ktime_after(t, now)) { | ||
37 | trace_rxrpc_timer(call, why, now, now_j); | ||
38 | queue = true; | ||
39 | goto out; | ||
40 | } | ||
41 | |||
42 | if (!ktime_after(call->resend_at, now)) { | ||
43 | call->resend_at = call->expire_at; | ||
44 | if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) | ||
45 | queue = true; | ||
46 | } else if (ktime_before(call->resend_at, t)) { | ||
47 | t = call->resend_at; | ||
48 | } | ||
49 | |||
50 | if (!ktime_after(call->ack_at, now)) { | ||
51 | call->ack_at = call->expire_at; | ||
52 | if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) | ||
53 | queue = true; | ||
54 | } else if (ktime_before(call->ack_at, t)) { | ||
55 | t = call->ack_at; | ||
56 | } | ||
57 | |||
58 | if (!ktime_after(call->ping_at, now)) { | ||
59 | call->ping_at = call->expire_at; | ||
60 | if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) | ||
61 | queue = true; | ||
62 | } else if (ktime_before(call->ping_at, t)) { | ||
63 | t = call->ping_at; | ||
64 | } | ||
65 | |||
66 | t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); | ||
67 | t_j += jiffies; | ||
68 | |||
69 | /* We have to make sure that the calculated jiffies value falls | ||
70 | * at or after the nsec value, or we may loop ceaselessly | ||
71 | * because the timer times out, but we haven't reached the nsec | ||
72 | * timeout yet. | ||
73 | */ | ||
74 | t_j++; | ||
75 | |||
76 | if (call->timer.expires != t_j || !timer_pending(&call->timer)) { | ||
77 | mod_timer(&call->timer, t_j); | ||
78 | trace_rxrpc_timer(call, why, now, now_j); | ||
79 | } | ||
80 | } | ||
81 | |||
82 | out: | ||
83 | if (queue) | ||
84 | rxrpc_queue_call(call); | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Set the timer | ||
89 | */ | ||
90 | void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, | ||
91 | ktime_t now) | ||
92 | { | ||
93 | read_lock_bh(&call->state_lock); | ||
94 | __rxrpc_set_timer(call, why, now); | ||
95 | read_unlock_bh(&call->state_lock); | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * Propose a PING ACK be sent. | 25 | * Propose a PING ACK be sent. |
100 | */ | 26 | */ |
101 | static void rxrpc_propose_ping(struct rxrpc_call *call, | 27 | static void rxrpc_propose_ping(struct rxrpc_call *call, |
@@ -106,12 +32,13 @@ static void rxrpc_propose_ping(struct rxrpc_call *call, | |||
106 | !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) | 32 | !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) |
107 | rxrpc_queue_call(call); | 33 | rxrpc_queue_call(call); |
108 | } else { | 34 | } else { |
109 | ktime_t now = ktime_get_real(); | 35 | unsigned long now = jiffies; |
110 | ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay); | 36 | unsigned long ping_at = now + rxrpc_idle_ack_delay; |
111 | 37 | ||
112 | if (ktime_before(ping_at, call->ping_at)) { | 38 | if (time_before(ping_at, call->ping_at)) { |
113 | call->ping_at = ping_at; | 39 | WRITE_ONCE(call->ping_at, ping_at); |
114 | rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now); | 40 | rxrpc_reduce_call_timer(call, ping_at, now, |
41 | rxrpc_timer_set_for_ping); | ||
115 | } | 42 | } |
116 | } | 43 | } |
117 | } | 44 | } |
@@ -125,8 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, | |||
125 | enum rxrpc_propose_ack_trace why) | 52 | enum rxrpc_propose_ack_trace why) |
126 | { | 53 | { |
127 | enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; | 54 | enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; |
128 | unsigned int expiry = rxrpc_soft_ack_delay; | 55 | unsigned long expiry = rxrpc_soft_ack_delay; |
129 | ktime_t now, ack_at; | ||
130 | s8 prior = rxrpc_ack_priority[ack_reason]; | 56 | s8 prior = rxrpc_ack_priority[ack_reason]; |
131 | 57 | ||
132 | /* Pings are handled specially because we don't want to accidentally | 58 | /* Pings are handled specially because we don't want to accidentally |
@@ -190,11 +116,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, | |||
190 | background) | 116 | background) |
191 | rxrpc_queue_call(call); | 117 | rxrpc_queue_call(call); |
192 | } else { | 118 | } else { |
193 | now = ktime_get_real(); | 119 | unsigned long now = jiffies, ack_at; |
194 | ack_at = ktime_add_ms(now, expiry); | 120 | |
195 | if (ktime_before(ack_at, call->ack_at)) { | 121 | if (call->peer->rtt_usage > 0) |
196 | call->ack_at = ack_at; | 122 | ack_at = nsecs_to_jiffies(call->peer->rtt); |
197 | rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); | 123 | else |
124 | ack_at = expiry; | ||
125 | |||
126 | ack_at += now; | ||
127 | if (time_before(ack_at, call->ack_at)) { | ||
128 | WRITE_ONCE(call->ack_at, ack_at); | ||
129 | rxrpc_reduce_call_timer(call, ack_at, now, | ||
130 | rxrpc_timer_set_for_ack); | ||
198 | } | 131 | } |
199 | } | 132 | } |
200 | 133 | ||
@@ -227,18 +160,28 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) | |||
227 | /* | 160 | /* |
228 | * Perform retransmission of NAK'd and unack'd packets. | 161 | * Perform retransmission of NAK'd and unack'd packets. |
229 | */ | 162 | */ |
230 | static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) | 163 | static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) |
231 | { | 164 | { |
232 | struct rxrpc_skb_priv *sp; | 165 | struct rxrpc_skb_priv *sp; |
233 | struct sk_buff *skb; | 166 | struct sk_buff *skb; |
167 | unsigned long resend_at; | ||
234 | rxrpc_seq_t cursor, seq, top; | 168 | rxrpc_seq_t cursor, seq, top; |
235 | ktime_t max_age, oldest, ack_ts; | 169 | ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; |
236 | int ix; | 170 | int ix; |
237 | u8 annotation, anno_type, retrans = 0, unacked = 0; | 171 | u8 annotation, anno_type, retrans = 0, unacked = 0; |
238 | 172 | ||
239 | _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); | 173 | _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); |
240 | 174 | ||
241 | max_age = ktime_sub_ms(now, rxrpc_resend_timeout); | 175 | if (call->peer->rtt_usage > 1) |
176 | timeout = ns_to_ktime(call->peer->rtt * 3 / 2); | ||
177 | else | ||
178 | timeout = ms_to_ktime(rxrpc_resend_timeout); | ||
179 | min_timeo = ns_to_ktime((1000000000 / HZ) * 4); | ||
180 | if (ktime_before(timeout, min_timeo)) | ||
181 | timeout = min_timeo; | ||
182 | |||
183 | now = ktime_get_real(); | ||
184 | max_age = ktime_sub(now, timeout); | ||
242 | 185 | ||
243 | spin_lock_bh(&call->lock); | 186 | spin_lock_bh(&call->lock); |
244 | 187 | ||
@@ -282,7 +225,9 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) | |||
282 | ktime_to_ns(ktime_sub(skb->tstamp, max_age))); | 225 | ktime_to_ns(ktime_sub(skb->tstamp, max_age))); |
283 | } | 226 | } |
284 | 227 | ||
285 | call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); | 228 | resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now))); |
229 | resend_at += jiffies + rxrpc_resend_timeout; | ||
230 | WRITE_ONCE(call->resend_at, resend_at); | ||
286 | 231 | ||
287 | if (unacked) | 232 | if (unacked) |
288 | rxrpc_congestion_timeout(call); | 233 | rxrpc_congestion_timeout(call); |
@@ -292,14 +237,15 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) | |||
292 | * retransmitting data. | 237 | * retransmitting data. |
293 | */ | 238 | */ |
294 | if (!retrans) { | 239 | if (!retrans) { |
295 | rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); | 240 | rxrpc_reduce_call_timer(call, resend_at, now, |
241 | rxrpc_timer_set_for_resend); | ||
296 | spin_unlock_bh(&call->lock); | 242 | spin_unlock_bh(&call->lock); |
297 | ack_ts = ktime_sub(now, call->acks_latest_ts); | 243 | ack_ts = ktime_sub(now, call->acks_latest_ts); |
298 | if (ktime_to_ns(ack_ts) < call->peer->rtt) | 244 | if (ktime_to_ns(ack_ts) < call->peer->rtt) |
299 | goto out; | 245 | goto out; |
300 | rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, | 246 | rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, |
301 | rxrpc_propose_ack_ping_for_lost_ack); | 247 | rxrpc_propose_ack_ping_for_lost_ack); |
302 | rxrpc_send_ack_packet(call, true); | 248 | rxrpc_send_ack_packet(call, true, NULL); |
303 | goto out; | 249 | goto out; |
304 | } | 250 | } |
305 | 251 | ||
@@ -364,7 +310,8 @@ void rxrpc_process_call(struct work_struct *work) | |||
364 | { | 310 | { |
365 | struct rxrpc_call *call = | 311 | struct rxrpc_call *call = |
366 | container_of(work, struct rxrpc_call, processor); | 312 | container_of(work, struct rxrpc_call, processor); |
367 | ktime_t now; | 313 | rxrpc_serial_t *send_ack; |
314 | unsigned long now, next, t; | ||
368 | 315 | ||
369 | rxrpc_see_call(call); | 316 | rxrpc_see_call(call); |
370 | 317 | ||
@@ -384,22 +331,89 @@ recheck_state: | |||
384 | goto out_put; | 331 | goto out_put; |
385 | } | 332 | } |
386 | 333 | ||
387 | now = ktime_get_real(); | 334 | /* Work out if any timeouts tripped */ |
388 | if (ktime_before(call->expire_at, now)) { | 335 | now = jiffies; |
336 | t = READ_ONCE(call->expect_rx_by); | ||
337 | if (time_after_eq(now, t)) { | ||
338 | trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); | ||
339 | set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); | ||
340 | } | ||
341 | |||
342 | t = READ_ONCE(call->expect_req_by); | ||
343 | if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && | ||
344 | time_after_eq(now, t)) { | ||
345 | trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); | ||
346 | set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); | ||
347 | } | ||
348 | |||
349 | t = READ_ONCE(call->expect_term_by); | ||
350 | if (time_after_eq(now, t)) { | ||
351 | trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); | ||
352 | set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); | ||
353 | } | ||
354 | |||
355 | t = READ_ONCE(call->ack_at); | ||
356 | if (time_after_eq(now, t)) { | ||
357 | trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); | ||
358 | cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); | ||
359 | set_bit(RXRPC_CALL_EV_ACK, &call->events); | ||
360 | } | ||
361 | |||
362 | t = READ_ONCE(call->ack_lost_at); | ||
363 | if (time_after_eq(now, t)) { | ||
364 | trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); | ||
365 | cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET); | ||
366 | set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); | ||
367 | } | ||
368 | |||
369 | t = READ_ONCE(call->keepalive_at); | ||
370 | if (time_after_eq(now, t)) { | ||
371 | trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); | ||
372 | cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); | ||
373 | rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true, | ||
374 | rxrpc_propose_ack_ping_for_keepalive); | ||
375 | set_bit(RXRPC_CALL_EV_PING, &call->events); | ||
376 | } | ||
377 | |||
378 | t = READ_ONCE(call->ping_at); | ||
379 | if (time_after_eq(now, t)) { | ||
380 | trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); | ||
381 | cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET); | ||
382 | set_bit(RXRPC_CALL_EV_PING, &call->events); | ||
383 | } | ||
384 | |||
385 | t = READ_ONCE(call->resend_at); | ||
386 | if (time_after_eq(now, t)) { | ||
387 | trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); | ||
388 | cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); | ||
389 | set_bit(RXRPC_CALL_EV_RESEND, &call->events); | ||
390 | } | ||
391 | |||
392 | /* Process events */ | ||
393 | if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { | ||
389 | rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); | 394 | rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); |
390 | set_bit(RXRPC_CALL_EV_ABORT, &call->events); | 395 | set_bit(RXRPC_CALL_EV_ABORT, &call->events); |
391 | goto recheck_state; | 396 | goto recheck_state; |
392 | } | 397 | } |
393 | 398 | ||
394 | if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { | 399 | send_ack = NULL; |
400 | if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { | ||
401 | call->acks_lost_top = call->tx_top; | ||
402 | rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, | ||
403 | rxrpc_propose_ack_ping_for_lost_ack); | ||
404 | send_ack = &call->acks_lost_ping; | ||
405 | } | ||
406 | |||
407 | if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || | ||
408 | send_ack) { | ||
395 | if (call->ackr_reason) { | 409 | if (call->ackr_reason) { |
396 | rxrpc_send_ack_packet(call, false); | 410 | rxrpc_send_ack_packet(call, false, send_ack); |
397 | goto recheck_state; | 411 | goto recheck_state; |
398 | } | 412 | } |
399 | } | 413 | } |
400 | 414 | ||
401 | if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { | 415 | if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { |
402 | rxrpc_send_ack_packet(call, true); | 416 | rxrpc_send_ack_packet(call, true, NULL); |
403 | goto recheck_state; | 417 | goto recheck_state; |
404 | } | 418 | } |
405 | 419 | ||
@@ -408,7 +422,24 @@ recheck_state: | |||
408 | goto recheck_state; | 422 | goto recheck_state; |
409 | } | 423 | } |
410 | 424 | ||
411 | rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); | 425 | /* Make sure the timer is restarted */ |
426 | next = call->expect_rx_by; | ||
427 | |||
428 | #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } | ||
429 | |||
430 | set(call->expect_req_by); | ||
431 | set(call->expect_term_by); | ||
432 | set(call->ack_at); | ||
433 | set(call->ack_lost_at); | ||
434 | set(call->resend_at); | ||
435 | set(call->keepalive_at); | ||
436 | set(call->ping_at); | ||
437 | |||
438 | now = jiffies; | ||
439 | if (time_after_eq(now, next)) | ||
440 | goto recheck_state; | ||
441 | |||
442 | rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); | ||
412 | 443 | ||
413 | /* other events may have been raised since we started checking */ | 444 | /* other events may have been raised since we started checking */ |
414 | if (call->events && call->state < RXRPC_CALL_COMPLETE) { | 445 | if (call->events && call->state < RXRPC_CALL_COMPLETE) { |
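rxrpc_process_call() above trades the single expire_at for a bank of jiffies deadlines checked on entry: each deadline that has passed is parked at now + MAX_JIFFY_OFFSET with cmpxchg(), so exactly one pass raises the event bit, and the earliest surviving deadline re-arms the timer via the set() macro at the end. A compressed model of one deadline (C11 atomics standing in for READ_ONCE/cmpxchg):

#include <limits.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_JIFFY_OFFSET ((LONG_MAX >> 1) - 1)
#define time_after_eq(a, b) ((long)((a) - (b)) >= 0)

static _Atomic unsigned long ack_at;
static _Atomic unsigned long events;

static void check_ack_deadline(unsigned long now)
{
	unsigned long t = atomic_load(&ack_at);

	if (time_after_eq(now, t) &&
	    atomic_compare_exchange_strong(&ack_at, &t,
					   now + MAX_JIFFY_OFFSET))
		atomic_fetch_or(&events, 1UL << 0);	/* RXRPC_CALL_EV_ACK */
}

int main(void)
{
	atomic_store(&ack_at, 100);
	check_ack_deadline(150);	/* deadline passed: raises event */
	check_ack_deadline(150);	/* deadline parked: no second event */
	printf("events = %#lx\n", atomic_load(&events));
	return 0;
}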
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 994dc2df57e4..0b2db38dd32d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c | |||
@@ -51,10 +51,14 @@ static void rxrpc_call_timer_expired(struct timer_list *t) | |||
51 | 51 | ||
52 | _enter("%d", call->debug_id); | 52 | _enter("%d", call->debug_id); |
53 | 53 | ||
54 | if (call->state < RXRPC_CALL_COMPLETE) | 54 | if (call->state < RXRPC_CALL_COMPLETE) { |
55 | rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); | 55 | trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); |
56 | rxrpc_queue_call(call); | ||
57 | } | ||
56 | } | 58 | } |
57 | 59 | ||
60 | static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; | ||
61 | |||
58 | /* | 62 | /* |
59 | * find an extant server call | 63 | * find an extant server call |
60 | * - called in process context with IRQs enabled | 64 | * - called in process context with IRQs enabled |
@@ -95,7 +99,7 @@ found_extant_call: | |||
95 | /* | 99 | /* |
96 | * allocate a new call | 100 | * allocate a new call |
97 | */ | 101 | */ |
98 | struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) | 102 | struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) |
99 | { | 103 | { |
100 | struct rxrpc_call *call; | 104 | struct rxrpc_call *call; |
101 | 105 | ||
@@ -114,6 +118,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) | |||
114 | goto nomem_2; | 118 | goto nomem_2; |
115 | 119 | ||
116 | mutex_init(&call->user_mutex); | 120 | mutex_init(&call->user_mutex); |
121 | |||
122 | /* Prevent lockdep reporting a deadlock false positive between the afs | ||
123 | * filesystem and sys_sendmsg() via the mmap sem. | ||
124 | */ | ||
125 | if (rx->sk.sk_kern_sock) | ||
126 | lockdep_set_class(&call->user_mutex, | ||
127 | &rxrpc_call_user_mutex_lock_class_key); | ||
128 | |||
117 | timer_setup(&call->timer, rxrpc_call_timer_expired, 0); | 129 | timer_setup(&call->timer, rxrpc_call_timer_expired, 0); |
118 | INIT_WORK(&call->processor, &rxrpc_process_call); | 130 | INIT_WORK(&call->processor, &rxrpc_process_call); |
119 | INIT_LIST_HEAD(&call->link); | 131 | INIT_LIST_HEAD(&call->link); |
@@ -128,6 +140,8 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) | |||
128 | atomic_set(&call->usage, 1); | 140 | atomic_set(&call->usage, 1); |
129 | call->debug_id = atomic_inc_return(&rxrpc_debug_id); | 141 | call->debug_id = atomic_inc_return(&rxrpc_debug_id); |
130 | call->tx_total_len = -1; | 142 | call->tx_total_len = -1; |
143 | call->next_rx_timo = 20 * HZ; | ||
144 | call->next_req_timo = 1 * HZ; | ||
131 | 145 | ||
132 | memset(&call->sock_node, 0xed, sizeof(call->sock_node)); | 146 | memset(&call->sock_node, 0xed, sizeof(call->sock_node)); |
133 | 147 | ||
@@ -150,7 +164,8 @@ nomem: | |||
150 | /* | 164 | /* |
151 | * Allocate a new client call. | 165 | * Allocate a new client call. |
152 | */ | 166 | */ |
153 | static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, | 167 | static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, |
168 | struct sockaddr_rxrpc *srx, | ||
154 | gfp_t gfp) | 169 | gfp_t gfp) |
155 | { | 170 | { |
156 | struct rxrpc_call *call; | 171 | struct rxrpc_call *call; |
@@ -158,7 +173,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, | |||
158 | 173 | ||
159 | _enter(""); | 174 | _enter(""); |
160 | 175 | ||
161 | call = rxrpc_alloc_call(gfp); | 176 | call = rxrpc_alloc_call(rx, gfp); |
162 | if (!call) | 177 | if (!call) |
163 | return ERR_PTR(-ENOMEM); | 178 | return ERR_PTR(-ENOMEM); |
164 | call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; | 179 | call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; |
@@ -177,15 +192,17 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, | |||
177 | */ | 192 | */ |
178 | static void rxrpc_start_call_timer(struct rxrpc_call *call) | 193 | static void rxrpc_start_call_timer(struct rxrpc_call *call) |
179 | { | 194 | { |
180 | ktime_t now = ktime_get_real(), expire_at; | 195 | unsigned long now = jiffies; |
181 | 196 | unsigned long j = now + MAX_JIFFY_OFFSET; | |
182 | expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); | 197 | |
183 | call->expire_at = expire_at; | 198 | call->ack_at = j; |
184 | call->ack_at = expire_at; | 199 | call->ack_lost_at = j; |
185 | call->ping_at = expire_at; | 200 | call->resend_at = j; |
186 | call->resend_at = expire_at; | 201 | call->ping_at = j; |
187 | call->timer.expires = jiffies + LONG_MAX / 2; | 202 | call->expect_rx_by = j; |
188 | rxrpc_set_timer(call, rxrpc_timer_begin, now); | 203 | call->expect_req_by = j; |
204 | call->expect_term_by = j; | ||
205 | call->timer.expires = now; | ||
189 | } | 206 | } |
190 | 207 | ||
191 | /* | 208 | /* |
@@ -196,8 +213,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) | |||
196 | struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, | 213 | struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, |
197 | struct rxrpc_conn_parameters *cp, | 214 | struct rxrpc_conn_parameters *cp, |
198 | struct sockaddr_rxrpc *srx, | 215 | struct sockaddr_rxrpc *srx, |
199 | unsigned long user_call_ID, | 216 | struct rxrpc_call_params *p, |
200 | s64 tx_total_len, | ||
201 | gfp_t gfp) | 217 | gfp_t gfp) |
202 | __releases(&rx->sk.sk_lock.slock) | 218 | __releases(&rx->sk.sk_lock.slock) |
203 | { | 219 | { |
@@ -207,18 +223,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, | |||
207 | const void *here = __builtin_return_address(0); | 223 | const void *here = __builtin_return_address(0); |
208 | int ret; | 224 | int ret; |
209 | 225 | ||
210 | _enter("%p,%lx", rx, user_call_ID); | 226 | _enter("%p,%lx", rx, p->user_call_ID); |
211 | 227 | ||
212 | call = rxrpc_alloc_client_call(srx, gfp); | 228 | call = rxrpc_alloc_client_call(rx, srx, gfp); |
213 | if (IS_ERR(call)) { | 229 | if (IS_ERR(call)) { |
214 | release_sock(&rx->sk); | 230 | release_sock(&rx->sk); |
215 | _leave(" = %ld", PTR_ERR(call)); | 231 | _leave(" = %ld", PTR_ERR(call)); |
216 | return call; | 232 | return call; |
217 | } | 233 | } |
218 | 234 | ||
219 | call->tx_total_len = tx_total_len; | 235 | call->tx_total_len = p->tx_total_len; |
220 | trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), | 236 | trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), |
221 | here, (const void *)user_call_ID); | 237 | here, (const void *)p->user_call_ID); |
222 | 238 | ||
223 | /* We need to protect a partially set up call against the user as we | 239 | /* We need to protect a partially set up call against the user as we |
224 | * will be acting outside the socket lock. | 240 | * will be acting outside the socket lock. |
@@ -234,16 +250,16 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, | |||
234 | parent = *pp; | 250 | parent = *pp; |
235 | xcall = rb_entry(parent, struct rxrpc_call, sock_node); | 251 | xcall = rb_entry(parent, struct rxrpc_call, sock_node); |
236 | 252 | ||
237 | if (user_call_ID < xcall->user_call_ID) | 253 | if (p->user_call_ID < xcall->user_call_ID) |
238 | pp = &(*pp)->rb_left; | 254 | pp = &(*pp)->rb_left; |
239 | else if (user_call_ID > xcall->user_call_ID) | 255 | else if (p->user_call_ID > xcall->user_call_ID) |
240 | pp = &(*pp)->rb_right; | 256 | pp = &(*pp)->rb_right; |
241 | else | 257 | else |
242 | goto error_dup_user_ID; | 258 | goto error_dup_user_ID; |
243 | } | 259 | } |
244 | 260 | ||
245 | rcu_assign_pointer(call->socket, rx); | 261 | rcu_assign_pointer(call->socket, rx); |
246 | call->user_call_ID = user_call_ID; | 262 | call->user_call_ID = p->user_call_ID; |
247 | __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); | 263 | __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); |
248 | rxrpc_get_call(call, rxrpc_call_got_userid); | 264 | rxrpc_get_call(call, rxrpc_call_got_userid); |
249 | rb_link_node(&call->sock_node, parent, pp); | 265 | rb_link_node(&call->sock_node, parent, pp); |
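rxrpc_new_client_call() now takes its per-call settings through a single parameter block rather than a growing list of scalar arguments. The struct definition itself is not in the hunks shown here; the shape below is inferred from the fields this diff dereferences (user_call_ID, tx_total_len, nr_timeouts and the timeouts triple used in sendmsg.c further down):

/* Inferred layout -- only fields this diff references are shown. */
struct rxrpc_call_params {
	s64		tx_total_len;	/* Total Tx data length (-1 if unset) */
	unsigned long	user_call_ID;	/* User's call ID */
	struct {
		u32	hard;		/* Maximum call lifetime */
		u32	idle;		/* Max time since last data packet (ms) */
		u32	normal;		/* Max time since last call packet (ms) */
	} timeouts;
	u8		nr_timeouts;	/* How many of the above were supplied */
};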
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5f9624bd311c..7f74ca3059f8 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c | |||
@@ -85,8 +85,8 @@ | |||
85 | 85 | ||
86 | __read_mostly unsigned int rxrpc_max_client_connections = 1000; | 86 | __read_mostly unsigned int rxrpc_max_client_connections = 1000; |
87 | __read_mostly unsigned int rxrpc_reap_client_connections = 900; | 87 | __read_mostly unsigned int rxrpc_reap_client_connections = 900; |
88 | __read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; | 88 | __read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; |
89 | __read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; | 89 | __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * We use machine-unique IDs for our client connections. | 92 | * We use machine-unique IDs for our client connections. |
@@ -554,6 +554,11 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, | |||
554 | 554 | ||
555 | trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); | 555 | trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); |
556 | 556 | ||
557 | /* Cancel the final ACK on the previous call if it hasn't been sent yet | ||
558 | * as the DATA packet will implicitly ACK it. | ||
559 | */ | ||
560 | clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); | ||
561 | |||
557 | write_lock_bh(&call->state_lock); | 562 | write_lock_bh(&call->state_lock); |
558 | if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) | 563 | if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) |
559 | call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; | 564 | call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; |
@@ -686,7 +691,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, | |||
686 | 691 | ||
687 | _enter("{%d,%lx},", call->debug_id, call->user_call_ID); | 692 | _enter("{%d,%lx},", call->debug_id, call->user_call_ID); |
688 | 693 | ||
689 | rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work); | 694 | rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); |
690 | rxrpc_cull_active_client_conns(rxnet); | 695 | rxrpc_cull_active_client_conns(rxnet); |
691 | 696 | ||
692 | ret = rxrpc_get_client_conn(call, cp, srx, gfp); | 697 | ret = rxrpc_get_client_conn(call, cp, srx, gfp); |
@@ -752,6 +757,18 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) | |||
752 | } | 757 | } |
753 | 758 | ||
754 | /* | 759 | /* |
760 | * Set the reap timer. | ||
761 | */ | ||
762 | static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) | ||
763 | { | ||
764 | unsigned long now = jiffies; | ||
765 | unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; | ||
766 | |||
767 | if (rxnet->live) | ||
768 | timer_reduce(&rxnet->client_conn_reap_timer, reap_at); | ||
769 | } | ||
770 | |||
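timer_reduce() is the key primitive in this series: unlike mod_timer(), it only ever moves a pending timer's expiry earlier, never later, so multiple unlocked callers can each propose a deadline and the soonest one wins. A self-contained sketch (the reap_timer owner is hypothetical):

#include <linux/jiffies.h>
#include <linux/timer.h>

static struct timer_list reap_timer;	/* assume timer_setup() ran at init */

static void propose_reap(unsigned long reap_at)
{
	/* If reap_timer is already due sooner than reap_at this is a
	 * no-op; otherwise the expiry is pulled forward.  Racing
	 * callers can therefore only make the reap happen earlier.
	 */
	timer_reduce(&reap_timer, reap_at);
}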
771 | /* | ||
755 | * Disconnect a client call. | 772 | * Disconnect a client call. |
756 | */ | 773 | */ |
757 | void rxrpc_disconnect_client_call(struct rxrpc_call *call) | 774 | void rxrpc_disconnect_client_call(struct rxrpc_call *call) |
@@ -813,6 +830,19 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) | |||
813 | goto out_2; | 830 | goto out_2; |
814 | } | 831 | } |
815 | 832 | ||
833 | /* Schedule the final ACK to be transmitted in a short while so that it | ||
834 | * can be skipped if we find a follow-on call. The first DATA packet | ||
835 | * of the follow-on call will implicitly ACK this call. | ||
836 | */ | ||
837 | if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { | ||
838 | unsigned long final_ack_at = jiffies + 2; | ||
839 | |||
840 | WRITE_ONCE(chan->final_ack_at, final_ack_at); | ||
841 | smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */ | ||
842 | set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); | ||
843 | rxrpc_reduce_conn_timer(conn, final_ack_at); | ||
844 | } | ||
845 | |||
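The deferral window here is deliberately tiny (two jiffies): long enough for a follow-on call to claim the channel and clear the flag in rxrpc_activate_one_channel() above, short enough that the peer is not left waiting if none arrives. The ordering contract between the two sides, reduced to its essentials (the reader is rxrpc_process_delayed_final_acks() in conn_event.c later in this diff):

/* Writer (disconnect): publish the time, then the flag. */
WRITE_ONCE(chan->final_ack_at, jiffies + 2);
smp_wmb();
set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);

/* Reader (event processor): see the flag, then read a valid time. */
unsigned long ack_at;

if (test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) {
	smp_rmb();
	ack_at = READ_ONCE(chan->final_ack_at);
}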
816 | /* Things are more complex and we need the cache lock. We might be | 846 | /* Things are more complex and we need the cache lock. We might be |
817 | * able to simply idle the conn or it might now be lurking on the wait | 847 | * able to simply idle the conn or it might now be lurking on the wait |
818 | * list. It might even get moved back to the active list whilst we're | 848 | * list. It might even get moved back to the active list whilst we're |
@@ -878,9 +908,7 @@ idle_connection: | |||
878 | list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); | 908 | list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); |
879 | if (rxnet->idle_client_conns.next == &conn->cache_link && | 909 | if (rxnet->idle_client_conns.next == &conn->cache_link && |
880 | !rxnet->kill_all_client_conns) | 910 | !rxnet->kill_all_client_conns) |
881 | queue_delayed_work(rxrpc_workqueue, | 911 | rxrpc_set_client_reap_timer(rxnet); |
882 | &rxnet->client_conn_reaper, | ||
883 | rxrpc_conn_idle_client_expiry); | ||
884 | } else { | 912 | } else { |
885 | trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); | 913 | trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); |
886 | conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; | 914 | conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; |
@@ -1018,8 +1046,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) | |||
1018 | { | 1046 | { |
1019 | struct rxrpc_connection *conn; | 1047 | struct rxrpc_connection *conn; |
1020 | struct rxrpc_net *rxnet = | 1048 | struct rxrpc_net *rxnet = |
1021 | container_of(to_delayed_work(work), | 1049 | container_of(work, struct rxrpc_net, client_conn_reaper); |
1022 | struct rxrpc_net, client_conn_reaper); | ||
1023 | unsigned long expiry, conn_expires_at, now; | 1050 | unsigned long expiry, conn_expires_at, now; |
1024 | unsigned int nr_conns; | 1051 | unsigned int nr_conns; |
1025 | bool did_discard = false; | 1052 | bool did_discard = false; |
@@ -1061,6 +1088,8 @@ next: | |||
1061 | expiry = rxrpc_conn_idle_client_expiry; | 1088 | expiry = rxrpc_conn_idle_client_expiry; |
1062 | if (nr_conns > rxrpc_reap_client_connections) | 1089 | if (nr_conns > rxrpc_reap_client_connections) |
1063 | expiry = rxrpc_conn_idle_client_fast_expiry; | 1090 | expiry = rxrpc_conn_idle_client_fast_expiry; |
1091 | if (conn->params.local->service_closed) | ||
1092 | expiry = rxrpc_closed_conn_expiry * HZ; | ||
1064 | 1093 | ||
1065 | conn_expires_at = conn->idle_timestamp + expiry; | 1094 | conn_expires_at = conn->idle_timestamp + expiry; |
1066 | 1095 | ||
@@ -1096,9 +1125,8 @@ not_yet_expired: | |||
1096 | */ | 1125 | */ |
1097 | _debug("not yet"); | 1126 | _debug("not yet"); |
1098 | if (!rxnet->kill_all_client_conns) | 1127 | if (!rxnet->kill_all_client_conns) |
1099 | queue_delayed_work(rxrpc_workqueue, | 1128 | timer_reduce(&rxnet->client_conn_reap_timer, |
1100 | &rxnet->client_conn_reaper, | 1129 | conn_expires_at); |
1101 | conn_expires_at - now); | ||
1102 | 1130 | ||
1103 | out: | 1131 | out: |
1104 | spin_unlock(&rxnet->client_conn_cache_lock); | 1132 | spin_unlock(&rxnet->client_conn_cache_lock); |
@@ -1118,9 +1146,9 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) | |||
1118 | rxnet->kill_all_client_conns = true; | 1146 | rxnet->kill_all_client_conns = true; |
1119 | spin_unlock(&rxnet->client_conn_cache_lock); | 1147 | spin_unlock(&rxnet->client_conn_cache_lock); |
1120 | 1148 | ||
1121 | cancel_delayed_work(&rxnet->client_conn_reaper); | 1149 | del_timer_sync(&rxnet->client_conn_reap_timer); |
1122 | 1150 | ||
1123 | if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0)) | 1151 | if (!rxrpc_queue_work(&rxnet->client_conn_reaper)) |
1124 | _debug("destroy: queue failed"); | 1152 | _debug("destroy: queue failed"); |
1125 | 1153 | ||
1126 | _leave(""); | 1154 | _leave(""); |
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 59a51a56e7c8..4ca11be6be3c 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c | |||
@@ -24,31 +24,28 @@ | |||
24 | * Retransmit terminal ACK or ABORT of the previous call. | 24 | * Retransmit terminal ACK or ABORT of the previous call. |
25 | */ | 25 | */ |
26 | static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, | 26 | static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, |
27 | struct sk_buff *skb) | 27 | struct sk_buff *skb, |
28 | unsigned int channel) | ||
28 | { | 29 | { |
29 | struct rxrpc_skb_priv *sp = rxrpc_skb(skb); | 30 | struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; |
30 | struct rxrpc_channel *chan; | 31 | struct rxrpc_channel *chan; |
31 | struct msghdr msg; | 32 | struct msghdr msg; |
32 | struct kvec iov; | 33 | struct kvec iov[3]; |
33 | struct { | 34 | struct { |
34 | struct rxrpc_wire_header whdr; | 35 | struct rxrpc_wire_header whdr; |
35 | union { | 36 | union { |
36 | struct { | 37 | __be32 abort_code; |
37 | __be32 code; | 38 | struct rxrpc_ackpacket ack; |
38 | } abort; | ||
39 | struct { | ||
40 | struct rxrpc_ackpacket ack; | ||
41 | u8 padding[3]; | ||
42 | struct rxrpc_ackinfo info; | ||
43 | }; | ||
44 | }; | 39 | }; |
45 | } __attribute__((packed)) pkt; | 40 | } __attribute__((packed)) pkt; |
41 | struct rxrpc_ackinfo ack_info; | ||
46 | size_t len; | 42 | size_t len; |
47 | u32 serial, mtu, call_id; | 43 | int ioc; |
44 | u32 serial, mtu, call_id, padding; | ||
48 | 45 | ||
49 | _enter("%d", conn->debug_id); | 46 | _enter("%d", conn->debug_id); |
50 | 47 | ||
51 | chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; | 48 | chan = &conn->channels[channel]; |
52 | 49 | ||
53 | /* If the last call got moved on whilst we were waiting to run, just | 50 | /* If the last call got moved on whilst we were waiting to run, just |
54 | * ignore this packet. | 51 | * ignore this packet. |
@@ -56,7 +53,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, | |||
56 | call_id = READ_ONCE(chan->last_call); | 53 | call_id = READ_ONCE(chan->last_call); |
57 | /* Sync with __rxrpc_disconnect_call() */ | 54 | /* Sync with __rxrpc_disconnect_call() */ |
58 | smp_rmb(); | 55 | smp_rmb(); |
59 | if (call_id != sp->hdr.callNumber) | 56 | if (skb && call_id != sp->hdr.callNumber) |
60 | return; | 57 | return; |
61 | 58 | ||
62 | msg.msg_name = &conn->params.peer->srx.transport; | 59 | msg.msg_name = &conn->params.peer->srx.transport; |
@@ -65,9 +62,16 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, | |||
65 | msg.msg_controllen = 0; | 62 | msg.msg_controllen = 0; |
66 | msg.msg_flags = 0; | 63 | msg.msg_flags = 0; |
67 | 64 | ||
68 | pkt.whdr.epoch = htonl(sp->hdr.epoch); | 65 | iov[0].iov_base = &pkt; |
69 | pkt.whdr.cid = htonl(sp->hdr.cid); | 66 | iov[0].iov_len = sizeof(pkt.whdr); |
70 | pkt.whdr.callNumber = htonl(sp->hdr.callNumber); | 67 | iov[1].iov_base = &padding; |
68 | iov[1].iov_len = 3; | ||
69 | iov[2].iov_base = &ack_info; | ||
70 | iov[2].iov_len = sizeof(ack_info); | ||
71 | |||
72 | pkt.whdr.epoch = htonl(conn->proto.epoch); | ||
73 | pkt.whdr.cid = htonl(conn->proto.cid); | ||
74 | pkt.whdr.callNumber = htonl(call_id); | ||
71 | pkt.whdr.seq = 0; | 75 | pkt.whdr.seq = 0; |
72 | pkt.whdr.type = chan->last_type; | 76 | pkt.whdr.type = chan->last_type; |
73 | pkt.whdr.flags = conn->out_clientflag; | 77 | pkt.whdr.flags = conn->out_clientflag; |
@@ -79,27 +83,35 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, | |||
79 | len = sizeof(pkt.whdr); | 83 | len = sizeof(pkt.whdr); |
80 | switch (chan->last_type) { | 84 | switch (chan->last_type) { |
81 | case RXRPC_PACKET_TYPE_ABORT: | 85 | case RXRPC_PACKET_TYPE_ABORT: |
82 | pkt.abort.code = htonl(chan->last_abort); | 86 | pkt.abort_code = htonl(chan->last_abort); |
83 | len += sizeof(pkt.abort); | 87 | iov[0].iov_len += sizeof(pkt.abort_code); |
88 | len += sizeof(pkt.abort_code); | ||
89 | ioc = 1; | ||
84 | break; | 90 | break; |
85 | 91 | ||
86 | case RXRPC_PACKET_TYPE_ACK: | 92 | case RXRPC_PACKET_TYPE_ACK: |
87 | mtu = conn->params.peer->if_mtu; | 93 | mtu = conn->params.peer->if_mtu; |
88 | mtu -= conn->params.peer->hdrsize; | 94 | mtu -= conn->params.peer->hdrsize; |
89 | pkt.ack.bufferSpace = 0; | 95 | pkt.ack.bufferSpace = 0; |
90 | pkt.ack.maxSkew = htons(skb->priority); | 96 | pkt.ack.maxSkew = htons(skb ? skb->priority : 0); |
91 | pkt.ack.firstPacket = htonl(chan->last_seq); | 97 | pkt.ack.firstPacket = htonl(chan->last_seq + 1); |
92 | pkt.ack.previousPacket = htonl(chan->last_seq - 1); | 98 | pkt.ack.previousPacket = htonl(chan->last_seq); |
93 | pkt.ack.serial = htonl(sp->hdr.serial); | 99 | pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); |
94 | pkt.ack.reason = RXRPC_ACK_DUPLICATE; | 100 | pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; |
95 | pkt.ack.nAcks = 0; | 101 | pkt.ack.nAcks = 0; |
96 | pkt.info.rxMTU = htonl(rxrpc_rx_mtu); | 102 | ack_info.rxMTU = htonl(rxrpc_rx_mtu); |
97 | pkt.info.maxMTU = htonl(mtu); | 103 | ack_info.maxMTU = htonl(mtu); |
98 | pkt.info.rwind = htonl(rxrpc_rx_window_size); | 104 | ack_info.rwind = htonl(rxrpc_rx_window_size); |
99 | pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); | 105 | ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max); |
100 | pkt.whdr.flags |= RXRPC_SLOW_START_OK; | 106 | pkt.whdr.flags |= RXRPC_SLOW_START_OK; |
101 | len += sizeof(pkt.ack) + sizeof(pkt.info); | 107 | padding = 0; |
108 | iov[0].iov_len += sizeof(pkt.ack); | ||
109 | len += sizeof(pkt.ack) + 3 + sizeof(ack_info); | ||
110 | ioc = 3; | ||
102 | break; | 111 | break; |
112 | |||
113 | default: | ||
114 | return; | ||
103 | } | 115 | } |
104 | 116 | ||
105 | /* Resync with __rxrpc_disconnect_call() and check that the last call | 117 | /* Resync with __rxrpc_disconnect_call() and check that the last call |
@@ -109,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, | |||
109 | if (READ_ONCE(chan->last_call) != call_id) | 121 | if (READ_ONCE(chan->last_call) != call_id) |
110 | return; | 122 | return; |
111 | 123 | ||
112 | iov.iov_base = &pkt; | ||
113 | iov.iov_len = len; | ||
114 | |||
115 | serial = atomic_inc_return(&conn->serial); | 124 | serial = atomic_inc_return(&conn->serial); |
116 | pkt.whdr.serial = htonl(serial); | 125 | pkt.whdr.serial = htonl(serial); |
117 | 126 | ||
@@ -126,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, | |||
126 | break; | 135 | break; |
127 | } | 136 | } |
128 | 137 | ||
129 | kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); | 138 | kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); |
130 | _leave(""); | 139 | _leave(""); |
131 | return; | 140 | return; |
132 | } | 141 | } |
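Splitting the retransmitted packet across three kvecs avoids carrying the 3-byte pad and the ackinfo inside the packed on-stack struct; kernel_sendmsg() gathers them back into one datagram on the wire. A standalone sketch of the gather pattern (the buffers and socket are placeholders):

#include <linux/net.h>
#include <linux/uio.h>

static int send_gathered(struct socket *sock, struct msghdr *msg,
			 void *hdr, size_t hlen,
			 void *pad, size_t plen,
			 void *info, size_t ilen)
{
	struct kvec iov[3] = {
		{ .iov_base = hdr,  .iov_len = hlen },
		{ .iov_base = pad,  .iov_len = plen },
		{ .iov_base = info, .iov_len = ilen },
	};

	/* All three pieces leave as a single UDP packet, exactly as
	 * the old contiguous struct did.
	 */
	return kernel_sendmsg(sock, msg, iov, 3, hlen + plen + ilen);
}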
@@ -272,7 +281,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, | |||
272 | switch (sp->hdr.type) { | 281 | switch (sp->hdr.type) { |
273 | case RXRPC_PACKET_TYPE_DATA: | 282 | case RXRPC_PACKET_TYPE_DATA: |
274 | case RXRPC_PACKET_TYPE_ACK: | 283 | case RXRPC_PACKET_TYPE_ACK: |
275 | rxrpc_conn_retransmit_call(conn, skb); | 284 | rxrpc_conn_retransmit_call(conn, skb, |
285 | sp->hdr.cid & RXRPC_CHANNELMASK); | ||
276 | return 0; | 286 | return 0; |
277 | 287 | ||
278 | case RXRPC_PACKET_TYPE_BUSY: | 288 | case RXRPC_PACKET_TYPE_BUSY: |
@@ -379,6 +389,48 @@ abort: | |||
379 | } | 389 | } |
380 | 390 | ||
381 | /* | 391 | /* |
392 | * Process delayed final ACKs that we haven't subsumed into a subsequent call. | ||
393 | */ | ||
394 | static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn) | ||
395 | { | ||
396 | unsigned long j = jiffies, next_j; | ||
397 | unsigned int channel; | ||
398 | bool set; | ||
399 | |||
400 | again: | ||
401 | next_j = j + LONG_MAX; | ||
402 | set = false; | ||
403 | for (channel = 0; channel < RXRPC_MAXCALLS; channel++) { | ||
404 | struct rxrpc_channel *chan = &conn->channels[channel]; | ||
405 | unsigned long ack_at; | ||
406 | |||
407 | if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) | ||
408 | continue; | ||
409 | |||
410 | smp_rmb(); /* vs rxrpc_disconnect_client_call */ | ||
411 | ack_at = READ_ONCE(chan->final_ack_at); | ||
412 | |||
413 | if (time_before(j, ack_at)) { | ||
414 | if (time_before(ack_at, next_j)) { | ||
415 | next_j = ack_at; | ||
416 | set = true; | ||
417 | } | ||
418 | continue; | ||
419 | } | ||
420 | |||
421 | if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, | ||
422 | &conn->flags)) | ||
423 | rxrpc_conn_retransmit_call(conn, NULL, channel); | ||
424 | } | ||
425 | |||
426 | j = jiffies; | ||
427 | if (time_before_eq(next_j, j)) | ||
428 | goto again; | ||
429 | if (set) | ||
430 | rxrpc_reduce_conn_timer(conn, next_j); | ||
431 | } | ||
432 | |||
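The goto at the bottom guards against a scan that outlives its own deadline computation: if the soonest final-ACK time slipped into the past while the channels were being walked, the function rescans instead of arming the connection timer for a moment already gone. The control flow, reduced (scan_channels() and NO_DEADLINE are hypothetical stand-ins):

for (;;) {
	unsigned long next = scan_channels();	/* soonest pending ACK */

	if (next == NO_DEADLINE)
		break;				/* nothing left to send */
	if (time_before_eq(next, jiffies))
		continue;			/* already due: rescan */
	rxrpc_reduce_conn_timer(conn, next);	/* arm for the future */
	break;
}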
433 | /* | ||
382 | * connection-level event processor | 434 | * connection-level event processor |
383 | */ | 435 | */ |
384 | void rxrpc_process_connection(struct work_struct *work) | 436 | void rxrpc_process_connection(struct work_struct *work) |
@@ -394,6 +446,10 @@ void rxrpc_process_connection(struct work_struct *work) | |||
394 | if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) | 446 | if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) |
395 | rxrpc_secure_connection(conn); | 447 | rxrpc_secure_connection(conn); |
396 | 448 | ||
449 | /* Process delayed ACKs whose time has come. */ | ||
450 | if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) | ||
451 | rxrpc_process_delayed_final_acks(conn); | ||
452 | |||
397 | /* go through the conn-level event packets, releasing the ref on this | 453 | /* go through the conn-level event packets, releasing the ref on this |
398 | * connection that each one has when we've finished with it */ | 454 | * connection that each one has when we've finished with it */ |
399 | while ((skb = skb_dequeue(&conn->rx_queue))) { | 455 | while ((skb = skb_dequeue(&conn->rx_queue))) { |
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index fe575798592f..c628351eb900 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c | |||
@@ -20,10 +20,19 @@ | |||
20 | /* | 20 | /* |
21 | * Time till a connection expires after last use (in seconds). | 21 | * Time till a connection expires after last use (in seconds). |
22 | */ | 22 | */ |
23 | unsigned int rxrpc_connection_expiry = 10 * 60; | 23 | unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; |
24 | unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; | ||
24 | 25 | ||
25 | static void rxrpc_destroy_connection(struct rcu_head *); | 26 | static void rxrpc_destroy_connection(struct rcu_head *); |
26 | 27 | ||
28 | static void rxrpc_connection_timer(struct timer_list *timer) | ||
29 | { | ||
30 | struct rxrpc_connection *conn = | ||
31 | container_of(timer, struct rxrpc_connection, timer); | ||
32 | |||
33 | rxrpc_queue_conn(conn); | ||
34 | } | ||
35 | |||
27 | /* | 36 | /* |
28 | * allocate a new connection | 37 | * allocate a new connection |
29 | */ | 38 | */ |
@@ -38,6 +47,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) | |||
38 | INIT_LIST_HEAD(&conn->cache_link); | 47 | INIT_LIST_HEAD(&conn->cache_link); |
39 | spin_lock_init(&conn->channel_lock); | 48 | spin_lock_init(&conn->channel_lock); |
40 | INIT_LIST_HEAD(&conn->waiting_calls); | 49 | INIT_LIST_HEAD(&conn->waiting_calls); |
50 | timer_setup(&conn->timer, &rxrpc_connection_timer, 0); | ||
41 | INIT_WORK(&conn->processor, &rxrpc_process_connection); | 51 | INIT_WORK(&conn->processor, &rxrpc_process_connection); |
42 | INIT_LIST_HEAD(&conn->proc_link); | 52 | INIT_LIST_HEAD(&conn->proc_link); |
43 | INIT_LIST_HEAD(&conn->link); | 53 | INIT_LIST_HEAD(&conn->link); |
@@ -301,21 +311,29 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) | |||
301 | } | 311 | } |
302 | 312 | ||
303 | /* | 313 | /* |
314 | * Set the service connection reap timer. | ||
315 | */ | ||
316 | static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, | ||
317 | unsigned long reap_at) | ||
318 | { | ||
319 | if (rxnet->live) | ||
320 | timer_reduce(&rxnet->service_conn_reap_timer, reap_at); | ||
321 | } | ||
322 | |||
323 | /* | ||
304 | * Release a service connection | 324 | * Release a service connection |
305 | */ | 325 | */ |
306 | void rxrpc_put_service_conn(struct rxrpc_connection *conn) | 326 | void rxrpc_put_service_conn(struct rxrpc_connection *conn) |
307 | { | 327 | { |
308 | struct rxrpc_net *rxnet; | ||
309 | const void *here = __builtin_return_address(0); | 328 | const void *here = __builtin_return_address(0); |
310 | int n; | 329 | int n; |
311 | 330 | ||
312 | n = atomic_dec_return(&conn->usage); | 331 | n = atomic_dec_return(&conn->usage); |
313 | trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); | 332 | trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); |
314 | ASSERTCMP(n, >=, 0); | 333 | ASSERTCMP(n, >=, 0); |
315 | if (n == 0) { | 334 | if (n == 1) |
316 | rxnet = conn->params.local->rxnet; | 335 | rxrpc_set_service_reap_timer(conn->params.local->rxnet, |
317 | rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); | 336 | jiffies + rxrpc_connection_expiry); |
318 | } | ||
319 | } | 337 | } |
320 | 338 | ||
321 | /* | 339 | /* |
@@ -332,6 +350,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) | |||
332 | 350 | ||
333 | _net("DESTROY CONN %d", conn->debug_id); | 351 | _net("DESTROY CONN %d", conn->debug_id); |
334 | 352 | ||
353 | del_timer_sync(&conn->timer); | ||
335 | rxrpc_purge_queue(&conn->rx_queue); | 354 | rxrpc_purge_queue(&conn->rx_queue); |
336 | 355 | ||
337 | conn->security->clear(conn); | 356 | conn->security->clear(conn); |
@@ -351,17 +370,15 @@ void rxrpc_service_connection_reaper(struct work_struct *work) | |||
351 | { | 370 | { |
352 | struct rxrpc_connection *conn, *_p; | 371 | struct rxrpc_connection *conn, *_p; |
353 | struct rxrpc_net *rxnet = | 372 | struct rxrpc_net *rxnet = |
354 | container_of(to_delayed_work(work), | 373 | container_of(work, struct rxrpc_net, service_conn_reaper); |
355 | struct rxrpc_net, service_conn_reaper); | 374 | unsigned long expire_at, earliest, idle_timestamp, now; |
356 | unsigned long reap_older_than, earliest, idle_timestamp, now; | ||
357 | 375 | ||
358 | LIST_HEAD(graveyard); | 376 | LIST_HEAD(graveyard); |
359 | 377 | ||
360 | _enter(""); | 378 | _enter(""); |
361 | 379 | ||
362 | now = jiffies; | 380 | now = jiffies; |
363 | reap_older_than = now - rxrpc_connection_expiry * HZ; | 381 | earliest = now + MAX_JIFFY_OFFSET; |
364 | earliest = ULONG_MAX; | ||
365 | 382 | ||
366 | write_lock(&rxnet->conn_lock); | 383 | write_lock(&rxnet->conn_lock); |
367 | list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { | 384 | list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { |
@@ -371,15 +388,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work) | |||
371 | if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) | 388 | if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) |
372 | continue; | 389 | continue; |
373 | 390 | ||
374 | idle_timestamp = READ_ONCE(conn->idle_timestamp); | 391 | if (rxnet->live) { |
375 | _debug("reap CONN %d { u=%d,t=%ld }", | 392 | idle_timestamp = READ_ONCE(conn->idle_timestamp); |
376 | conn->debug_id, atomic_read(&conn->usage), | 393 | expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; |
377 | (long)reap_older_than - (long)idle_timestamp); | 394 | if (conn->params.local->service_closed) |
378 | 395 | expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; | |
379 | if (time_after(idle_timestamp, reap_older_than)) { | 396 | |
380 | if (time_before(idle_timestamp, earliest)) | 397 | _debug("reap CONN %d { u=%d,t=%ld }", |
381 | earliest = idle_timestamp; | 398 | conn->debug_id, atomic_read(&conn->usage), |
382 | continue; | 399 | (long)expire_at - (long)now); |
400 | |||
401 | if (time_before(now, expire_at)) { | ||
402 | if (time_before(expire_at, earliest)) | ||
403 | earliest = expire_at; | ||
404 | continue; | ||
405 | } | ||
383 | } | 406 | } |
384 | 407 | ||
385 | /* The usage count sits at 1 whilst the object is unused on the | 408 | /* The usage count sits at 1 whilst the object is unused on the |
@@ -387,6 +410,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) | |||
387 | */ | 410 | */ |
388 | if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) | 411 | if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) |
389 | continue; | 412 | continue; |
413 | trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0); | ||
390 | 414 | ||
391 | if (rxrpc_conn_is_client(conn)) | 415 | if (rxrpc_conn_is_client(conn)) |
392 | BUG(); | 416 | BUG(); |
@@ -397,11 +421,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work) | |||
397 | } | 421 | } |
398 | write_unlock(&rxnet->conn_lock); | 422 | write_unlock(&rxnet->conn_lock); |
399 | 423 | ||
400 | if (earliest != ULONG_MAX) { | 424 | if (earliest != now + MAX_JIFFY_OFFSET) { |
401 | _debug("reschedule reaper %ld", (long) earliest - now); | 425 | _debug("reschedule reaper %ld", (long)earliest - (long)now); |
402 | ASSERT(time_after(earliest, now)); | 426 | ASSERT(time_after(earliest, now)); |
403 | rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, | 427 | rxrpc_set_service_reap_timer(rxnet, earliest); |
404 | earliest - now); | ||
405 | } | 428 | } |
406 | 429 | ||
407 | while (!list_empty(&graveyard)) { | 430 | while (!list_empty(&graveyard)) { |
@@ -429,9 +452,8 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) | |||
429 | 452 | ||
430 | rxrpc_destroy_all_client_connections(rxnet); | 453 | rxrpc_destroy_all_client_connections(rxnet); |
431 | 454 | ||
432 | rxrpc_connection_expiry = 0; | 455 | del_timer_sync(&rxnet->service_conn_reap_timer); |
433 | cancel_delayed_work(&rxnet->client_conn_reaper); | 456 | rxrpc_queue_work(&rxnet->service_conn_reaper); |
434 | rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); | ||
435 | flush_workqueue(rxrpc_workqueue); | 457 | flush_workqueue(rxrpc_workqueue); |
436 | 458 | ||
437 | write_lock(&rxnet->conn_lock); | 459 | write_lock(&rxnet->conn_lock); |
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1b592073ec96..6fc61400337f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c | |||
@@ -318,16 +318,18 @@ bad_state: | |||
318 | static bool rxrpc_receiving_reply(struct rxrpc_call *call) | 318 | static bool rxrpc_receiving_reply(struct rxrpc_call *call) |
319 | { | 319 | { |
320 | struct rxrpc_ack_summary summary = { 0 }; | 320 | struct rxrpc_ack_summary summary = { 0 }; |
321 | unsigned long now, timo; | ||
321 | rxrpc_seq_t top = READ_ONCE(call->tx_top); | 322 | rxrpc_seq_t top = READ_ONCE(call->tx_top); |
322 | 323 | ||
323 | if (call->ackr_reason) { | 324 | if (call->ackr_reason) { |
324 | spin_lock_bh(&call->lock); | 325 | spin_lock_bh(&call->lock); |
325 | call->ackr_reason = 0; | 326 | call->ackr_reason = 0; |
326 | call->resend_at = call->expire_at; | ||
327 | call->ack_at = call->expire_at; | ||
328 | spin_unlock_bh(&call->lock); | 327 | spin_unlock_bh(&call->lock); |
329 | rxrpc_set_timer(call, rxrpc_timer_init_for_reply, | 328 | now = jiffies; |
330 | ktime_get_real()); | 329 | timo = now + MAX_JIFFY_OFFSET; |
330 | WRITE_ONCE(call->resend_at, timo); | ||
331 | WRITE_ONCE(call->ack_at, timo); | ||
332 | trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); | ||
331 | } | 333 | } |
332 | 334 | ||
333 | if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) | 335 | if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) |
@@ -437,6 +439,19 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, | |||
437 | if (state >= RXRPC_CALL_COMPLETE) | 439 | if (state >= RXRPC_CALL_COMPLETE) |
438 | return; | 440 | return; |
439 | 441 | ||
442 | if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { | ||
443 | unsigned long timo = READ_ONCE(call->next_req_timo); | ||
444 | unsigned long now, expect_req_by; | ||
445 | |||
446 | if (timo) { | ||
447 | now = jiffies; | ||
448 | expect_req_by = now + timo; | ||
449 | WRITE_ONCE(call->expect_req_by, expect_req_by); | ||
450 | rxrpc_reduce_call_timer(call, expect_req_by, now, | ||
451 | rxrpc_timer_set_for_idle); | ||
452 | } | ||
453 | } | ||
454 | |||
440 | /* Received data implicitly ACKs all of the request packets we sent | 455 | /* Received data implicitly ACKs all of the request packets we sent |
441 | * when we're acting as a client. | 456 | * when we're acting as a client. |
442 | */ | 457 | */ |
@@ -616,6 +631,43 @@ found: | |||
616 | } | 631 | } |
617 | 632 | ||
618 | /* | 633 | /* |
634 | * Process the response to a ping that we sent to find out if we lost an ACK. | ||
635 | * | ||
636 | * If we got back a ping response that indicates a lower tx_top than what we | ||
637 | * had at the time of the ping transmission, we adjudge all the DATA packets | ||
638 | * sent between the response tx_top and the ping-time tx_top to have been lost. | ||
639 | */ | ||
640 | static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) | ||
641 | { | ||
642 | rxrpc_seq_t top, bottom, seq; | ||
643 | bool resend = false; | ||
644 | |||
645 | spin_lock_bh(&call->lock); | ||
646 | |||
647 | bottom = call->tx_hard_ack + 1; | ||
648 | top = call->acks_lost_top; | ||
649 | if (before(bottom, top)) { | ||
650 | for (seq = bottom; before_eq(seq, top); seq++) { | ||
651 | int ix = seq & RXRPC_RXTX_BUFF_MASK; | ||
652 | u8 annotation = call->rxtx_annotations[ix]; | ||
653 | u8 anno_type = annotation & RXRPC_TX_ANNO_MASK; | ||
654 | |||
655 | if (anno_type != RXRPC_TX_ANNO_UNACK) | ||
656 | continue; | ||
657 | annotation &= ~RXRPC_TX_ANNO_MASK; | ||
658 | annotation |= RXRPC_TX_ANNO_RETRANS; | ||
659 | call->rxtx_annotations[ix] = annotation; | ||
660 | resend = true; | ||
661 | } | ||
662 | } | ||
663 | |||
664 | spin_unlock_bh(&call->lock); | ||
665 | |||
666 | if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) | ||
667 | rxrpc_queue_call(call); | ||
668 | } | ||
669 | |||
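Worked example: suppose the ping was sent when tx_top was 9 (recorded as acks_lost_top) and the matching response arrives with call->tx_hard_ack at 5. Packets 6-9 were transmitted before the ping and the ping was answered, yet none of them has been hard-ACK'd, so any of them still annotated UNACK is flipped to RETRANS and a resend event is queued. Packets already ACK'd keep their annotations and are not retransmitted.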
670 | /* | ||
619 | * Process a ping response. | 671 | * Process a ping response. |
620 | */ | 672 | */ |
621 | static void rxrpc_input_ping_response(struct rxrpc_call *call, | 673 | static void rxrpc_input_ping_response(struct rxrpc_call *call, |
@@ -630,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, | |||
630 | smp_rmb(); | 682 | smp_rmb(); |
631 | ping_serial = call->ping_serial; | 683 | ping_serial = call->ping_serial; |
632 | 684 | ||
685 | if (orig_serial == call->acks_lost_ping) | ||
686 | rxrpc_input_check_for_lost_ack(call); | ||
687 | |||
633 | if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || | 688 | if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || |
634 | before(orig_serial, ping_serial)) | 689 | before(orig_serial, ping_serial)) |
635 | return; | 690 | return; |
@@ -908,9 +963,20 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, | |||
908 | struct sk_buff *skb, u16 skew) | 963 | struct sk_buff *skb, u16 skew) |
909 | { | 964 | { |
910 | struct rxrpc_skb_priv *sp = rxrpc_skb(skb); | 965 | struct rxrpc_skb_priv *sp = rxrpc_skb(skb); |
966 | unsigned long timo; | ||
911 | 967 | ||
912 | _enter("%p,%p", call, skb); | 968 | _enter("%p,%p", call, skb); |
913 | 969 | ||
970 | timo = READ_ONCE(call->next_rx_timo); | ||
971 | if (timo) { | ||
972 | unsigned long now = jiffies, expect_rx_by; | ||
973 | |||
974 | expect_rx_by = jiffies + timo; | ||
975 | WRITE_ONCE(call->expect_rx_by, expect_rx_by); | ||
976 | rxrpc_reduce_call_timer(call, expect_rx_by, now, | ||
977 | rxrpc_timer_set_for_normal); | ||
978 | } | ||
979 | |||
914 | switch (sp->hdr.type) { | 980 | switch (sp->hdr.type) { |
915 | case RXRPC_PACKET_TYPE_DATA: | 981 | case RXRPC_PACKET_TYPE_DATA: |
916 | rxrpc_input_data(call, skb, skew); | 982 | rxrpc_input_data(call, skb, skew); |
@@ -1147,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk) | |||
1147 | goto reupgrade; | 1213 | goto reupgrade; |
1148 | conn->service_id = sp->hdr.serviceId; | 1214 | conn->service_id = sp->hdr.serviceId; |
1149 | } | 1215 | } |
1150 | 1216 | ||
1151 | if (sp->hdr.callNumber == 0) { | 1217 | if (sp->hdr.callNumber == 0) { |
1152 | /* Connection-level packet */ | 1218 | /* Connection-level packet */ |
1153 | _debug("CONN %p {%d}", conn, conn->debug_id); | 1219 | _debug("CONN %p {%d}", conn, conn->debug_id); |
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1a2d4b112064..c1d9e7fd7448 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c | |||
@@ -21,33 +21,28 @@ | |||
21 | unsigned int rxrpc_max_backlog __read_mostly = 10; | 21 | unsigned int rxrpc_max_backlog __read_mostly = 10; |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * Maximum lifetime of a call (in ms). | ||
25 | */ | ||
26 | unsigned int rxrpc_max_call_lifetime = 60 * 1000; | ||
27 | |||
28 | /* | ||
29 | * How long to wait before scheduling ACK generation after seeing a | 24 | * How long to wait before scheduling ACK generation after seeing a |
30 | * packet with RXRPC_REQUEST_ACK set (in ms). | 25 | * packet with RXRPC_REQUEST_ACK set (in jiffies). |
31 | */ | 26 | */ |
32 | unsigned int rxrpc_requested_ack_delay = 1; | 27 | unsigned long rxrpc_requested_ack_delay = 1; |
33 | 28 | ||
34 | /* | 29 | /* |
35 | * How long to wait before scheduling an ACK with subtype DELAY (in ms). | 30 | * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). |
36 | * | 31 | * |
37 | * We use this when we've received new data packets. If those packets aren't | 32 | * We use this when we've received new data packets. If those packets aren't |
38 | * all consumed within this time we will send a DELAY ACK if an ACK was not | 33 | * all consumed within this time we will send a DELAY ACK if an ACK was not |
39 | * requested to let the sender know it doesn't need to resend. | 34 | * requested to let the sender know it doesn't need to resend. |
40 | */ | 35 | */ |
41 | unsigned int rxrpc_soft_ack_delay = 1 * 1000; | 36 | unsigned long rxrpc_soft_ack_delay = HZ; |
42 | 37 | ||
43 | /* | 38 | /* |
44 | * How long to wait before scheduling an ACK with subtype IDLE (in ms). | 39 | * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). |
45 | * | 40 | * |
46 | * We use this when we've consumed some previously soft-ACK'd packets when | 41 | * We use this when we've consumed some previously soft-ACK'd packets when |
47 | * further packets aren't immediately received to decide when to send an IDLE | 42 | * further packets aren't immediately received to decide when to send an IDLE |
48 | * ACK to let the other end know that it can free up its Tx buffer space. | 43 | * ACK to let the other end know that it can free up its Tx buffer space. |
49 | */ | 44 | */ |
50 | unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; | 45 | unsigned long rxrpc_idle_ack_delay = HZ / 2; |
51 | 46 | ||
52 | /* | 47 | /* |
53 | * Receive window size in packets. This indicates the maximum number of | 48 | * Receive window size in packets. This indicates the maximum number of |
@@ -75,7 +70,7 @@ unsigned int rxrpc_rx_jumbo_max = 4; | |||
75 | /* | 70 | /* |
76 | * Time till packet resend (in milliseconds). | 71 | * Time till packet resend (in milliseconds). |
77 | */ | 72 | */ |
78 | unsigned int rxrpc_resend_timeout = 4 * 1000; | 73 | unsigned long rxrpc_resend_timeout = 4 * HZ; |
79 | 74 | ||
80 | const s8 rxrpc_ack_priority[] = { | 75 | const s8 rxrpc_ack_priority[] = { |
81 | [0] = 0, | 76 | [0] = 0, |
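The conversion from milliseconds to jiffies changes what the literals mean on kernels built with a different CONFIG_HZ. A unit check, taking HZ=250 as an example:

/* Assuming CONFIG_HZ=250:
 *   rxrpc_requested_ack_delay = 1       -> 1 jiffy    = 4 ms   (was 1 ms)
 *   rxrpc_soft_ack_delay      = HZ      -> 250 jiffies = 1 s    (unchanged)
 *   rxrpc_idle_ack_delay      = HZ / 2  -> 125 jiffies = 500 ms (unchanged)
 *   rxrpc_resend_timeout      = 4 * HZ  -> 1000 jiffies = 4 s   (unchanged)
 * Only the requested-ACK delay drifts with HZ, since one jiffy is the
 * smallest representable nonzero delay.
 */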
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 7edceb8522f5..f18c9248e0d4 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c | |||
@@ -14,6 +14,24 @@ | |||
14 | 14 | ||
15 | unsigned int rxrpc_net_id; | 15 | unsigned int rxrpc_net_id; |
16 | 16 | ||
17 | static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) | ||
18 | { | ||
19 | struct rxrpc_net *rxnet = | ||
20 | container_of(timer, struct rxrpc_net, client_conn_reap_timer); | ||
21 | |||
22 | if (rxnet->live) | ||
23 | rxrpc_queue_work(&rxnet->client_conn_reaper); | ||
24 | } | ||
25 | |||
26 | static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) | ||
27 | { | ||
28 | struct rxrpc_net *rxnet = | ||
29 | container_of(timer, struct rxrpc_net, service_conn_reap_timer); | ||
30 | |||
31 | if (rxnet->live) | ||
32 | rxrpc_queue_work(&rxnet->service_conn_reaper); | ||
33 | } | ||
34 | |||
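Both callbacks use the timer_setup() conversion pattern: the callback receives the timer_list itself and climbs back to the owning structure with container_of(), replacing the old unsigned-long data argument. The generic shape of the pattern (struct owner is illustrative):

#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

struct owner {
	struct timer_list	timer;
	struct work_struct	work;
};

static void owner_timeout(struct timer_list *timer)
{
	/* No opaque data pointer is stored; the embedded timer's
	 * address is enough to recover the enclosing object.
	 */
	struct owner *o = container_of(timer, struct owner, timer);

	schedule_work(&o->work);
}

/* At init: timer_setup(&o->timer, owner_timeout, 0); */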
17 | /* | 35 | /* |
18 | * Initialise a per-network namespace record. | 36 | * Initialise a per-network namespace record. |
19 | */ | 37 | */ |
@@ -22,6 +40,7 @@ static __net_init int rxrpc_init_net(struct net *net) | |||
22 | struct rxrpc_net *rxnet = rxrpc_net(net); | 40 | struct rxrpc_net *rxnet = rxrpc_net(net); |
23 | int ret; | 41 | int ret; |
24 | 42 | ||
43 | rxnet->live = true; | ||
25 | get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); | 44 | get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); |
26 | rxnet->epoch |= RXRPC_RANDOM_EPOCH; | 45 | rxnet->epoch |= RXRPC_RANDOM_EPOCH; |
27 | 46 | ||
@@ -31,8 +50,10 @@ static __net_init int rxrpc_init_net(struct net *net) | |||
31 | INIT_LIST_HEAD(&rxnet->conn_proc_list); | 50 | INIT_LIST_HEAD(&rxnet->conn_proc_list); |
32 | INIT_LIST_HEAD(&rxnet->service_conns); | 51 | INIT_LIST_HEAD(&rxnet->service_conns); |
33 | rwlock_init(&rxnet->conn_lock); | 52 | rwlock_init(&rxnet->conn_lock); |
34 | INIT_DELAYED_WORK(&rxnet->service_conn_reaper, | 53 | INIT_WORK(&rxnet->service_conn_reaper, |
35 | rxrpc_service_connection_reaper); | 54 | rxrpc_service_connection_reaper); |
55 | timer_setup(&rxnet->service_conn_reap_timer, | ||
56 | rxrpc_service_conn_reap_timeout, 0); | ||
36 | 57 | ||
37 | rxnet->nr_client_conns = 0; | 58 | rxnet->nr_client_conns = 0; |
38 | rxnet->nr_active_client_conns = 0; | 59 | rxnet->nr_active_client_conns = 0; |
@@ -42,8 +63,10 @@ static __net_init int rxrpc_init_net(struct net *net) | |||
42 | INIT_LIST_HEAD(&rxnet->waiting_client_conns); | 63 | INIT_LIST_HEAD(&rxnet->waiting_client_conns); |
43 | INIT_LIST_HEAD(&rxnet->active_client_conns); | 64 | INIT_LIST_HEAD(&rxnet->active_client_conns); |
44 | INIT_LIST_HEAD(&rxnet->idle_client_conns); | 65 | INIT_LIST_HEAD(&rxnet->idle_client_conns); |
45 | INIT_DELAYED_WORK(&rxnet->client_conn_reaper, | 66 | INIT_WORK(&rxnet->client_conn_reaper, |
46 | rxrpc_discard_expired_client_conns); | 67 | rxrpc_discard_expired_client_conns); |
68 | timer_setup(&rxnet->client_conn_reap_timer, | ||
69 | rxrpc_client_conn_reap_timeout, 0); | ||
47 | 70 | ||
48 | INIT_LIST_HEAD(&rxnet->local_endpoints); | 71 | INIT_LIST_HEAD(&rxnet->local_endpoints); |
49 | mutex_init(&rxnet->local_mutex); | 72 | mutex_init(&rxnet->local_mutex); |
@@ -60,6 +83,7 @@ static __net_init int rxrpc_init_net(struct net *net) | |||
60 | return 0; | 83 | return 0; |
61 | 84 | ||
62 | err_proc: | 85 | err_proc: |
86 | rxnet->live = false; | ||
63 | return ret; | 87 | return ret; |
64 | } | 88 | } |
65 | 89 | ||
@@ -70,6 +94,7 @@ static __net_exit void rxrpc_exit_net(struct net *net) | |||
70 | { | 94 | { |
71 | struct rxrpc_net *rxnet = rxrpc_net(net); | 95 | struct rxrpc_net *rxnet = rxrpc_net(net); |
72 | 96 | ||
97 | rxnet->live = false; | ||
73 | rxrpc_destroy_all_calls(rxnet); | 98 | rxrpc_destroy_all_calls(rxnet); |
74 | rxrpc_destroy_all_connections(rxnet); | 99 | rxrpc_destroy_all_connections(rxnet); |
75 | rxrpc_destroy_all_locals(rxnet); | 100 | rxrpc_destroy_all_locals(rxnet); |
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f47659c7b224..42410e910aff 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c | |||
@@ -33,6 +33,24 @@ struct rxrpc_abort_buffer { | |||
33 | }; | 33 | }; |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * Arrange for a keepalive ping a certain time after we last transmitted. This | ||
37 | * lets the far side know we're still interested in this call and helps keep | ||
38 | * the route through any intervening firewall open. | ||
39 | * | ||
40 | * Receiving a response to the ping will prevent the ->expect_rx_by timer from | ||
41 | * expiring. | ||
42 | */ | ||
43 | static void rxrpc_set_keepalive(struct rxrpc_call *call) | ||
44 | { | ||
45 | unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; | ||
46 | |||
47 | keepalive_at += now; | ||
48 | WRITE_ONCE(call->keepalive_at, keepalive_at); | ||
49 | rxrpc_reduce_call_timer(call, keepalive_at, now, | ||
50 | rxrpc_timer_set_for_keepalive); | ||
51 | } | ||
52 | |||
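Pitching the keepalive at one sixth of the receive-expectation window means several pings fit inside one expect_rx_by period, so a single lost ping cannot by itself time the call out. For example:

/* With call->next_rx_timo = 30 * HZ (a 30 s window):
 *   keepalive_at = now + 30 * HZ / 6 = now + 5 * HZ
 * i.e. a ping opportunity roughly every 5 s, about six per window;
 * the expect_rx_by timer only fires if all of them go unanswered.
 */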
53 | /* | ||
36 | * Fill out an ACK packet. | 54 | * Fill out an ACK packet. |
37 | */ | 55 | */ |
38 | static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, | 56 | static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, |
@@ -95,7 +113,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, | |||
95 | /* | 113 | /* |
96 | * Send an ACK call packet. | 114 | * Send an ACK call packet. |
97 | */ | 115 | */ |
98 | int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) | 116 | int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, |
117 | rxrpc_serial_t *_serial) | ||
99 | { | 118 | { |
100 | struct rxrpc_connection *conn = NULL; | 119 | struct rxrpc_connection *conn = NULL; |
101 | struct rxrpc_ack_buffer *pkt; | 120 | struct rxrpc_ack_buffer *pkt; |
@@ -165,6 +184,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) | |||
165 | ntohl(pkt->ack.firstPacket), | 184 | ntohl(pkt->ack.firstPacket), |
166 | ntohl(pkt->ack.serial), | 185 | ntohl(pkt->ack.serial), |
167 | pkt->ack.reason, pkt->ack.nAcks); | 186 | pkt->ack.reason, pkt->ack.nAcks); |
187 | if (_serial) | ||
188 | *_serial = serial; | ||
168 | 189 | ||
169 | if (ping) { | 190 | if (ping) { |
170 | call->ping_serial = serial; | 191 | call->ping_serial = serial; |
@@ -202,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) | |||
202 | call->ackr_seen = top; | 223 | call->ackr_seen = top; |
203 | spin_unlock_bh(&call->lock); | 224 | spin_unlock_bh(&call->lock); |
204 | } | 225 | } |
226 | |||
227 | rxrpc_set_keepalive(call); | ||
205 | } | 228 | } |
206 | 229 | ||
207 | out: | 230 | out: |
@@ -323,7 +346,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, | |||
323 | * ACKs if a DATA packet appears to have been lost. | 346 | * ACKs if a DATA packet appears to have been lost. |
324 | */ | 347 | */ |
325 | if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && | 348 | if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && |
326 | (retrans || | 349 | (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || |
350 | retrans || | ||
327 | call->cong_mode == RXRPC_CALL_SLOW_START || | 351 | call->cong_mode == RXRPC_CALL_SLOW_START || |
328 | (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || | 352 | (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || |
329 | ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), | 353 | ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), |
@@ -370,8 +394,23 @@ done: | |||
370 | if (whdr.flags & RXRPC_REQUEST_ACK) { | 394 | if (whdr.flags & RXRPC_REQUEST_ACK) { |
371 | call->peer->rtt_last_req = now; | 395 | call->peer->rtt_last_req = now; |
372 | trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); | 396 | trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); |
397 | if (call->peer->rtt_usage > 1) { | ||
398 | unsigned long nowj = jiffies, ack_lost_at; | ||
399 | |||
400 | ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); | ||
401 | if (ack_lost_at < 1) | ||
402 | ack_lost_at = 1; | ||
403 | |||
404 | ack_lost_at += nowj; | ||
405 | WRITE_ONCE(call->ack_lost_at, ack_lost_at); | ||
406 | rxrpc_reduce_call_timer(call, ack_lost_at, nowj, | ||
407 | rxrpc_timer_set_for_lost_ack); | ||
408 | } | ||
373 | } | 409 | } |
374 | } | 410 | } |
411 | |||
412 | rxrpc_set_keepalive(call); | ||
413 | |||
375 | _leave(" = %d [%u]", ret, call->peer->maxdata); | 414 | _leave(" = %d [%u]", ret, call->peer->maxdata); |
376 | return ret; | 415 | return ret; |
377 | 416 | ||
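The lost-ACK probe timer above is derived from the measured RTT (two round trips) once at least two samples exist. nsecs_to_jiffies() can round a sub-tick RTT down to zero on coarse-HZ kernels, hence the clamp to one jiffy. The same computation as a self-contained helper, under those assumptions:

#include <linux/jiffies.h>
#include <linux/kernel.h>

static unsigned long ack_lost_deadline(u64 rtt_ns, unsigned long now)
{
	/* Two RTTs expressed in jiffies; anything under one tick
	 * (e.g. a 4 ms RTT on an HZ=100 kernel) rounds to zero, so
	 * clamp to a single tick to keep a real delay.
	 */
	unsigned long j = nsecs_to_jiffies(2 * rtt_ns);

	return now + max(j, 1UL);
}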
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8510a98b87e1..cc21e8db25b0 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c | |||
@@ -144,11 +144,13 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) | |||
144 | trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); | 144 | trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); |
145 | ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); | 145 | ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); |
146 | 146 | ||
147 | #if 0 // TODO: May want to transmit final ACK under some circumstances anyway | ||
147 | if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { | 148 | if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { |
148 | rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, | 149 | rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, |
149 | rxrpc_propose_ack_terminal_ack); | 150 | rxrpc_propose_ack_terminal_ack); |
150 | rxrpc_send_ack_packet(call, false); | 151 | rxrpc_send_ack_packet(call, false, NULL); |
151 | } | 152 | } |
153 | #endif | ||
152 | 154 | ||
153 | write_lock_bh(&call->state_lock); | 155 | write_lock_bh(&call->state_lock); |
154 | 156 | ||
@@ -161,7 +163,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) | |||
161 | case RXRPC_CALL_SERVER_RECV_REQUEST: | 163 | case RXRPC_CALL_SERVER_RECV_REQUEST: |
162 | call->tx_phase = true; | 164 | call->tx_phase = true; |
163 | call->state = RXRPC_CALL_SERVER_ACK_REQUEST; | 165 | call->state = RXRPC_CALL_SERVER_ACK_REQUEST; |
164 | call->ack_at = call->expire_at; | 166 | call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; |
165 | write_unlock_bh(&call->state_lock); | 167 | write_unlock_bh(&call->state_lock); |
166 | rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, | 168 | rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, |
167 | rxrpc_propose_ack_processing_op); | 169 | rxrpc_propose_ack_processing_op); |
@@ -217,10 +219,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) | |||
217 | after_eq(top, call->ackr_seen + 2) || | 219 | after_eq(top, call->ackr_seen + 2) || |
218 | (hard_ack == top && after(hard_ack, call->ackr_consumed))) | 220 | (hard_ack == top && after(hard_ack, call->ackr_consumed))) |
219 | rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, | 221 | rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, |
220 | true, false, | 222 | true, true, |
221 | rxrpc_propose_ack_rotate_rx); | 223 | rxrpc_propose_ack_rotate_rx); |
222 | if (call->ackr_reason) | 224 | if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) |
223 | rxrpc_send_ack_packet(call, false); | 225 | rxrpc_send_ack_packet(call, false, NULL); |
224 | } | 226 | } |
225 | } | 227 | } |
226 | 228 | ||
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7d2595582c09..09f2a3e05221 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c | |||
@@ -21,22 +21,6 @@ | |||
21 | #include <net/af_rxrpc.h> | 21 | #include <net/af_rxrpc.h> |
22 | #include "ar-internal.h" | 22 | #include "ar-internal.h" |
23 | 23 | ||
24 | enum rxrpc_command { | ||
25 | RXRPC_CMD_SEND_DATA, /* send data message */ | ||
26 | RXRPC_CMD_SEND_ABORT, /* request abort generation */ | ||
27 | RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ | ||
28 | RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ | ||
29 | }; | ||
30 | |||
31 | struct rxrpc_send_params { | ||
32 | s64 tx_total_len; /* Total Tx data length (if send data) */ | ||
33 | unsigned long user_call_ID; /* User's call ID */ | ||
34 | u32 abort_code; /* Abort code to Tx (if abort) */ | ||
35 | enum rxrpc_command command : 8; /* The command to implement */ | ||
36 | bool exclusive; /* Shared or exclusive call */ | ||
37 | bool upgrade; /* If the connection is upgradeable */ | ||
38 | }; | ||
39 | |||
40 | /* | 24 | /* |
41 | * Wait for space to appear in the Tx queue or a signal to occur. | 25 | * Wait for space to appear in the Tx queue or a signal to occur. |
42 | */ | 26 | */ |
@@ -174,6 +158,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, | |||
174 | rxrpc_notify_end_tx_t notify_end_tx) | 158 | rxrpc_notify_end_tx_t notify_end_tx) |
175 | { | 159 | { |
176 | struct rxrpc_skb_priv *sp = rxrpc_skb(skb); | 160 | struct rxrpc_skb_priv *sp = rxrpc_skb(skb); |
161 | unsigned long now; | ||
177 | rxrpc_seq_t seq = sp->hdr.seq; | 162 | rxrpc_seq_t seq = sp->hdr.seq; |
178 | int ret, ix; | 163 | int ret, ix; |
179 | u8 annotation = RXRPC_TX_ANNO_UNACK; | 164 | u8 annotation = RXRPC_TX_ANNO_UNACK; |
@@ -213,11 +198,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, | |||
213 | break; | 198 | break; |
214 | case RXRPC_CALL_SERVER_ACK_REQUEST: | 199 | case RXRPC_CALL_SERVER_ACK_REQUEST: |
215 | call->state = RXRPC_CALL_SERVER_SEND_REPLY; | 200 | call->state = RXRPC_CALL_SERVER_SEND_REPLY; |
216 | call->ack_at = call->expire_at; | 201 | now = jiffies; |
202 | WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET); | ||
217 | if (call->ackr_reason == RXRPC_ACK_DELAY) | 203 | if (call->ackr_reason == RXRPC_ACK_DELAY) |
218 | call->ackr_reason = 0; | 204 | call->ackr_reason = 0; |
219 | __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply, | 205 | trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); |
220 | ktime_get_real()); | ||
221 | if (!last) | 206 | if (!last) |
222 | break; | 207 | break; |
223 | /* Fall through */ | 208 | /* Fall through */ |
@@ -239,14 +224,19 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, | |||
239 | _debug("need instant resend %d", ret); | 224 | _debug("need instant resend %d", ret); |
240 | rxrpc_instant_resend(call, ix); | 225 | rxrpc_instant_resend(call, ix); |
241 | } else { | 226 | } else { |
242 | ktime_t now = ktime_get_real(), resend_at; | 227 | unsigned long now = jiffies, resend_at; |
243 | 228 | ||
244 | resend_at = ktime_add_ms(now, rxrpc_resend_timeout); | 229 | if (call->peer->rtt_usage > 1) |
245 | 230 | resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2); | |
246 | if (ktime_before(resend_at, call->resend_at)) { | 231 | else |
247 | call->resend_at = resend_at; | 232 | resend_at = rxrpc_resend_timeout; |
248 | rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); | 233 | if (resend_at < 1) |
249 | } | 234 | resend_at = 1; |
235 | |||
236 | resend_at += now; | ||
237 | WRITE_ONCE(call->resend_at, resend_at); | ||
238 | rxrpc_reduce_call_timer(call, resend_at, now, | ||
239 | rxrpc_timer_set_for_send); | ||
250 | } | 240 | } |
251 | 241 | ||
252 | rxrpc_free_skb(skb, rxrpc_skb_tx_freed); | 242 | rxrpc_free_skb(skb, rxrpc_skb_tx_freed); |
@@ -295,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, | |||
295 | do { | 285 | do { |
296 | /* Check to see if there's a ping ACK to reply to. */ | 286 | /* Check to see if there's a ping ACK to reply to. */ |
297 | if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) | 287 | if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) |
298 | rxrpc_send_ack_packet(call, false); | 288 | rxrpc_send_ack_packet(call, false, NULL); |
299 | 289 | ||
300 | if (!skb) { | 290 | if (!skb) { |
301 | size_t size, chunk, max, space; | 291 | size_t size, chunk, max, space; |
@@ -480,11 +470,11 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) | |||
480 | if (msg->msg_flags & MSG_CMSG_COMPAT) { | 470 | if (msg->msg_flags & MSG_CMSG_COMPAT) { |
481 | if (len != sizeof(u32)) | 471 | if (len != sizeof(u32)) |
482 | return -EINVAL; | 472 | return -EINVAL; |
483 | p->user_call_ID = *(u32 *)CMSG_DATA(cmsg); | 473 | p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg); |
484 | } else { | 474 | } else { |
485 | if (len != sizeof(unsigned long)) | 475 | if (len != sizeof(unsigned long)) |
486 | return -EINVAL; | 476 | return -EINVAL; |
487 | p->user_call_ID = *(unsigned long *) | 477 | p->call.user_call_ID = *(unsigned long *) |
488 | CMSG_DATA(cmsg); | 478 | CMSG_DATA(cmsg); |
489 | } | 479 | } |
490 | got_user_ID = true; | 480 | got_user_ID = true; |
@@ -522,11 +512,24 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) | |||
522 | break; | 512 | break; |
523 | 513 | ||
524 | case RXRPC_TX_LENGTH: | 514 | case RXRPC_TX_LENGTH: |
525 | if (p->tx_total_len != -1 || len != sizeof(__s64)) | 515 | if (p->call.tx_total_len != -1 || len != sizeof(__s64)) |
516 | return -EINVAL; | ||
517 | p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg); | ||
518 | if (p->call.tx_total_len < 0) | ||
526 | return -EINVAL; | 519 | return -EINVAL; |
527 | p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg); | 520 | break; |
528 | if (p->tx_total_len < 0) | 521 | |
522 | case RXRPC_SET_CALL_TIMEOUT: | ||
523 | if (len & 3 || len < 4 || len > 12) | ||
529 | return -EINVAL; | 524 | return -EINVAL; |
525 | memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len); | ||
526 | p->call.nr_timeouts = len / 4; | ||
527 | if (p->call.timeouts.hard > INT_MAX / HZ) | ||
528 | return -ERANGE; | ||
529 | if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000) | ||
530 | return -ERANGE; | ||
531 | if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000) | ||
532 | return -ERANGE; | ||
530 | break; | 533 | break; |
531 | 534 | ||
532 | default: | 535 | default: |
@@ -536,7 +539,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) | |||
536 | 539 | ||
537 | if (!got_user_ID) | 540 | if (!got_user_ID) |
538 | return -EINVAL; | 541 | return -EINVAL; |
539 | if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) | 542 | if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) |
540 | return -EINVAL; | 543 | return -EINVAL; |
541 | _leave(" = 0"); | 544 | _leave(" = 0"); |
542 | return 0; | 545 | return 0; |
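From userspace, RXRPC_SET_CALL_TIMEOUT rides as a control message of 4, 8 or 12 bytes (one to three u32s, filled in hard/idle/normal order per the parser above; the values are taken as milliseconds by the msecs_to_jiffies() conversions later in this file). A sketch of the sender side, assuming the AF_RXRPC uapi definitions:

#include <string.h>
#include <sys/socket.h>
#include <linux/rxrpc.h>	/* SOL_RXRPC, RXRPC_SET_CALL_TIMEOUT */

/* Fill a cmsg carrying all three timeouts (len 12 -> nr_timeouts == 3
 * in the parser above).  msg->msg_control must already point at a
 * buffer of at least CMSG_SPACE(12) bytes, and the cmsg must ride on
 * the same sendmsg() that carries RXRPC_USER_CALL_ID for the call.
 */
static void fill_timeout_cmsg(struct msghdr *msg, unsigned int hard,
			      unsigned int idle, unsigned int normal)
{
	unsigned int timeouts[3] = { hard, idle, normal };
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);

	cmsg->cmsg_level = SOL_RXRPC;
	cmsg->cmsg_type  = RXRPC_SET_CALL_TIMEOUT;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(timeouts));
	memcpy(CMSG_DATA(cmsg), timeouts, sizeof(timeouts));
}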
@@ -576,8 +579,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, | |||
576 | cp.exclusive = rx->exclusive | p->exclusive; | 579 | cp.exclusive = rx->exclusive | p->exclusive; |
577 | cp.upgrade = p->upgrade; | 580 | cp.upgrade = p->upgrade; |
578 | cp.service_id = srx->srx_service; | 581 | cp.service_id = srx->srx_service; |
579 | call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID, | 582 | call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL); |
580 | p->tx_total_len, GFP_KERNEL); | ||
581 | /* The socket is now unlocked */ | 583 | /* The socket is now unlocked */ |
582 | 584 | ||
583 | _leave(" = %p\n", call); | 585 | _leave(" = %p\n", call); |
@@ -594,15 +596,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) | |||
594 | { | 596 | { |
595 | enum rxrpc_call_state state; | 597 | enum rxrpc_call_state state; |
596 | struct rxrpc_call *call; | 598 | struct rxrpc_call *call; |
599 | unsigned long now, j; | ||
597 | int ret; | 600 | int ret; |
598 | 601 | ||
599 | struct rxrpc_send_params p = { | 602 | struct rxrpc_send_params p = { |
600 | .tx_total_len = -1, | 603 | .call.tx_total_len = -1, |
601 | .user_call_ID = 0, | 604 | .call.user_call_ID = 0, |
602 | .abort_code = 0, | 605 | .call.nr_timeouts = 0, |
603 | .command = RXRPC_CMD_SEND_DATA, | 606 | .abort_code = 0, |
604 | .exclusive = false, | 607 | .command = RXRPC_CMD_SEND_DATA, |
605 | .upgrade = true, | 608 | .exclusive = false, |
609 | .upgrade = false, | ||
606 | }; | 610 | }; |
607 | 611 | ||
608 | _enter(""); | 612 | _enter(""); |
@@ -615,15 +619,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) | |||
615 | ret = -EINVAL; | 619 | ret = -EINVAL; |
616 | if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) | 620 | if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) |
617 | goto error_release_sock; | 621 | goto error_release_sock; |
618 | call = rxrpc_accept_call(rx, p.user_call_ID, NULL); | 622 | call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL); |
619 | /* The socket is now unlocked. */ | 623 | /* The socket is now unlocked. */ |
620 | if (IS_ERR(call)) | 624 | if (IS_ERR(call)) |
621 | return PTR_ERR(call); | 625 | return PTR_ERR(call); |
622 | rxrpc_put_call(call, rxrpc_call_put); | 626 | ret = 0; |
623 | return 0; | 627 | goto out_put_unlock; |
624 | } | 628 | } |
625 | 629 | ||
626 | call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); | 630 | call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); |
627 | if (!call) { | 631 | if (!call) { |
628 | ret = -EBADSLT; | 632 | ret = -EBADSLT; |
629 | if (p.command != RXRPC_CMD_SEND_DATA) | 633 | if (p.command != RXRPC_CMD_SEND_DATA) |
@@ -653,14 +657,39 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) | |||
653 | goto error_put; | 657 | goto error_put; |
654 | } | 658 | } |
655 | 659 | ||
656 | if (p.tx_total_len != -1) { | 660 | if (p.call.tx_total_len != -1) { |
657 | ret = -EINVAL; | 661 | ret = -EINVAL; |
658 | if (call->tx_total_len != -1 || | 662 | if (call->tx_total_len != -1 || |
659 | call->tx_pending || | 663 | call->tx_pending || |
660 | call->tx_top != 0) | 664 | call->tx_top != 0) |
661 | goto error_put; | 665 | goto error_put; |
662 | call->tx_total_len = p.tx_total_len; | 666 | call->tx_total_len = p.call.tx_total_len; |
667 | } | ||
668 | } | ||
669 | |||
670 | switch (p.call.nr_timeouts) { | ||
671 | case 3: | ||
672 | j = msecs_to_jiffies(p.call.timeouts.normal); | ||
673 | if (p.call.timeouts.normal > 0 && j == 0) | ||
674 | j = 1; | ||
675 | WRITE_ONCE(call->next_rx_timo, j); | ||
676 | /* Fall through */ | ||
677 | case 2: | ||
678 | j = msecs_to_jiffies(p.call.timeouts.idle); | ||
679 | if (p.call.timeouts.idle > 0 && j == 0) | ||
680 | j = 1; | ||
681 | WRITE_ONCE(call->next_req_timo, j); | ||
682 | /* Fall through */ | ||
683 | case 1: | ||
684 | if (p.call.timeouts.hard > 0) { | ||
685 | j = msecs_to_jiffies(p.call.timeouts.hard); | ||
686 | now = jiffies; | ||
687 | j += now; | ||
688 | WRITE_ONCE(call->expect_term_by, j); | ||
689 | rxrpc_reduce_call_timer(call, j, now, | ||
690 | rxrpc_timer_set_for_hard); | ||
663 | } | 691 | } |
692 | break; | ||
664 | } | 693 | } |
665 | 694 | ||
666 | state = READ_ONCE(call->state); | 695 | state = READ_ONCE(call->state); |
@@ -689,6 +718,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) | |||
689 | ret = rxrpc_send_data(rx, call, msg, len, NULL); | 718 | ret = rxrpc_send_data(rx, call, msg, len, NULL); |
690 | } | 719 | } |
691 | 720 | ||
721 | out_put_unlock: | ||
692 | mutex_unlock(&call->user_mutex); | 722 | mutex_unlock(&call->user_mutex); |
693 | error_put: | 723 | error_put: |
694 | rxrpc_put_call(call, rxrpc_call_put); | 724 | rxrpc_put_call(call, rxrpc_call_put); |
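Note on the sendmsg.c hunks above: the new switch accepts one to three u32 millisecond timeouts (hard, idle, normal) from the control message, falling through from case 3 to case 1, and converts each to jiffies while rounding any nonzero value that would truncate to zero up to a single jiffy, so a requested timeout is never silently disabled. A standalone model of that clamp; HZ and msecs_to_jiffies() here are simplified stand-ins for the kernel versions:

#include <stdio.h>

#define HZ 100	/* assumed tick rate, for illustration only */

static unsigned long msecs_to_jiffies(unsigned int ms)
{
	return ((unsigned long)ms * HZ) / 1000;	/* truncating conversion */
}

/* A zero result for a nonzero request would never fire, so round up. */
static unsigned long ms_to_jiffies_min1(unsigned int ms)
{
	unsigned long j = msecs_to_jiffies(ms);

	if (ms > 0 && j == 0)
		j = 1;
	return j;
}

int main(void)
{
	printf("%lu\n", ms_to_jiffies_min1(3));		/* 3 ms -> 1 jiffy, not 0 */
	printf("%lu\n", ms_to_jiffies_min1(250));	/* 250 ms -> 25 jiffies */
	return 0;
}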
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 34c706d2f79c..4a7af7aff37d 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c | |||
@@ -21,6 +21,8 @@ static const unsigned int four = 4; | |||
21 | static const unsigned int thirtytwo = 32; | 21 | static const unsigned int thirtytwo = 32; |
22 | static const unsigned int n_65535 = 65535; | 22 | static const unsigned int n_65535 = 65535; |
23 | static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; | 23 | static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; |
24 | static const unsigned long one_jiffy = 1; | ||
25 | static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; | ||
24 | 26 | ||
25 | /* | 27 | /* |
26 | * RxRPC operating parameters. | 28 | * RxRPC operating parameters. |
@@ -29,64 +31,60 @@ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; | |||
29 | * information on the individual parameters. | 31 | * information on the individual parameters. |
30 | */ | 32 | */ |
31 | static struct ctl_table rxrpc_sysctl_table[] = { | 33 | static struct ctl_table rxrpc_sysctl_table[] = { |
32 | /* Values measured in milliseconds */ | 34 | /* Values measured in milliseconds but used in jiffies */ |
33 | { | 35 | { |
34 | .procname = "req_ack_delay", | 36 | .procname = "req_ack_delay", |
35 | .data = &rxrpc_requested_ack_delay, | 37 | .data = &rxrpc_requested_ack_delay, |
36 | .maxlen = sizeof(unsigned int), | 38 | .maxlen = sizeof(unsigned long), |
37 | .mode = 0644, | 39 | .mode = 0644, |
38 | .proc_handler = proc_dointvec, | 40 | .proc_handler = proc_doulongvec_ms_jiffies_minmax, |
39 | .extra1 = (void *)&zero, | 41 | .extra1 = (void *)&one_jiffy, |
42 | .extra2 = (void *)&max_jiffies, | ||
40 | }, | 43 | }, |
41 | { | 44 | { |
42 | .procname = "soft_ack_delay", | 45 | .procname = "soft_ack_delay", |
43 | .data = &rxrpc_soft_ack_delay, | 46 | .data = &rxrpc_soft_ack_delay, |
44 | .maxlen = sizeof(unsigned int), | 47 | .maxlen = sizeof(unsigned long), |
45 | .mode = 0644, | 48 | .mode = 0644, |
46 | .proc_handler = proc_dointvec, | 49 | .proc_handler = proc_doulongvec_ms_jiffies_minmax, |
47 | .extra1 = (void *)&one, | 50 | .extra1 = (void *)&one_jiffy, |
51 | .extra2 = (void *)&max_jiffies, | ||
48 | }, | 52 | }, |
49 | { | 53 | { |
50 | .procname = "idle_ack_delay", | 54 | .procname = "idle_ack_delay", |
51 | .data = &rxrpc_idle_ack_delay, | 55 | .data = &rxrpc_idle_ack_delay, |
52 | .maxlen = sizeof(unsigned int), | 56 | .maxlen = sizeof(unsigned long), |
53 | .mode = 0644, | 57 | .mode = 0644, |
54 | .proc_handler = proc_dointvec, | 58 | .proc_handler = proc_doulongvec_ms_jiffies_minmax, |
55 | .extra1 = (void *)&one, | 59 | .extra1 = (void *)&one_jiffy, |
56 | }, | 60 | .extra2 = (void *)&max_jiffies, |
57 | { | ||
58 | .procname = "resend_timeout", | ||
59 | .data = &rxrpc_resend_timeout, | ||
60 | .maxlen = sizeof(unsigned int), | ||
61 | .mode = 0644, | ||
62 | .proc_handler = proc_dointvec, | ||
63 | .extra1 = (void *)&one, | ||
64 | }, | 61 | }, |
65 | { | 62 | { |
66 | .procname = "idle_conn_expiry", | 63 | .procname = "idle_conn_expiry", |
67 | .data = &rxrpc_conn_idle_client_expiry, | 64 | .data = &rxrpc_conn_idle_client_expiry, |
68 | .maxlen = sizeof(unsigned int), | 65 | .maxlen = sizeof(unsigned long), |
69 | .mode = 0644, | 66 | .mode = 0644, |
70 | .proc_handler = proc_dointvec_ms_jiffies, | 67 | .proc_handler = proc_doulongvec_ms_jiffies_minmax, |
71 | .extra1 = (void *)&one, | 68 | .extra1 = (void *)&one_jiffy, |
69 | .extra2 = (void *)&max_jiffies, | ||
72 | }, | 70 | }, |
73 | { | 71 | { |
74 | .procname = "idle_conn_fast_expiry", | 72 | .procname = "idle_conn_fast_expiry", |
75 | .data = &rxrpc_conn_idle_client_fast_expiry, | 73 | .data = &rxrpc_conn_idle_client_fast_expiry, |
76 | .maxlen = sizeof(unsigned int), | 74 | .maxlen = sizeof(unsigned long), |
77 | .mode = 0644, | 75 | .mode = 0644, |
78 | .proc_handler = proc_dointvec_ms_jiffies, | 76 | .proc_handler = proc_doulongvec_ms_jiffies_minmax, |
79 | .extra1 = (void *)&one, | 77 | .extra1 = (void *)&one_jiffy, |
78 | .extra2 = (void *)&max_jiffies, | ||
80 | }, | 79 | }, |
81 | |||
82 | /* Values measured in seconds but used in jiffies */ | ||
83 | { | 80 | { |
84 | .procname = "max_call_lifetime", | 81 | .procname = "resend_timeout", |
85 | .data = &rxrpc_max_call_lifetime, | 82 | .data = &rxrpc_resend_timeout, |
86 | .maxlen = sizeof(unsigned int), | 83 | .maxlen = sizeof(unsigned long), |
87 | .mode = 0644, | 84 | .mode = 0644, |
88 | .proc_handler = proc_dointvec, | 85 | .proc_handler = proc_doulongvec_ms_jiffies_minmax, |
89 | .extra1 = (void *)&one, | 86 | .extra1 = (void *)&one_jiffy, |
87 | .extra2 = (void *)&max_jiffies, | ||
90 | }, | 88 | }, |
91 | 89 | ||
92 | /* Non-time values */ | 90 | /* Non-time values */ |
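All of the timeout sysctls above converge on the same shape: an unsigned long stored in jiffies, exposed to userspace in milliseconds, and clamped to [one jiffy, MAX_JIFFY_OFFSET] by proc_doulongvec_ms_jiffies_minmax. A minimal sketch of one such entry; the variable name is hypothetical and the register_net_sysctl() call is omitted:

static unsigned long rxrpc_example_delay;	/* hypothetical, stored in jiffies */

static struct ctl_table example_table[] = {
	{
		.procname	= "example_delay",
		.data		= &rxrpc_example_delay,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_ms_jiffies_minmax,
		.extra1		= (void *)&one_jiffy,	/* lower bound */
		.extra2		= (void *)&max_jiffies,	/* upper bound */
	},
	{ }
};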
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e29a48ef7fc3..a0ac42b3ed06 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c | |||
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, | |||
159 | if (action == TC_ACT_SHOT) | 159 | if (action == TC_ACT_SHOT) |
160 | this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; | 160 | this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; |
161 | 161 | ||
162 | tm->lastuse = lastuse; | 162 | tm->lastuse = max_t(u64, tm->lastuse, lastuse); |
163 | } | 163 | } |
164 | 164 | ||
165 | static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, | 165 | static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, |
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c index 1e3f10e5da99..6445184b2759 100644 --- a/net/sched/act_meta_mark.c +++ b/net/sched/act_meta_mark.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <net/pkt_sched.h> | 22 | #include <net/pkt_sched.h> |
23 | #include <uapi/linux/tc_act/tc_ife.h> | 23 | #include <uapi/linux/tc_act/tc_ife.h> |
24 | #include <net/tc_act/tc_ife.h> | 24 | #include <net/tc_act/tc_ife.h> |
25 | #include <linux/rtnetlink.h> | ||
26 | 25 | ||
27 | static int skbmark_encode(struct sk_buff *skb, void *skbdata, | 26 | static int skbmark_encode(struct sk_buff *skb, void *skbdata, |
28 | struct tcf_meta_info *e) | 27 | struct tcf_meta_info *e) |
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c index 2ea1f26c9e96..7221437ca3a6 100644 --- a/net/sched/act_meta_skbtcindex.c +++ b/net/sched/act_meta_skbtcindex.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <net/pkt_sched.h> | 22 | #include <net/pkt_sched.h> |
23 | #include <uapi/linux/tc_act/tc_ife.h> | 23 | #include <uapi/linux/tc_act/tc_ife.h> |
24 | #include <net/tc_act/tc_ife.h> | 24 | #include <net/tc_act/tc_ife.h> |
25 | #include <linux/rtnetlink.h> | ||
26 | 25 | ||
27 | static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, | 26 | static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, |
28 | struct tcf_meta_info *e) | 27 | struct tcf_meta_info *e) |
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8b3e59388480..08b61849c2a2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c | |||
@@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, | |||
239 | struct tcf_t *tm = &m->tcf_tm; | 239 | struct tcf_t *tm = &m->tcf_tm; |
240 | 240 | ||
241 | _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); | 241 | _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); |
242 | tm->lastuse = lastuse; | 242 | tm->lastuse = max_t(u64, tm->lastuse, lastuse); |
243 | } | 243 | } |
244 | 244 | ||
245 | static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, | 245 | static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, |
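Both tcf_gact_stats_update() above and tcf_stats_update() here now take the maximum of the stored and reported lastuse, since a hardware counter dump can carry a timestamp older than one already recorded from the software path. A runnable model of the invariant:

#include <stdint.h>
#include <stdio.h>

/* Equivalent of max_t(u64, tm->lastuse, lastuse): never move backwards. */
static uint64_t lastuse_update(uint64_t cur, uint64_t reported)
{
	return reported > cur ? reported : cur;
}

int main(void)
{
	uint64_t lastuse = 1000;

	lastuse = lastuse_update(lastuse, 900);		/* stale HW report: ignored */
	lastuse = lastuse_update(lastuse, 1100);	/* newer report: taken */
	printf("%llu\n", (unsigned long long)lastuse);	/* prints 1100 */
	return 0;
}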
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 8b5abcd2f32f..9438969290a6 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c | |||
@@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, | |||
96 | return ret; | 96 | return ret; |
97 | } | 97 | } |
98 | 98 | ||
99 | static void tcf_sample_cleanup_rcu(struct rcu_head *rcu) | 99 | static void tcf_sample_cleanup(struct tc_action *a, int bind) |
100 | { | 100 | { |
101 | struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu); | 101 | struct tcf_sample *s = to_sample(a); |
102 | struct psample_group *psample_group; | 102 | struct psample_group *psample_group; |
103 | 103 | ||
104 | psample_group = rcu_dereference_protected(s->psample_group, 1); | 104 | psample_group = rtnl_dereference(s->psample_group); |
105 | RCU_INIT_POINTER(s->psample_group, NULL); | 105 | RCU_INIT_POINTER(s->psample_group, NULL); |
106 | psample_group_put(psample_group); | 106 | psample_group_put(psample_group); |
107 | } | 107 | } |
108 | 108 | ||
109 | static void tcf_sample_cleanup(struct tc_action *a, int bind) | ||
110 | { | ||
111 | struct tcf_sample *s = to_sample(a); | ||
112 | |||
113 | call_rcu(&s->rcu, tcf_sample_cleanup_rcu); | ||
114 | } | ||
115 | |||
116 | static bool tcf_sample_dev_ok_push(struct net_device *dev) | 109 | static bool tcf_sample_dev_ok_push(struct net_device *dev) |
117 | { | 110 | { |
118 | switch (dev->type) { | 111 | switch (dev->type) { |
@@ -264,7 +257,6 @@ static int __init sample_init_module(void) | |||
264 | 257 | ||
265 | static void __exit sample_cleanup_module(void) | 258 | static void __exit sample_cleanup_module(void) |
266 | { | 259 | { |
267 | rcu_barrier(); | ||
268 | tcf_unregister_action(&act_sample_ops, &sample_net_ops); | 260 | tcf_unregister_action(&act_sample_ops, &sample_net_ops); |
269 | } | 261 | } |
270 | 262 | ||
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7d97f612c9b9..b9d63d2246e6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/skbuff.h> | 23 | #include <linux/skbuff.h> |
24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
25 | #include <linux/kmod.h> | 25 | #include <linux/kmod.h> |
26 | #include <linux/err.h> | ||
27 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
28 | #include <net/net_namespace.h> | 27 | #include <net/net_namespace.h> |
29 | #include <net/sock.h> | 28 | #include <net/sock.h> |
@@ -336,7 +335,8 @@ static void tcf_block_put_final(struct work_struct *work) | |||
336 | struct tcf_chain *chain, *tmp; | 335 | struct tcf_chain *chain, *tmp; |
337 | 336 | ||
338 | rtnl_lock(); | 337 | rtnl_lock(); |
339 | /* Only chain 0 should be still here. */ | 338 | |
339 | /* At this point, all the chains should have refcnt == 1. */ | ||
340 | list_for_each_entry_safe(chain, tmp, &block->chain_list, list) | 340 | list_for_each_entry_safe(chain, tmp, &block->chain_list, list) |
341 | tcf_chain_put(chain); | 341 | tcf_chain_put(chain); |
342 | rtnl_unlock(); | 342 | rtnl_unlock(); |
@@ -344,15 +344,23 @@ static void tcf_block_put_final(struct work_struct *work) | |||
344 | } | 344 | } |
345 | 345 | ||
346 | /* XXX: Standalone actions are not allowed to jump to any chain, and bound | 346 | /* XXX: Standalone actions are not allowed to jump to any chain, and bound |
347 | * actions should all be removed after flushing. However, filters are now | 347 | * actions should all be removed after flushing. |
348 | * destroyed in the tc filter workqueue under the RTNL lock, so they cannot race here. | ||
349 | */ | 348 | */ |
350 | void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, | 349 | void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, |
351 | struct tcf_block_ext_info *ei) | 350 | struct tcf_block_ext_info *ei) |
352 | { | 351 | { |
353 | struct tcf_chain *chain, *tmp; | 352 | struct tcf_chain *chain; |
354 | 353 | ||
355 | list_for_each_entry_safe(chain, tmp, &block->chain_list, list) | 354 | if (!block) |
355 | return; | ||
356 | /* Hold a refcnt for all chains, except 0, so that they don't disappear | ||
357 | * while we are iterating. | ||
358 | */ | ||
359 | list_for_each_entry(chain, &block->chain_list, list) | ||
360 | if (chain->index) | ||
361 | tcf_chain_hold(chain); | ||
362 | |||
363 | list_for_each_entry(chain, &block->chain_list, list) | ||
356 | tcf_chain_flush(chain); | 364 | tcf_chain_flush(chain); |
357 | 365 | ||
358 | tcf_block_offload_unbind(block, q, ei); | 366 | tcf_block_offload_unbind(block, q, ei); |
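tcf_block_put_ext() above switches to a hold-then-flush walk: flushing a chain can drop its last reference and free it mid-iteration, so every chain except chain 0 is pinned first. A toy single-threaded model of why the extra reference makes the walk safe; locking, list unlinking, and the later release in tcf_block_put_final() are all omitted:

#include <stdlib.h>

struct chain {
	int refcnt;
	struct chain *next;
};

static void chain_put(struct chain *c)
{
	if (--c->refcnt == 0)
		free(c);
}

/* Stand-in for tcf_chain_flush(): may drop a reference the chain holds
 * on itself, modeled here as a single put.
 */
static void chain_flush(struct chain *c)
{
	chain_put(c);
}

static void flush_all_chains(struct chain *head)
{
	struct chain *c;

	for (c = head; c; c = c->next)
		c->refcnt++;		/* hold: pin every chain first */
	for (c = head; c; c = c->next)
		chain_flush(c);		/* cannot free c: we still hold a ref */
}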
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index a9f3e317055c..a62586e2dbdb 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c | |||
@@ -42,7 +42,6 @@ struct cls_bpf_prog { | |||
42 | struct list_head link; | 42 | struct list_head link; |
43 | struct tcf_result res; | 43 | struct tcf_result res; |
44 | bool exts_integrated; | 44 | bool exts_integrated; |
45 | bool offloaded; | ||
46 | u32 gen_flags; | 45 | u32 gen_flags; |
47 | struct tcf_exts exts; | 46 | struct tcf_exts exts; |
48 | u32 handle; | 47 | u32 handle; |
@@ -148,73 +147,63 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) | |||
148 | } | 147 | } |
149 | 148 | ||
150 | static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, | 149 | static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, |
151 | enum tc_clsbpf_command cmd) | 150 | struct cls_bpf_prog *oldprog) |
152 | { | 151 | { |
153 | bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE; | ||
154 | struct tcf_block *block = tp->chain->block; | 152 | struct tcf_block *block = tp->chain->block; |
155 | bool skip_sw = tc_skip_sw(prog->gen_flags); | ||
156 | struct tc_cls_bpf_offload cls_bpf = {}; | 153 | struct tc_cls_bpf_offload cls_bpf = {}; |
154 | struct cls_bpf_prog *obj; | ||
155 | bool skip_sw; | ||
157 | int err; | 156 | int err; |
158 | 157 | ||
158 | skip_sw = prog && tc_skip_sw(prog->gen_flags); | ||
159 | obj = prog ?: oldprog; | ||
160 | |||
159 | tc_cls_common_offload_init(&cls_bpf.common, tp); | 161 | tc_cls_common_offload_init(&cls_bpf.common, tp); |
160 | cls_bpf.command = cmd; | 162 | cls_bpf.command = TC_CLSBPF_OFFLOAD; |
161 | cls_bpf.exts = &prog->exts; | 163 | cls_bpf.exts = &obj->exts; |
162 | cls_bpf.prog = prog->filter; | 164 | cls_bpf.prog = prog ? prog->filter : NULL; |
163 | cls_bpf.name = prog->bpf_name; | 165 | cls_bpf.oldprog = oldprog ? oldprog->filter : NULL; |
164 | cls_bpf.exts_integrated = prog->exts_integrated; | 166 | cls_bpf.name = obj->bpf_name; |
165 | cls_bpf.gen_flags = prog->gen_flags; | 167 | cls_bpf.exts_integrated = obj->exts_integrated; |
168 | cls_bpf.gen_flags = obj->gen_flags; | ||
166 | 169 | ||
167 | err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); | 170 | err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); |
168 | if (addorrep) { | 171 | if (prog) { |
169 | if (err < 0) { | 172 | if (err < 0) { |
170 | cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); | 173 | cls_bpf_offload_cmd(tp, oldprog, prog); |
171 | return err; | 174 | return err; |
172 | } else if (err > 0) { | 175 | } else if (err > 0) { |
173 | prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; | 176 | prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; |
174 | } | 177 | } |
175 | } | 178 | } |
176 | 179 | ||
177 | if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) | 180 | if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) |
178 | return -EINVAL; | 181 | return -EINVAL; |
179 | 182 | ||
180 | return 0; | 183 | return 0; |
181 | } | 184 | } |
182 | 185 | ||
186 | static u32 cls_bpf_flags(u32 flags) | ||
187 | { | ||
188 | return flags & CLS_BPF_SUPPORTED_GEN_FLAGS; | ||
189 | } | ||
190 | |||
183 | static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, | 191 | static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, |
184 | struct cls_bpf_prog *oldprog) | 192 | struct cls_bpf_prog *oldprog) |
185 | { | 193 | { |
186 | struct cls_bpf_prog *obj = prog; | 194 | if (prog && oldprog && |
187 | enum tc_clsbpf_command cmd; | 195 | cls_bpf_flags(prog->gen_flags) != |
188 | bool skip_sw; | 196 | cls_bpf_flags(oldprog->gen_flags)) |
189 | int ret; | 197 | return -EINVAL; |
190 | |||
191 | skip_sw = tc_skip_sw(prog->gen_flags) || | ||
192 | (oldprog && tc_skip_sw(oldprog->gen_flags)); | ||
193 | |||
194 | if (oldprog && oldprog->offloaded) { | ||
195 | if (!tc_skip_hw(prog->gen_flags)) { | ||
196 | cmd = TC_CLSBPF_REPLACE; | ||
197 | } else if (!tc_skip_sw(prog->gen_flags)) { | ||
198 | obj = oldprog; | ||
199 | cmd = TC_CLSBPF_DESTROY; | ||
200 | } else { | ||
201 | return -EINVAL; | ||
202 | } | ||
203 | } else { | ||
204 | if (tc_skip_hw(prog->gen_flags)) | ||
205 | return skip_sw ? -EINVAL : 0; | ||
206 | cmd = TC_CLSBPF_ADD; | ||
207 | } | ||
208 | |||
209 | ret = cls_bpf_offload_cmd(tp, obj, cmd); | ||
210 | if (ret) | ||
211 | return ret; | ||
212 | 198 | ||
213 | obj->offloaded = true; | 199 | if (prog && tc_skip_hw(prog->gen_flags)) |
214 | if (oldprog) | 200 | prog = NULL; |
215 | oldprog->offloaded = false; | 201 | if (oldprog && tc_skip_hw(oldprog->gen_flags)) |
202 | oldprog = NULL; | ||
203 | if (!prog && !oldprog) | ||
204 | return 0; | ||
216 | 205 | ||
217 | return 0; | 206 | return cls_bpf_offload_cmd(tp, prog, oldprog); |
218 | } | 207 | } |
219 | 208 | ||
220 | static void cls_bpf_stop_offload(struct tcf_proto *tp, | 209 | static void cls_bpf_stop_offload(struct tcf_proto *tp, |
@@ -222,25 +211,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp, | |||
222 | { | 211 | { |
223 | int err; | 212 | int err; |
224 | 213 | ||
225 | if (!prog->offloaded) | 214 | err = cls_bpf_offload_cmd(tp, NULL, prog); |
226 | return; | 215 | if (err) |
227 | |||
228 | err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); | ||
229 | if (err) { | ||
230 | pr_err("Stopping hardware offload failed: %d\n", err); | 216 | pr_err("Stopping hardware offload failed: %d\n", err); |
231 | return; | ||
232 | } | ||
233 | |||
234 | prog->offloaded = false; | ||
235 | } | 217 | } |
236 | 218 | ||
237 | static void cls_bpf_offload_update_stats(struct tcf_proto *tp, | 219 | static void cls_bpf_offload_update_stats(struct tcf_proto *tp, |
238 | struct cls_bpf_prog *prog) | 220 | struct cls_bpf_prog *prog) |
239 | { | 221 | { |
240 | if (!prog->offloaded) | 222 | struct tcf_block *block = tp->chain->block; |
241 | return; | 223 | struct tc_cls_bpf_offload cls_bpf = {}; |
242 | 224 | ||
243 | cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); | 225 | tc_cls_common_offload_init(&cls_bpf.common, tp); |
226 | cls_bpf.command = TC_CLSBPF_STATS; | ||
227 | cls_bpf.exts = &prog->exts; | ||
228 | cls_bpf.prog = prog->filter; | ||
229 | cls_bpf.name = prog->bpf_name; | ||
230 | cls_bpf.exts_integrated = prog->exts_integrated; | ||
231 | cls_bpf.gen_flags = prog->gen_flags; | ||
232 | |||
233 | tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false); | ||
244 | } | 234 | } |
245 | 235 | ||
246 | static int cls_bpf_init(struct tcf_proto *tp) | 236 | static int cls_bpf_init(struct tcf_proto *tp) |
@@ -258,11 +248,8 @@ static int cls_bpf_init(struct tcf_proto *tp) | |||
258 | return 0; | 248 | return 0; |
259 | } | 249 | } |
260 | 250 | ||
261 | static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) | 251 | static void cls_bpf_free_parms(struct cls_bpf_prog *prog) |
262 | { | 252 | { |
263 | tcf_exts_destroy(&prog->exts); | ||
264 | tcf_exts_put_net(&prog->exts); | ||
265 | |||
266 | if (cls_bpf_is_ebpf(prog)) | 253 | if (cls_bpf_is_ebpf(prog)) |
267 | bpf_prog_put(prog->filter); | 254 | bpf_prog_put(prog->filter); |
268 | else | 255 | else |
@@ -270,6 +257,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) | |||
270 | 257 | ||
271 | kfree(prog->bpf_name); | 258 | kfree(prog->bpf_name); |
272 | kfree(prog->bpf_ops); | 259 | kfree(prog->bpf_ops); |
260 | } | ||
261 | |||
262 | static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) | ||
263 | { | ||
264 | tcf_exts_destroy(&prog->exts); | ||
265 | tcf_exts_put_net(&prog->exts); | ||
266 | |||
267 | cls_bpf_free_parms(prog); | ||
273 | kfree(prog); | 268 | kfree(prog); |
274 | } | 269 | } |
275 | 270 | ||
@@ -514,12 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, | |||
514 | goto errout_idr; | 509 | goto errout_idr; |
515 | 510 | ||
516 | ret = cls_bpf_offload(tp, prog, oldprog); | 511 | ret = cls_bpf_offload(tp, prog, oldprog); |
517 | if (ret) { | 512 | if (ret) |
518 | if (!oldprog) | 513 | goto errout_parms; |
519 | idr_remove_ext(&head->handle_idr, prog->handle); | ||
520 | __cls_bpf_delete_prog(prog); | ||
521 | return ret; | ||
522 | } | ||
523 | 514 | ||
524 | if (!tc_in_hw(prog->gen_flags)) | 515 | if (!tc_in_hw(prog->gen_flags)) |
525 | prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; | 516 | prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; |
@@ -537,6 +528,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, | |||
537 | *arg = prog; | 528 | *arg = prog; |
538 | return 0; | 529 | return 0; |
539 | 530 | ||
531 | errout_parms: | ||
532 | cls_bpf_free_parms(prog); | ||
540 | errout_idr: | 533 | errout_idr: |
541 | if (!oldprog) | 534 | if (!oldprog) |
542 | idr_remove_ext(&head->handle_idr, prog->handle); | 535 | idr_remove_ext(&head->handle_idr, prog->handle); |
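The cls_bpf rewrite above folds ADD/REPLACE/DESTROY into a single TC_CLSBPF_OFFLOAD command whose meaning is carried entirely by which of (prog, oldprog) is non-NULL, after tc_skip_hw() entries have been NULLed out. A runnable decoder of that convention:

#include <stdio.h>

static const char *offload_kind(const void *prog, const void *oldprog)
{
	if (prog && oldprog)
		return "replace";	/* swap oldprog for prog */
	if (prog)
		return "add";
	if (oldprog)
		return "destroy";
	return "nothing to offload";
}

int main(void)
{
	int new_prog, old_prog;	/* stand-ins for struct cls_bpf_prog */

	printf("%s\n", offload_kind(&new_prog, NULL));
	printf("%s\n", offload_kind(&new_prog, &old_prog));
	printf("%s\n", offload_kind(NULL, &old_prog));
	return 0;
}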
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ac152b4f4247..507859cdd1cb 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c | |||
@@ -45,7 +45,6 @@ | |||
45 | #include <net/netlink.h> | 45 | #include <net/netlink.h> |
46 | #include <net/act_api.h> | 46 | #include <net/act_api.h> |
47 | #include <net/pkt_cls.h> | 47 | #include <net/pkt_cls.h> |
48 | #include <linux/netdevice.h> | ||
49 | #include <linux/idr.h> | 48 | #include <linux/idr.h> |
50 | 49 | ||
51 | struct tc_u_knode { | 50 | struct tc_u_knode { |
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index df3110d69585..07c10bac06a0 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c | |||
@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em, | |||
51 | if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) | 51 | if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) |
52 | return 0; | 52 | return 0; |
53 | 53 | ||
54 | return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len); | 54 | return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len); |
55 | } | 55 | } |
56 | 56 | ||
57 | static struct tcf_ematch_ops em_nbyte_ops = { | 57 | static struct tcf_ematch_ops em_nbyte_ops = { |
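The em_nbyte fix above stops adding hdr.off a second time: ptr is assumed to be computed at the configured offset already (the tcf_valid_offset() check just above vets exactly that range), so the comparison starts at ptr itself. A minimal model under that assumption:

#include <string.h>

/* ptr is assumed to point at skb data + hdr.off already. */
static int nbyte_match(const unsigned char *ptr,
		       const unsigned char *pattern, size_t len)
{
	return !memcmp(ptr, pattern, len);	/* 1 on match, like the ematch */
}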
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b6c4f536876b..52529b7f8d96 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c | |||
@@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, | |||
795 | tcm->tcm_info = refcount_read(&q->refcnt); | 795 | tcm->tcm_info = refcount_read(&q->refcnt); |
796 | if (nla_put_string(skb, TCA_KIND, q->ops->id)) | 796 | if (nla_put_string(skb, TCA_KIND, q->ops->id)) |
797 | goto nla_put_failure; | 797 | goto nla_put_failure; |
798 | if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) | ||
799 | goto nla_put_failure; | ||
798 | if (q->ops->dump && q->ops->dump(q, skb) < 0) | 800 | if (q->ops->dump && q->ops->dump(q, skb) < 0) |
799 | goto nla_put_failure; | 801 | goto nla_put_failure; |
800 | qlen = q->q.qlen; | 802 | qlen = q->q.qlen; |
@@ -1061,17 +1063,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev, | |||
1061 | } | 1063 | } |
1062 | 1064 | ||
1063 | if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { | 1065 | if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { |
1064 | if (qdisc_is_percpu_stats(sch)) { | ||
1065 | sch->cpu_bstats = | ||
1066 | netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); | ||
1067 | if (!sch->cpu_bstats) | ||
1068 | goto err_out4; | ||
1069 | |||
1070 | sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue); | ||
1071 | if (!sch->cpu_qstats) | ||
1072 | goto err_out4; | ||
1073 | } | ||
1074 | |||
1075 | if (tca[TCA_STAB]) { | 1066 | if (tca[TCA_STAB]) { |
1076 | stab = qdisc_get_stab(tca[TCA_STAB]); | 1067 | stab = qdisc_get_stab(tca[TCA_STAB]); |
1077 | if (IS_ERR(stab)) { | 1068 | if (IS_ERR(stab)) { |
@@ -1113,7 +1104,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, | |||
1113 | ops->destroy(sch); | 1104 | ops->destroy(sch); |
1114 | err_out3: | 1105 | err_out3: |
1115 | dev_put(dev); | 1106 | dev_put(dev); |
1116 | kfree((char *) sch - sch->padded); | 1107 | qdisc_free(sch); |
1117 | err_out2: | 1108 | err_out2: |
1118 | module_put(ops->owner); | 1109 | module_put(ops->owner); |
1119 | err_out: | 1110 | err_out: |
@@ -1121,8 +1112,6 @@ err_out: | |||
1121 | return NULL; | 1112 | return NULL; |
1122 | 1113 | ||
1123 | err_out4: | 1114 | err_out4: |
1124 | free_percpu(sch->cpu_bstats); | ||
1125 | free_percpu(sch->cpu_qstats); | ||
1126 | /* | 1115 | /* |
1127 | * Any broken qdiscs that would require a ops->reset() here? | 1116 | * Any broken qdiscs that would require a ops->reset() here? |
1128 | * The qdisc was never in action so it shouldn't be necessary. | 1117 | * The qdisc was never in action so it shouldn't be necessary. |
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6361be7881f1..525eb3a6d625 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c | |||
@@ -1158,9 +1158,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) | |||
1158 | if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) | 1158 | if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) |
1159 | return -EINVAL; | 1159 | return -EINVAL; |
1160 | 1160 | ||
1161 | err = tcf_block_get(&q->link.block, &q->link.filter_list, sch); | ||
1162 | if (err) | ||
1163 | goto put_rtab; | ||
1164 | |||
1161 | err = qdisc_class_hash_init(&q->clhash); | 1165 | err = qdisc_class_hash_init(&q->clhash); |
1162 | if (err < 0) | 1166 | if (err < 0) |
1163 | goto put_rtab; | 1167 | goto put_block; |
1164 | 1168 | ||
1165 | q->link.sibling = &q->link; | 1169 | q->link.sibling = &q->link; |
1166 | q->link.common.classid = sch->handle; | 1170 | q->link.common.classid = sch->handle; |
@@ -1194,6 +1198,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) | |||
1194 | cbq_addprio(q, &q->link); | 1198 | cbq_addprio(q, &q->link); |
1195 | return 0; | 1199 | return 0; |
1196 | 1200 | ||
1201 | put_block: | ||
1202 | tcf_block_put(q->link.block); | ||
1203 | |||
1197 | put_rtab: | 1204 | put_rtab: |
1198 | qdisc_put_rtab(q->link.R_tab); | 1205 | qdisc_put_rtab(q->link.R_tab); |
1199 | return err; | 1206 | return err; |
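cbq_init() now acquires the filter block between the rate table and the class hash, and the new put_block label keeps the unwind in strict reverse order of acquisition. A runnable skeleton of that goto-unwind shape; every function below is a hypothetical stand-in (for qdisc_get_rtab(), tcf_block_get(), qdisc_class_hash_init() and their release partners), only the structure mirrors the patch:

#include <stdio.h>

static int get_rtab(void)        { return 0; }
static int get_block(void)       { return 0; }
static int hash_init(void)       { return -1; }	/* force the error path */
static void put_block_res(void)  { puts("put block"); }
static void put_rtab_res(void)   { puts("put rtab"); }

static int init_example(void)
{
	int err;

	err = get_rtab();		/* acquired first ... */
	if (err)
		return err;

	err = get_block();
	if (err)
		goto put_rtab;

	err = hash_init();
	if (err)
		goto put_block;

	return 0;

put_block:
	put_block_res();		/* ... released in reverse order */
put_rtab:
	put_rtab_res();
	return err;
}

int main(void)
{
	return init_example() ? 1 : 0;
}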
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index b30a2c70bd48..531250fceb9e 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c | |||
@@ -369,6 +369,9 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) | |||
369 | 369 | ||
370 | ctl = nla_data(tb[TCA_CHOKE_PARMS]); | 370 | ctl = nla_data(tb[TCA_CHOKE_PARMS]); |
371 | 371 | ||
372 | if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) | ||
373 | return -EINVAL; | ||
374 | |||
372 | if (ctl->limit > CHOKE_MAX_QUEUE) | 375 | if (ctl->limit > CHOKE_MAX_QUEUE) |
373 | return -EINVAL; | 376 | return -EINVAL; |
374 | 377 | ||
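The same red_check_params() guard appears here in choke and, below, in gred, red, and sfq, so every RED-derived qdisc rejects inconsistent thresholds before touching its state. A userspace model of what the helper is assumed to verify: qth_min <= qth_max, and neither threshold overflowing 32 bits once scaled by the Wlog shift:

#include <stdbool.h>
#include <stdint.h>

/* fls32() mimics the kernel's fls(): index of the highest set bit. */
static int fls32(uint32_t x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static bool red_params_ok(uint32_t qth_min, uint32_t qth_max, uint8_t Wlog)
{
	if (fls32(qth_min) + Wlog > 32)
		return false;	/* scaled qth_min would overflow */
	if (fls32(qth_max) + Wlog > 32)
		return false;	/* scaled qth_max would overflow */
	return qth_max >= qth_min;
}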
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3839cbbdc32b..cac003fddf3e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/list.h> | 26 | #include <linux/list.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/if_vlan.h> | 28 | #include <linux/if_vlan.h> |
29 | #include <linux/if_macvlan.h> | ||
29 | #include <net/sch_generic.h> | 30 | #include <net/sch_generic.h> |
30 | #include <net/pkt_sched.h> | 31 | #include <net/pkt_sched.h> |
31 | #include <net/dst.h> | 32 | #include <net/dst.h> |
@@ -277,6 +278,8 @@ unsigned long dev_trans_start(struct net_device *dev) | |||
277 | 278 | ||
278 | if (is_vlan_dev(dev)) | 279 | if (is_vlan_dev(dev)) |
279 | dev = vlan_dev_real_dev(dev); | 280 | dev = vlan_dev_real_dev(dev); |
281 | else if (netif_is_macvlan(dev)) | ||
282 | dev = macvlan_dev_real_dev(dev); | ||
280 | res = netdev_get_tx_queue(dev, 0)->trans_start; | 283 | res = netdev_get_tx_queue(dev, 0)->trans_start; |
281 | for (i = 1; i < dev->num_tx_queues; i++) { | 284 | for (i = 1; i < dev->num_tx_queues; i++) { |
282 | val = netdev_get_tx_queue(dev, i)->trans_start; | 285 | val = netdev_get_tx_queue(dev, i)->trans_start; |
@@ -630,6 +633,19 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |||
630 | qdisc_skb_head_init(&sch->q); | 633 | qdisc_skb_head_init(&sch->q); |
631 | spin_lock_init(&sch->q.lock); | 634 | spin_lock_init(&sch->q.lock); |
632 | 635 | ||
636 | if (ops->static_flags & TCQ_F_CPUSTATS) { | ||
637 | sch->cpu_bstats = | ||
638 | netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); | ||
639 | if (!sch->cpu_bstats) | ||
640 | goto errout1; | ||
641 | |||
642 | sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue); | ||
643 | if (!sch->cpu_qstats) { | ||
644 | free_percpu(sch->cpu_bstats); | ||
645 | goto errout1; | ||
646 | } | ||
647 | } | ||
648 | |||
633 | spin_lock_init(&sch->busylock); | 649 | spin_lock_init(&sch->busylock); |
634 | lockdep_set_class(&sch->busylock, | 650 | lockdep_set_class(&sch->busylock, |
635 | dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); | 651 | dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); |
@@ -639,6 +655,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |||
639 | dev->qdisc_running_key ?: &qdisc_running_key); | 655 | dev->qdisc_running_key ?: &qdisc_running_key); |
640 | 656 | ||
641 | sch->ops = ops; | 657 | sch->ops = ops; |
658 | sch->flags = ops->static_flags; | ||
642 | sch->enqueue = ops->enqueue; | 659 | sch->enqueue = ops->enqueue; |
643 | sch->dequeue = ops->dequeue; | 660 | sch->dequeue = ops->dequeue; |
644 | sch->dev_queue = dev_queue; | 661 | sch->dev_queue = dev_queue; |
@@ -646,6 +663,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |||
646 | refcount_set(&sch->refcnt, 1); | 663 | refcount_set(&sch->refcnt, 1); |
647 | 664 | ||
648 | return sch; | 665 | return sch; |
666 | errout1: | ||
667 | kfree(p); | ||
649 | errout: | 668 | errout: |
650 | return ERR_PTR(err); | 669 | return ERR_PTR(err); |
651 | } | 670 | } |
@@ -695,7 +714,7 @@ void qdisc_reset(struct Qdisc *qdisc) | |||
695 | } | 714 | } |
696 | EXPORT_SYMBOL(qdisc_reset); | 715 | EXPORT_SYMBOL(qdisc_reset); |
697 | 716 | ||
698 | static void qdisc_free(struct Qdisc *qdisc) | 717 | void qdisc_free(struct Qdisc *qdisc) |
699 | { | 718 | { |
700 | if (qdisc_is_percpu_stats(qdisc)) { | 719 | if (qdisc_is_percpu_stats(qdisc)) { |
701 | free_percpu(qdisc->cpu_bstats); | 720 | free_percpu(qdisc->cpu_bstats); |
@@ -1037,6 +1056,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, | |||
1037 | 1056 | ||
1038 | if (!tp_head) { | 1057 | if (!tp_head) { |
1039 | RCU_INIT_POINTER(*miniqp->p_miniq, NULL); | 1058 | RCU_INIT_POINTER(*miniqp->p_miniq, NULL); |
1059 | /* Wait for flying RCU callback before it is freed. */ | ||
1060 | rcu_barrier_bh(); | ||
1040 | return; | 1061 | return; |
1041 | } | 1062 | } |
1042 | 1063 | ||
@@ -1052,7 +1073,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, | |||
1052 | rcu_assign_pointer(*miniqp->p_miniq, miniq); | 1073 | rcu_assign_pointer(*miniqp->p_miniq, miniq); |
1053 | 1074 | ||
1054 | if (miniq_old) | 1075 | if (miniq_old) |
1055 | /* This is the counterpart of the rcu barrier above. We need to | 1076 | /* This is the counterpart of the rcu barriers above. We need to |
1056 | * block potential new users of miniq_old until all readers | 1077 | * block potential new users of miniq_old until all readers |
1057 | * have stopped seeing it. | 1078 | * have stopped seeing it. |
1058 | */ | 1079 | */ |
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 17c7130454bd..bc30f9186ac6 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c | |||
@@ -356,6 +356,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, | |||
356 | struct gred_sched *table = qdisc_priv(sch); | 356 | struct gred_sched *table = qdisc_priv(sch); |
357 | struct gred_sched_data *q = table->tab[dp]; | 357 | struct gred_sched_data *q = table->tab[dp]; |
358 | 358 | ||
359 | if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) | ||
360 | return -EINVAL; | ||
361 | |||
359 | if (!q) { | 362 | if (!q) { |
360 | table->tab[dp] = q = *prealloc; | 363 | table->tab[dp] = q = *prealloc; |
361 | *prealloc = NULL; | 364 | *prealloc = NULL; |
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 5ecc38f35d47..003e1b063447 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c | |||
@@ -66,7 +66,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) | |||
66 | { | 66 | { |
67 | struct ingress_sched_data *q = qdisc_priv(sch); | 67 | struct ingress_sched_data *q = qdisc_priv(sch); |
68 | struct net_device *dev = qdisc_dev(sch); | 68 | struct net_device *dev = qdisc_dev(sch); |
69 | int err; | 69 | |
70 | net_inc_ingress_queue(); | ||
70 | 71 | ||
71 | mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); | 72 | mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); |
72 | 73 | ||
@@ -74,14 +75,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) | |||
74 | q->block_info.chain_head_change = clsact_chain_head_change; | 75 | q->block_info.chain_head_change = clsact_chain_head_change; |
75 | q->block_info.chain_head_change_priv = &q->miniqp; | 76 | q->block_info.chain_head_change_priv = &q->miniqp; |
76 | 77 | ||
77 | err = tcf_block_get_ext(&q->block, sch, &q->block_info); | 78 | return tcf_block_get_ext(&q->block, sch, &q->block_info); |
78 | if (err) | ||
79 | return err; | ||
80 | |||
81 | net_inc_ingress_queue(); | ||
82 | sch->flags |= TCQ_F_CPUSTATS; | ||
83 | |||
84 | return 0; | ||
85 | } | 79 | } |
86 | 80 | ||
87 | static void ingress_destroy(struct Qdisc *sch) | 81 | static void ingress_destroy(struct Qdisc *sch) |
@@ -120,6 +114,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { | |||
120 | .cl_ops = &ingress_class_ops, | 114 | .cl_ops = &ingress_class_ops, |
121 | .id = "ingress", | 115 | .id = "ingress", |
122 | .priv_size = sizeof(struct ingress_sched_data), | 116 | .priv_size = sizeof(struct ingress_sched_data), |
117 | .static_flags = TCQ_F_CPUSTATS, | ||
123 | .init = ingress_init, | 118 | .init = ingress_init, |
124 | .destroy = ingress_destroy, | 119 | .destroy = ingress_destroy, |
125 | .dump = ingress_dump, | 120 | .dump = ingress_dump, |
@@ -172,6 +167,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) | |||
172 | struct net_device *dev = qdisc_dev(sch); | 167 | struct net_device *dev = qdisc_dev(sch); |
173 | int err; | 168 | int err; |
174 | 169 | ||
170 | net_inc_ingress_queue(); | ||
171 | net_inc_egress_queue(); | ||
172 | |||
175 | mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); | 173 | mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); |
176 | 174 | ||
177 | q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; | 175 | q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; |
@@ -188,20 +186,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) | |||
188 | q->egress_block_info.chain_head_change = clsact_chain_head_change; | 186 | q->egress_block_info.chain_head_change = clsact_chain_head_change; |
189 | q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; | 187 | q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; |
190 | 188 | ||
191 | err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); | 189 | return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); |
192 | if (err) | ||
193 | goto err_egress_block_get; | ||
194 | |||
195 | net_inc_ingress_queue(); | ||
196 | net_inc_egress_queue(); | ||
197 | |||
198 | sch->flags |= TCQ_F_CPUSTATS; | ||
199 | |||
200 | return 0; | ||
201 | |||
202 | err_egress_block_get: | ||
203 | tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); | ||
204 | return err; | ||
205 | } | 190 | } |
206 | 191 | ||
207 | static void clsact_destroy(struct Qdisc *sch) | 192 | static void clsact_destroy(struct Qdisc *sch) |
@@ -228,6 +213,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { | |||
228 | .cl_ops = &clsact_class_ops, | 213 | .cl_ops = &clsact_class_ops, |
229 | .id = "clsact", | 214 | .id = "clsact", |
230 | .priv_size = sizeof(struct clsact_sched_data), | 215 | .priv_size = sizeof(struct clsact_sched_data), |
216 | .static_flags = TCQ_F_CPUSTATS, | ||
231 | .init = clsact_init, | 217 | .init = clsact_init, |
232 | .destroy = clsact_destroy, | 218 | .destroy = clsact_destroy, |
233 | .dump = ingress_dump, | 219 | .dump = ingress_dump, |
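Both ingress and clsact stop setting TCQ_F_CPUSTATS by hand in their init functions: the flag is a fixed property of the qdisc type, so it moves to ops->static_flags, and qdisc_alloc() applies it (and allocates the percpu stats) before init ever runs, which is what lets the error unwinding in both init paths collapse. A kernel-style sketch of declaring such a qdisc; the ops struct is hypothetical and truncated:

static struct Qdisc_ops example_qdisc_ops __read_mostly = {
	.id		= "example",		/* hypothetical qdisc type */
	.priv_size	= 0,
	.static_flags	= TCQ_F_CPUSTATS,	/* applied as sch->flags = ops->static_flags */
	/* .init, .destroy, ... omitted */
};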
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 7f8ea9e297c3..f0747eb87dc4 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c | |||
@@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable) | |||
157 | .handle = sch->handle, | 157 | .handle = sch->handle, |
158 | .parent = sch->parent, | 158 | .parent = sch->parent, |
159 | }; | 159 | }; |
160 | int err; | ||
160 | 161 | ||
161 | if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) | 162 | if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) |
162 | return -EOPNOTSUPP; | 163 | return -EOPNOTSUPP; |
@@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable) | |||
171 | opt.command = TC_RED_DESTROY; | 172 | opt.command = TC_RED_DESTROY; |
172 | } | 173 | } |
173 | 174 | ||
174 | return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); | 175 | err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); |
176 | |||
177 | if (!err && enable) | ||
178 | sch->flags |= TCQ_F_OFFLOADED; | ||
179 | else | ||
180 | sch->flags &= ~TCQ_F_OFFLOADED; | ||
181 | |||
182 | return err; | ||
175 | } | 183 | } |
176 | 184 | ||
177 | static void red_destroy(struct Qdisc *sch) | 185 | static void red_destroy(struct Qdisc *sch) |
@@ -212,6 +220,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) | |||
212 | max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; | 220 | max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; |
213 | 221 | ||
214 | ctl = nla_data(tb[TCA_RED_PARMS]); | 222 | ctl = nla_data(tb[TCA_RED_PARMS]); |
223 | if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) | ||
224 | return -EINVAL; | ||
215 | 225 | ||
216 | if (ctl->limit > 0) { | 226 | if (ctl->limit > 0) { |
217 | child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); | 227 | child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); |
@@ -272,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt) | |||
272 | return red_change(sch, opt); | 282 | return red_change(sch, opt); |
273 | } | 283 | } |
274 | 284 | ||
275 | static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) | 285 | static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt) |
276 | { | 286 | { |
277 | struct net_device *dev = qdisc_dev(sch); | 287 | struct net_device *dev = qdisc_dev(sch); |
278 | struct tc_red_qopt_offload hw_stats = { | 288 | struct tc_red_qopt_offload hw_stats = { |
@@ -284,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) | |||
284 | .stats.qstats = &sch->qstats, | 294 | .stats.qstats = &sch->qstats, |
285 | }, | 295 | }, |
286 | }; | 296 | }; |
287 | int err; | ||
288 | 297 | ||
289 | opt->flags &= ~TC_RED_OFFLOADED; | 298 | if (!(sch->flags & TCQ_F_OFFLOADED)) |
290 | if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) | ||
291 | return 0; | ||
292 | |||
293 | err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, | ||
294 | &hw_stats); | ||
295 | if (err == -EOPNOTSUPP) | ||
296 | return 0; | 299 | return 0; |
297 | 300 | ||
298 | if (!err) | 301 | return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, |
299 | opt->flags |= TC_RED_OFFLOADED; | 302 | &hw_stats); |
300 | |||
301 | return err; | ||
302 | } | 303 | } |
303 | 304 | ||
304 | static int red_dump(struct Qdisc *sch, struct sk_buff *skb) | 305 | static int red_dump(struct Qdisc *sch, struct sk_buff *skb) |
@@ -317,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
317 | int err; | 318 | int err; |
318 | 319 | ||
319 | sch->qstats.backlog = q->qdisc->qstats.backlog; | 320 | sch->qstats.backlog = q->qdisc->qstats.backlog; |
320 | err = red_dump_offload(sch, &opt); | 321 | err = red_dump_offload_stats(sch, &opt); |
321 | if (err) | 322 | if (err) |
322 | goto nla_put_failure; | 323 | goto nla_put_failure; |
323 | 324 | ||
@@ -345,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | |||
345 | .marked = q->stats.prob_mark + q->stats.forced_mark, | 346 | .marked = q->stats.prob_mark + q->stats.forced_mark, |
346 | }; | 347 | }; |
347 | 348 | ||
348 | if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) { | 349 | if (sch->flags & TCQ_F_OFFLOADED) { |
349 | struct red_stats hw_stats = {0}; | 350 | struct red_stats hw_stats = {0}; |
350 | struct tc_red_qopt_offload hw_stats_request = { | 351 | struct tc_red_qopt_offload hw_stats_request = { |
351 | .command = TC_RED_XSTATS, | 352 | .command = TC_RED_XSTATS, |
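sch_red now records offload success in a qdisc-wide TCQ_F_OFFLOADED flag at setup time, so the dump and stats paths above simply test the flag instead of re-probing the driver; the flag is also what sch_api's new TCA_HW_OFFLOAD attribute reports. A small model of that lifecycle; the flag value here is an assumption for the sketch:

#define TCQ_F_OFFLOADED 0x200	/* assumed value, for illustration */

static unsigned int sch_flags;

/* Mirror of red_offload()'s tail: the flag tracks whether the driver
 * actually accepted the offload request.
 */
static void red_offload_result(int err, int enable)
{
	if (!err && enable)
		sch_flags |= TCQ_F_OFFLOADED;
	else
		sch_flags &= ~TCQ_F_OFFLOADED;
}

static int red_offloaded(void)
{
	return !!(sch_flags & TCQ_F_OFFLOADED);	/* what dump/stats now test */
}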
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 890f4a4564e7..930e5bd26d3d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c | |||
@@ -639,6 +639,9 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
639 | if (ctl->divisor && | 639 | if (ctl->divisor && |
640 | (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) | 640 | (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) |
641 | return -EINVAL; | 641 | return -EINVAL; |
642 | if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, | ||
643 | ctl_v1->Wlog)) | ||
644 | return -EINVAL; | ||
642 | if (ctl_v1 && ctl_v1->qth_min) { | 645 | if (ctl_v1 && ctl_v1->qth_min) { |
643 | p = kmalloc(sizeof(*p), GFP_KERNEL); | 646 | p = kmalloc(sizeof(*p), GFP_KERNEL); |
644 | if (!p) | 647 | if (!p) |
@@ -724,6 +727,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | |||
724 | int i; | 727 | int i; |
725 | int err; | 728 | int err; |
726 | 729 | ||
730 | q->sch = sch; | ||
727 | timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); | 731 | timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); |
728 | 732 | ||
729 | err = tcf_block_get(&q->block, &q->filter_list, sch); | 733 | err = tcf_block_get(&q->block, &q->filter_list, sch); |
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7b261afc47b9..7f8baa48e7c2 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c | |||
@@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) | |||
53 | msg->send_failed = 0; | 53 | msg->send_failed = 0; |
54 | msg->send_error = 0; | 54 | msg->send_error = 0; |
55 | msg->can_delay = 1; | 55 | msg->can_delay = 1; |
56 | msg->abandoned = 0; | ||
56 | msg->expires_at = 0; | 57 | msg->expires_at = 0; |
57 | INIT_LIST_HEAD(&msg->chunks); | 58 | INIT_LIST_HEAD(&msg->chunks); |
58 | } | 59 | } |
@@ -304,6 +305,13 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) | |||
304 | if (!chunk->asoc->peer.prsctp_capable) | 305 | if (!chunk->asoc->peer.prsctp_capable) |
305 | return 0; | 306 | return 0; |
306 | 307 | ||
308 | if (chunk->msg->abandoned) | ||
309 | return 1; | ||
310 | |||
311 | if (!chunk->has_tsn && | ||
312 | !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG)) | ||
313 | return 0; | ||
314 | |||
307 | if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && | 315 | if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && |
308 | time_after(jiffies, chunk->msg->expires_at)) { | 316 | time_after(jiffies, chunk->msg->expires_at)) { |
309 | struct sctp_stream_out *streamout = | 317 | struct sctp_stream_out *streamout = |
@@ -316,6 +324,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) | |||
316 | chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; | 324 | chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; |
317 | streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; | 325 | streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; |
318 | } | 326 | } |
327 | chunk->msg->abandoned = 1; | ||
319 | return 1; | 328 | return 1; |
320 | } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && | 329 | } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && |
321 | chunk->sent_count > chunk->sinfo.sinfo_timetolive) { | 330 | chunk->sent_count > chunk->sinfo.sinfo_timetolive) { |
@@ -324,10 +333,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) | |||
324 | 333 | ||
325 | chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; | 334 | chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; |
326 | streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; | 335 | streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; |
336 | chunk->msg->abandoned = 1; | ||
327 | return 1; | 337 | return 1; |
328 | } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && | 338 | } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && |
329 | chunk->msg->expires_at && | 339 | chunk->msg->expires_at && |
330 | time_after(jiffies, chunk->msg->expires_at)) { | 340 | time_after(jiffies, chunk->msg->expires_at)) { |
341 | chunk->msg->abandoned = 1; | ||
331 | return 1; | 342 | return 1; |
332 | } | 343 | } |
333 | /* PRIO policy is processed by sendmsg, not here */ | 344 | /* PRIO policy is processed by sendmsg, not here */ |
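The chunk.c hunks above add a per-message abandoned flag: the first expired fragment caches the verdict so every later fragment of the same message follows it, and middle/last fragments that never got a TSN are not judged on their own. A compact model of the new decision order; policy_expired stands in for the TTL/RTX policy checks in the patch:

#include <stdbool.h>

struct sctp_msg_model   { bool abandoned; };
struct sctp_chunk_model {
	struct sctp_msg_model *msg;
	bool has_tsn;
	bool first_frag;
};

static bool chunk_abandoned(struct sctp_chunk_model *c, bool policy_expired)
{
	if (c->msg->abandoned)
		return true;			/* cached verdict wins */
	if (!c->has_tsn && !c->first_frag)
		return false;			/* judged via the first fragment */
	if (policy_expired) {
		c->msg->abandoned = true;	/* cache for the siblings */
		return true;
	}
	return false;
}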
diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 3f619fdcbf0a..291c97b07058 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c | |||
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid) | |||
78 | case SCTP_CID_AUTH: | 78 | case SCTP_CID_AUTH: |
79 | return "AUTH"; | 79 | return "AUTH"; |
80 | 80 | ||
81 | case SCTP_CID_RECONF: | ||
82 | return "RECONF"; | ||
83 | |||
81 | default: | 84 | default: |
82 | break; | 85 | break; |
83 | } | 86 | } |
diff --git a/net/sctp/input.c b/net/sctp/input.c index 621b5ca3fd1c..141c9c466ec1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c | |||
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, | |||
399 | return; | 399 | return; |
400 | } | 400 | } |
401 | 401 | ||
402 | if (t->param_flags & SPP_PMTUD_ENABLE) { | 402 | if (!(t->param_flags & SPP_PMTUD_ENABLE)) |
403 | /* Update transports view of the MTU */ | 403 | /* We can't allow retransmitting in such case, as the |
404 | sctp_transport_update_pmtu(t, pmtu); | 404 | * retransmission would be sized just as before, and thus we |
405 | 405 | * would get another icmp, and retransmit again. | |
406 | /* Update association pmtu. */ | 406 | */ |
407 | sctp_assoc_sync_pmtu(asoc); | 407 | return; |
408 | } | ||
409 | 408 | ||
410 | /* Retransmit with the new pmtu setting. | 409 | /* Update the transport's view of the MTU. Return if no update was needed. |
411 | * Normally, if PMTU discovery is disabled, an ICMP Fragmentation | 410 | * If an update wasn't needed/possible, it also doesn't make sense to |
412 | * Needed will never be sent, but if a message was sent before | 411 | * try to retransmit now. |
413 | * PMTU discovery was disabled that was larger than the PMTU, it | ||
414 | * would not be fragmented, so it must be re-transmitted fragmented. | ||
415 | */ | 412 | */ |
413 | if (!sctp_transport_update_pmtu(t, pmtu)) | ||
414 | return; | ||
415 | |||
416 | /* Update association pmtu. */ | ||
417 | sctp_assoc_sync_pmtu(asoc); | ||
418 | |||
419 | /* Retransmit with the new pmtu setting. */ | ||
416 | sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); | 420 | sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); |
417 | } | 421 | } |
418 | 422 | ||
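The reworked sctp_icmp_frag_needed() above is a chain of early exits: with PMTU discovery disabled, a retransmission would be sized exactly as before and just draw the same ICMP again, and if the reported MTU changes nothing there is equally nothing to resend. A runnable model; transport_update_pmtu() is a stand-in assumed to return true only when the cached PMTU actually changed:

#include <stdbool.h>
#include <stdio.h>

static bool transport_update_pmtu(unsigned int *cached, unsigned int pmtu)
{
	if (*cached == pmtu)
		return false;		/* nothing changed */
	*cached = pmtu;
	return true;
}

static void frag_needed(bool pmtud_enabled, unsigned int *cached,
			unsigned int pmtu)
{
	if (!pmtud_enabled)
		return;			/* same size, same ICMP: don't loop */
	if (!transport_update_pmtu(cached, pmtu))
		return;			/* no update, no point retransmitting */
	puts("sync association pmtu, then retransmit");
}

int main(void)
{
	unsigned int cached = 1500;

	frag_needed(true, &cached, 1400);	/* prints the action line */
	frag_needed(true, &cached, 1400);	/* second report: early exit */
	return 0;
}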
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 3b18085e3b10..5d4c15bf66d2 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c | |||
@@ -826,6 +826,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp) | |||
826 | case AF_INET: | 826 | case AF_INET: |
827 | if (!__ipv6_only_sock(sctp_opt2sk(sp))) | 827 | if (!__ipv6_only_sock(sctp_opt2sk(sp))) |
828 | return 1; | 828 | return 1; |
829 | /* fallthru */ | ||
829 | default: | 830 | default: |
830 | return 0; | 831 | return 0; |
831 | } | 832 | } |
diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 275925b93b29..35bc7106d182 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c | |||
@@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, | |||
45 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 45 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
46 | struct sctphdr *sh; | 46 | struct sctphdr *sh; |
47 | 47 | ||
48 | if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) | ||
49 | goto out; | ||
50 | |||
48 | sh = sctp_hdr(skb); | 51 | sh = sctp_hdr(skb); |
49 | if (!pskb_may_pull(skb, sizeof(*sh))) | 52 | if (!pskb_may_pull(skb, sizeof(*sh))) |
50 | goto out; | 53 | goto out; |
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4db012aa25f7..c4ec99b20150 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c | |||
@@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, | |||
364 | list_for_each_entry_safe(chk, temp, queue, transmitted_list) { | 364 | list_for_each_entry_safe(chk, temp, queue, transmitted_list) { |
365 | struct sctp_stream_out *streamout; | 365 | struct sctp_stream_out *streamout; |
366 | 366 | ||
367 | if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || | 367 | if (!chk->msg->abandoned && |
368 | chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) | 368 | (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || |
369 | chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) | ||
369 | continue; | 370 | continue; |
370 | 371 | ||
372 | chk->msg->abandoned = 1; | ||
371 | list_del_init(&chk->transmitted_list); | 373 | list_del_init(&chk->transmitted_list); |
372 | sctp_insert_list(&asoc->outqueue.abandoned, | 374 | sctp_insert_list(&asoc->outqueue.abandoned, |
373 | &chk->transmitted_list); | 375 | &chk->transmitted_list); |
@@ -377,7 +379,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, | |||
377 | asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; | 379 | asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; |
378 | streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; | 380 | streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; |
379 | 381 | ||
380 | if (!chk->tsn_gap_acked) { | 382 | if (queue != &asoc->outqueue.retransmit && |
383 | !chk->tsn_gap_acked) { | ||
381 | if (chk->transport) | 384 | if (chk->transport) |
382 | chk->transport->flight_size -= | 385 | chk->transport->flight_size -= |
383 | sctp_data_size(chk); | 386 | sctp_data_size(chk); |
@@ -403,10 +406,13 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, | |||
403 | q->sched->unsched_all(&asoc->stream); | 406 | q->sched->unsched_all(&asoc->stream); |
404 | 407 | ||
405 | list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { | 408 | list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { |
406 | if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || | 409 | if (!chk->msg->abandoned && |
407 | chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) | 410 | (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) || |
411 | !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || | ||
412 | chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) | ||
408 | continue; | 413 | continue; |
409 | 414 | ||
415 | chk->msg->abandoned = 1; | ||
410 | sctp_sched_dequeue_common(q, chk); | 416 | sctp_sched_dequeue_common(q, chk); |
411 | asoc->sent_cnt_removable--; | 417 | asoc->sent_cnt_removable--; |
412 | asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; | 418 | asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; |
@@ -912,9 +918,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) | |||
912 | break; | 918 | break; |
913 | 919 | ||
914 | case SCTP_CID_ABORT: | 920 | case SCTP_CID_ABORT: |
915 | if (sctp_test_T_bit(chunk)) { | 921 | if (sctp_test_T_bit(chunk)) |
916 | packet->vtag = asoc->c.my_vtag; | 922 | packet->vtag = asoc->c.my_vtag; |
917 | } | 923 | /* fallthru */ |
918 | /* The following chunks are "response" chunks, i.e. | 924 | /* The following chunks are "response" chunks, i.e. |
919 | * they are generated in response to something we | 925 | * they are generated in response to something we |
920 | * received. If we are sending these, then we can | 926 | * received. If we are sending these, then we can |
@@ -1434,7 +1440,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, | |||
1434 | /* If this chunk has not been acked, stop | 1440 | /* If this chunk has not been acked, stop |
1435 | * considering it as 'outstanding'. | 1441 | * considering it as 'outstanding'. |
1436 | */ | 1442 | */ |
1437 | if (!tchunk->tsn_gap_acked) { | 1443 | if (transmitted_queue != &q->retransmit && |
1444 | !tchunk->tsn_gap_acked) { | ||
1438 | if (tchunk->transport) | 1445 | if (tchunk->transport) |
1439 | tchunk->transport->flight_size -= | 1446 | tchunk->transport->flight_size -= |
1440 | sctp_data_size(tchunk); | 1447 | sctp_data_size(tchunk); |
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f5172c21349b..6a38c2503649 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c | |||
@@ -1499,6 +1499,7 @@ static __init int sctp_init(void) | |||
1499 | INIT_LIST_HEAD(&sctp_address_families); | 1499 | INIT_LIST_HEAD(&sctp_address_families); |
1500 | sctp_v4_pf_init(); | 1500 | sctp_v4_pf_init(); |
1501 | sctp_v6_pf_init(); | 1501 | sctp_v6_pf_init(); |
1502 | sctp_sched_ops_init(); | ||
1502 | 1503 | ||
1503 | status = register_pernet_subsys(&sctp_defaults_ops); | 1504 | status = register_pernet_subsys(&sctp_defaults_ops); |
1504 | if (status) | 1505 | if (status) |
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3204a9b29407..737e551fbf67 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c | |||
@@ -85,7 +85,7 @@ | |||
85 | static int sctp_writeable(struct sock *sk); | 85 | static int sctp_writeable(struct sock *sk); |
86 | static void sctp_wfree(struct sk_buff *skb); | 86 | static void sctp_wfree(struct sk_buff *skb); |
87 | static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, | 87 | static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, |
88 | size_t msg_len, struct sock **orig_sk); | 88 | size_t msg_len); |
89 | static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); | 89 | static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); |
90 | static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); | 90 | static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); |
91 | static int sctp_wait_for_accept(struct sock *sk, long timeo); | 91 | static int sctp_wait_for_accept(struct sock *sk, long timeo); |
@@ -188,13 +188,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, | |||
188 | list_for_each_entry(chunk, &t->transmitted, transmitted_list) | 188 | list_for_each_entry(chunk, &t->transmitted, transmitted_list) |
189 | cb(chunk); | 189 | cb(chunk); |
190 | 190 | ||
191 | list_for_each_entry(chunk, &q->retransmit, list) | 191 | list_for_each_entry(chunk, &q->retransmit, transmitted_list) |
192 | cb(chunk); | 192 | cb(chunk); |
193 | 193 | ||
194 | list_for_each_entry(chunk, &q->sacked, list) | 194 | list_for_each_entry(chunk, &q->sacked, transmitted_list) |
195 | cb(chunk); | 195 | cb(chunk); |
196 | 196 | ||
197 | list_for_each_entry(chunk, &q->abandoned, list) | 197 | list_for_each_entry(chunk, &q->abandoned, transmitted_list) |
198 | cb(chunk); | 198 | cb(chunk); |
199 | 199 | ||
200 | list_for_each_entry(chunk, &q->out_chunk_list, list) | 200 | list_for_each_entry(chunk, &q->out_chunk_list, list) |
@@ -335,16 +335,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, | |||
335 | if (len < sizeof (struct sockaddr)) | 335 | if (len < sizeof (struct sockaddr)) |
336 | return NULL; | 336 | return NULL; |
337 | 337 | ||
338 | if (!opt->pf->af_supported(addr->sa.sa_family, opt)) | ||
339 | return NULL; | ||
340 | |||
338 | /* V4 mapped address are really of AF_INET family */ | 341 | /* V4 mapped address are really of AF_INET family */ |
339 | if (addr->sa.sa_family == AF_INET6 && | 342 | if (addr->sa.sa_family == AF_INET6 && |
340 | ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { | 343 | ipv6_addr_v4mapped(&addr->v6.sin6_addr) && |
341 | if (!opt->pf->af_supported(AF_INET, opt)) | 344 | !opt->pf->af_supported(AF_INET, opt)) |
342 | return NULL; | 345 | return NULL; |
343 | } else { | ||
344 | /* Does this PF support this AF? */ | ||
345 | if (!opt->pf->af_supported(addr->sa.sa_family, opt)) | ||
346 | return NULL; | ||
347 | } | ||
348 | 346 | ||
349 | /* If we get this far, af is valid. */ | 347 | /* If we get this far, af is valid. */ |
350 | af = sctp_get_af_specific(addr->sa.sa_family); | 348 | af = sctp_get_af_specific(addr->sa.sa_family); |
@@ -970,13 +968,6 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) | |||
970 | * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() | 968 | * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() |
971 | * from userspace. | 969 | * from userspace. |
972 | * | 970 | * |
973 | * We don't use copy_from_user() for optimization: we first do the | ||
974 | * sanity checks (buffer size -fast- and access check-healthy | ||
975 | * pointer); if all of those succeed, then we can alloc the memory | ||
976 | * (expensive operation) needed to copy the data to kernel. Then we do | ||
977 | * the copying without checking the user space area | ||
978 | * (__copy_from_user()). | ||
979 | * | ||
980 | * On exit there is no need to do sockfd_put(), sys_setsockopt() does | 971 | * On exit there is no need to do sockfd_put(), sys_setsockopt() does |
981 | * it. | 972 | * it. |
982 | * | 973 | * |
@@ -1006,25 +997,15 @@ static int sctp_setsockopt_bindx(struct sock *sk, | |||
1006 | if (unlikely(addrs_size <= 0)) | 997 | if (unlikely(addrs_size <= 0)) |
1007 | return -EINVAL; | 998 | return -EINVAL; |
1008 | 999 | ||
1009 | /* Check the user passed a healthy pointer. */ | 1000 | kaddrs = vmemdup_user(addrs, addrs_size); |
1010 | if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size))) | 1001 | if (unlikely(IS_ERR(kaddrs))) |
1011 | return -EFAULT; | 1002 | return PTR_ERR(kaddrs); |
1012 | |||
1013 | /* Alloc space for the address array in kernel memory. */ | ||
1014 | kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN); | ||
1015 | if (unlikely(!kaddrs)) | ||
1016 | return -ENOMEM; | ||
1017 | |||
1018 | if (__copy_from_user(kaddrs, addrs, addrs_size)) { | ||
1019 | kfree(kaddrs); | ||
1020 | return -EFAULT; | ||
1021 | } | ||
1022 | 1003 | ||
1023 | /* Walk through the addrs buffer and count the number of addresses. */ | 1004 | /* Walk through the addrs buffer and count the number of addresses. */ |
1024 | addr_buf = kaddrs; | 1005 | addr_buf = kaddrs; |
1025 | while (walk_size < addrs_size) { | 1006 | while (walk_size < addrs_size) { |
1026 | if (walk_size + sizeof(sa_family_t) > addrs_size) { | 1007 | if (walk_size + sizeof(sa_family_t) > addrs_size) { |
1027 | kfree(kaddrs); | 1008 | kvfree(kaddrs); |
1028 | return -EINVAL; | 1009 | return -EINVAL; |
1029 | } | 1010 | } |
1030 | 1011 | ||
@@ -1035,7 +1016,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, | |||
1035 | * causes the address buffer to overflow return EINVAL. | 1016 | * causes the address buffer to overflow return EINVAL. |
1036 | */ | 1017 | */ |
1037 | if (!af || (walk_size + af->sockaddr_len) > addrs_size) { | 1018 | if (!af || (walk_size + af->sockaddr_len) > addrs_size) { |
1038 | kfree(kaddrs); | 1019 | kvfree(kaddrs); |
1039 | return -EINVAL; | 1020 | return -EINVAL; |
1040 | } | 1021 | } |
1041 | addrcnt++; | 1022 | addrcnt++; |
@@ -1065,7 +1046,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, | |||
1065 | } | 1046 | } |
1066 | 1047 | ||
1067 | out: | 1048 | out: |
1068 | kfree(kaddrs); | 1049 | kvfree(kaddrs); |
1069 | 1050 | ||
1070 | return err; | 1051 | return err; |
1071 | } | 1052 | } |
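
The bindx hunk above (and the connectx hunk below) replace the open-coded access_ok()/kmalloc()/__copy_from_user() sequence with vmemdup_user(), which may fall back to vmalloc for large buffers and therefore must be paired with kvfree(). A minimal sketch of the resulting calling pattern, with an illustrative function name that is not part of the patch:

#include <linux/err.h>
#include <linux/mm.h>      /* kvfree() */
#include <linux/socket.h>
#include <linux/string.h>  /* vmemdup_user() */

/* Sketch: copy a user-supplied, variable-sized address array into
 * kernel memory. vmemdup_user() performs the size-checked allocation
 * (kmalloc or vmalloc) and the copy in one step and returns an
 * ERR_PTR on failure, so no separate access_ok()/__copy_from_user()
 * pair is needed. The result must be freed with kvfree(), not kfree().
 */
static int demo_copy_addrs(struct sockaddr __user *addrs, int addrs_size)
{
	struct sockaddr *kaddrs;

	if (addrs_size <= 0)
		return -EINVAL;

	kaddrs = vmemdup_user(addrs, addrs_size);
	if (IS_ERR(kaddrs))
		return PTR_ERR(kaddrs);

	/* ... walk and validate kaddrs here ... */

	kvfree(kaddrs);
	return 0;
}
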
@@ -1323,13 +1304,6 @@ out_free: | |||
1323 | * land and invoking either sctp_connectx(). This is used for tunneling | 1304 | * land and invoking either sctp_connectx(). This is used for tunneling |
1324 | * the sctp_connectx() request through sctp_setsockopt() from userspace. | 1305 | * the sctp_connectx() request through sctp_setsockopt() from userspace. |
1325 | * | 1306 | * |
1326 | * We don't use copy_from_user() for optimization: we first do the | ||
1327 | * sanity checks (buffer size -fast- and access check-healthy | ||
1328 | * pointer); if all of those succeed, then we can alloc the memory | ||
1329 | * (expensive operation) needed to copy the data to kernel. Then we do | ||
1330 | * the copying without checking the user space area | ||
1331 | * (__copy_from_user()). | ||
1332 | * | ||
1333 | * On exit there is no need to do sockfd_put(), sys_setsockopt() does | 1307 | * On exit there is no need to do sockfd_put(), sys_setsockopt() does |
1334 | * it. | 1308 | * it. |
1335 | * | 1309 | * |
@@ -1345,7 +1319,6 @@ static int __sctp_setsockopt_connectx(struct sock *sk, | |||
1345 | sctp_assoc_t *assoc_id) | 1319 | sctp_assoc_t *assoc_id) |
1346 | { | 1320 | { |
1347 | struct sockaddr *kaddrs; | 1321 | struct sockaddr *kaddrs; |
1348 | gfp_t gfp = GFP_KERNEL; | ||
1349 | int err = 0; | 1322 | int err = 0; |
1350 | 1323 | ||
1351 | pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", | 1324 | pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", |
@@ -1354,24 +1327,12 @@ static int __sctp_setsockopt_connectx(struct sock *sk, | |||
1354 | if (unlikely(addrs_size <= 0)) | 1327 | if (unlikely(addrs_size <= 0)) |
1355 | return -EINVAL; | 1328 | return -EINVAL; |
1356 | 1329 | ||
1357 | /* Check the user passed a healthy pointer. */ | 1330 | kaddrs = vmemdup_user(addrs, addrs_size); |
1358 | if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size))) | 1331 | if (unlikely(IS_ERR(kaddrs))) |
1359 | return -EFAULT; | 1332 | return PTR_ERR(kaddrs); |
1360 | |||
1361 | /* Alloc space for the address array in kernel memory. */ | ||
1362 | if (sk->sk_socket->file) | ||
1363 | gfp = GFP_USER | __GFP_NOWARN; | ||
1364 | kaddrs = kmalloc(addrs_size, gfp); | ||
1365 | if (unlikely(!kaddrs)) | ||
1366 | return -ENOMEM; | ||
1367 | |||
1368 | if (__copy_from_user(kaddrs, addrs, addrs_size)) { | ||
1369 | err = -EFAULT; | ||
1370 | } else { | ||
1371 | err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); | ||
1372 | } | ||
1373 | 1333 | ||
1374 | kfree(kaddrs); | 1334 | err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); |
1335 | kvfree(kaddrs); | ||
1375 | 1336 | ||
1376 | return err; | 1337 | return err; |
1377 | } | 1338 | } |
@@ -1883,8 +1844,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) | |||
1883 | */ | 1844 | */ |
1884 | if (sinit) { | 1845 | if (sinit) { |
1885 | if (sinit->sinit_num_ostreams) { | 1846 | if (sinit->sinit_num_ostreams) { |
1886 | asoc->c.sinit_num_ostreams = | 1847 | __u16 outcnt = sinit->sinit_num_ostreams; |
1887 | sinit->sinit_num_ostreams; | 1848 | |
1849 | asoc->c.sinit_num_ostreams = outcnt; | ||
1850 | /* outcnt has been changed, so re-init stream */ | ||
1851 | err = sctp_stream_init(&asoc->stream, outcnt, 0, | ||
1852 | GFP_KERNEL); | ||
1853 | if (err) | ||
1854 | goto out_free; | ||
1888 | } | 1855 | } |
1889 | if (sinit->sinit_max_instreams) { | 1856 | if (sinit->sinit_max_instreams) { |
1890 | asoc->c.sinit_max_instreams = | 1857 | asoc->c.sinit_max_instreams = |
@@ -1971,7 +1938,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) | |||
1971 | timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); | 1938 | timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); |
1972 | if (!sctp_wspace(asoc)) { | 1939 | if (!sctp_wspace(asoc)) { |
1973 | /* sk can be changed by peel off when waiting for buf. */ | 1940 | /* sk can be changed by peel off when waiting for buf. */ |
1974 | err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); | 1941 | err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); |
1975 | if (err) { | 1942 | if (err) { |
1976 | if (err == -ESRCH) { | 1943 | if (err == -ESRCH) { |
1977 | /* asoc is already dead. */ | 1944 | /* asoc is already dead. */ |
@@ -2277,7 +2244,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, | |||
2277 | 2244 | ||
2278 | if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { | 2245 | if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { |
2279 | event = sctp_ulpevent_make_sender_dry_event(asoc, | 2246 | event = sctp_ulpevent_make_sender_dry_event(asoc, |
2280 | GFP_ATOMIC); | 2247 | GFP_USER | __GFP_NOWARN); |
2281 | if (!event) | 2248 | if (!event) |
2282 | return -ENOMEM; | 2249 | return -ENOMEM; |
2283 | 2250 | ||
@@ -3498,6 +3465,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, | |||
3498 | 3465 | ||
3499 | if (optlen < sizeof(struct sctp_hmacalgo)) | 3466 | if (optlen < sizeof(struct sctp_hmacalgo)) |
3500 | return -EINVAL; | 3467 | return -EINVAL; |
3468 | optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) + | ||
3469 | SCTP_AUTH_NUM_HMACS * sizeof(u16)); | ||
3501 | 3470 | ||
3502 | hmacs = memdup_user(optval, optlen); | 3471 | hmacs = memdup_user(optval, optlen); |
3503 | if (IS_ERR(hmacs)) | 3472 | if (IS_ERR(hmacs)) |
@@ -3536,6 +3505,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk, | |||
3536 | 3505 | ||
3537 | if (optlen <= sizeof(struct sctp_authkey)) | 3506 | if (optlen <= sizeof(struct sctp_authkey)) |
3538 | return -EINVAL; | 3507 | return -EINVAL; |
3508 | /* authkey->sca_keylength is u16, so optlen can't be bigger than | ||
3509 | * this. | ||
3510 | */ | ||
3511 | optlen = min_t(unsigned int, optlen, USHRT_MAX + | ||
3512 | sizeof(struct sctp_authkey)); | ||
3539 | 3513 | ||
3540 | authkey = memdup_user(optval, optlen); | 3514 | authkey = memdup_user(optval, optlen); |
3541 | if (IS_ERR(authkey)) | 3515 | if (IS_ERR(authkey)) |
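
Both the HMAC-ident and auth-key paths above now clamp the user-controlled optlen before handing it to memdup_user(), so a hostile value can no longer force an arbitrarily large kernel allocation. The ceiling comes from the widest length field the option can actually describe. A hedged sketch of the pattern, with a placeholder struct and bound:

#include <linux/err.h>
#include <linux/kernel.h>  /* min_t(), USHRT_MAX */
#include <linux/string.h>  /* memdup_user() */
#include <linux/types.h>

/* Sketch: optval/optlen come straight from userspace. The payload
 * length field inside the struct is a u16, so anything beyond
 * USHRT_MAX plus the header can never be meaningful; clamp before
 * allocating.
 */
struct demo_opt {
	__u16 len;
	__u8  data[];
};

static void *demo_copy_opt(const void __user *optval, unsigned int optlen)
{
	if (optlen <= sizeof(struct demo_opt))
		return ERR_PTR(-EINVAL);

	optlen = min_t(unsigned int, optlen,
		       USHRT_MAX + sizeof(struct demo_opt));

	return memdup_user(optval, optlen);
}
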
@@ -3891,13 +3865,20 @@ static int sctp_setsockopt_reset_streams(struct sock *sk, | |||
3891 | struct sctp_association *asoc; | 3865 | struct sctp_association *asoc; |
3892 | int retval = -EINVAL; | 3866 | int retval = -EINVAL; |
3893 | 3867 | ||
3894 | if (optlen < sizeof(struct sctp_reset_streams)) | 3868 | if (optlen < sizeof(*params)) |
3895 | return -EINVAL; | 3869 | return -EINVAL; |
3870 | /* srs_number_streams is u16, so optlen can't be bigger than this. */ | ||
3871 | optlen = min_t(unsigned int, optlen, USHRT_MAX + | ||
3872 | sizeof(__u16) * sizeof(*params)); | ||
3896 | 3873 | ||
3897 | params = memdup_user(optval, optlen); | 3874 | params = memdup_user(optval, optlen); |
3898 | if (IS_ERR(params)) | 3875 | if (IS_ERR(params)) |
3899 | return PTR_ERR(params); | 3876 | return PTR_ERR(params); |
3900 | 3877 | ||
3878 | if (params->srs_number_streams * sizeof(__u16) > | ||
3879 | optlen - sizeof(*params)) | ||
3880 | goto out; | ||
3881 | |||
3901 | asoc = sctp_id2assoc(sk, params->srs_assoc_id); | 3882 | asoc = sctp_id2assoc(sk, params->srs_assoc_id); |
3902 | if (!asoc) | 3883 | if (!asoc) |
3903 | goto out; | 3884 | goto out; |
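
Note that the reset-streams hunk keeps a second check after the copy: the element count embedded in the request is still untrusted, so it is validated against the bytes actually copied before the trailing array is walked. Reduced to its essentials (names illustrative):

#include <linux/types.h>

/* Sketch: req was just memdup_user()'d with a clamped optlen, and the
 * caller already rejected optlen < sizeof(*req). Verify the embedded
 * element count against the real buffer size before indexing,
 * otherwise a large count reads past the end of the copy.
 */
struct demo_req {
	__u16 nr;
	__u16 list[];
};

static bool demo_req_fits(const struct demo_req *req, unsigned int optlen)
{
	return (size_t)req->nr * sizeof(__u16) <= optlen - sizeof(*req);
}
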
@@ -4494,7 +4475,7 @@ static int sctp_init_sock(struct sock *sk) | |||
4494 | SCTP_DBG_OBJCNT_INC(sock); | 4475 | SCTP_DBG_OBJCNT_INC(sock); |
4495 | 4476 | ||
4496 | local_bh_disable(); | 4477 | local_bh_disable(); |
4497 | percpu_counter_inc(&sctp_sockets_allocated); | 4478 | sk_sockets_allocated_inc(sk); |
4498 | sock_prot_inuse_add(net, sk->sk_prot, 1); | 4479 | sock_prot_inuse_add(net, sk->sk_prot, 1); |
4499 | 4480 | ||
4500 | /* Nothing can fail after this block, otherwise | 4481 | /* Nothing can fail after this block, otherwise |
@@ -4538,7 +4519,7 @@ static void sctp_destroy_sock(struct sock *sk) | |||
4538 | } | 4519 | } |
4539 | sctp_endpoint_free(sp->ep); | 4520 | sctp_endpoint_free(sp->ep); |
4540 | local_bh_disable(); | 4521 | local_bh_disable(); |
4541 | percpu_counter_dec(&sctp_sockets_allocated); | 4522 | sk_sockets_allocated_dec(sk); |
4542 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 4523 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
4543 | local_bh_enable(); | 4524 | local_bh_enable(); |
4544 | } | 4525 | } |
@@ -5011,7 +4992,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv | |||
5011 | len = sizeof(int); | 4992 | len = sizeof(int); |
5012 | if (put_user(len, optlen)) | 4993 | if (put_user(len, optlen)) |
5013 | return -EFAULT; | 4994 | return -EFAULT; |
5014 | if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int))) | 4995 | if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len)) |
5015 | return -EFAULT; | 4996 | return -EFAULT; |
5016 | return 0; | 4997 | return 0; |
5017 | } | 4998 | } |
@@ -5080,7 +5061,6 @@ static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *p | |||
5080 | *newfile = sock_alloc_file(newsock, 0, NULL); | 5061 | *newfile = sock_alloc_file(newsock, 0, NULL); |
5081 | if (IS_ERR(*newfile)) { | 5062 | if (IS_ERR(*newfile)) { |
5082 | put_unused_fd(retval); | 5063 | put_unused_fd(retval); |
5083 | sock_release(newsock); | ||
5084 | retval = PTR_ERR(*newfile); | 5064 | retval = PTR_ERR(*newfile); |
5085 | *newfile = NULL; | 5065 | *newfile = NULL; |
5086 | return retval; | 5066 | return retval; |
@@ -5642,6 +5622,9 @@ copy_getaddrs: | |||
5642 | err = -EFAULT; | 5622 | err = -EFAULT; |
5643 | goto out; | 5623 | goto out; |
5644 | } | 5624 | } |
5625 | /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too, | ||
5626 | * but we can't change it anymore. | ||
5627 | */ | ||
5645 | if (put_user(bytes_copied, optlen)) | 5628 | if (put_user(bytes_copied, optlen)) |
5646 | err = -EFAULT; | 5629 | err = -EFAULT; |
5647 | out: | 5630 | out: |
@@ -6078,7 +6061,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, | |||
6078 | params.assoc_id = 0; | 6061 | params.assoc_id = 0; |
6079 | } else if (len >= sizeof(struct sctp_assoc_value)) { | 6062 | } else if (len >= sizeof(struct sctp_assoc_value)) { |
6080 | len = sizeof(struct sctp_assoc_value); | 6063 | len = sizeof(struct sctp_assoc_value); |
6081 | if (copy_from_user(¶ms, optval, sizeof(params))) | 6064 | if (copy_from_user(¶ms, optval, len)) |
6082 | return -EFAULT; | 6065 | return -EFAULT; |
6083 | } else | 6066 | } else |
6084 | return -EINVAL; | 6067 | return -EINVAL; |
@@ -6248,7 +6231,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, | |||
6248 | 6231 | ||
6249 | if (len < sizeof(struct sctp_authkeyid)) | 6232 | if (len < sizeof(struct sctp_authkeyid)) |
6250 | return -EINVAL; | 6233 | return -EINVAL; |
6251 | if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid))) | 6234 | |
6235 | len = sizeof(struct sctp_authkeyid); | ||
6236 | if (copy_from_user(&val, optval, len)) | ||
6252 | return -EFAULT; | 6237 | return -EFAULT; |
6253 | 6238 | ||
6254 | asoc = sctp_id2assoc(sk, val.scact_assoc_id); | 6239 | asoc = sctp_id2assoc(sk, val.scact_assoc_id); |
@@ -6260,7 +6245,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, | |||
6260 | else | 6245 | else |
6261 | val.scact_keynumber = ep->active_key_id; | 6246 | val.scact_keynumber = ep->active_key_id; |
6262 | 6247 | ||
6263 | len = sizeof(struct sctp_authkeyid); | ||
6264 | if (put_user(len, optlen)) | 6248 | if (put_user(len, optlen)) |
6265 | return -EFAULT; | 6249 | return -EFAULT; |
6266 | if (copy_to_user(optval, &val, len)) | 6250 | if (copy_to_user(optval, &val, len)) |
@@ -6286,7 +6270,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, | |||
6286 | if (len < sizeof(struct sctp_authchunks)) | 6270 | if (len < sizeof(struct sctp_authchunks)) |
6287 | return -EINVAL; | 6271 | return -EINVAL; |
6288 | 6272 | ||
6289 | if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) | 6273 | if (copy_from_user(&val, optval, sizeof(val))) |
6290 | return -EFAULT; | 6274 | return -EFAULT; |
6291 | 6275 | ||
6292 | to = p->gauth_chunks; | 6276 | to = p->gauth_chunks; |
@@ -6331,7 +6315,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, | |||
6331 | if (len < sizeof(struct sctp_authchunks)) | 6315 | if (len < sizeof(struct sctp_authchunks)) |
6332 | return -EINVAL; | 6316 | return -EINVAL; |
6333 | 6317 | ||
6334 | if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) | 6318 | if (copy_from_user(&val, optval, sizeof(val))) |
6335 | return -EFAULT; | 6319 | return -EFAULT; |
6336 | 6320 | ||
6337 | to = p->gauth_chunks; | 6321 | to = p->gauth_chunks; |
@@ -7497,11 +7481,11 @@ out: | |||
7497 | * here, again, by modeling the current TCP/UDP code. We don't have | 7481 | * here, again, by modeling the current TCP/UDP code. We don't have |
7498 | * a good way to test with it yet. | 7482 | * a good way to test with it yet. |
7499 | */ | 7483 | */ |
7500 | unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) | 7484 | __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) |
7501 | { | 7485 | { |
7502 | struct sock *sk = sock->sk; | 7486 | struct sock *sk = sock->sk; |
7503 | struct sctp_sock *sp = sctp_sk(sk); | 7487 | struct sctp_sock *sp = sctp_sk(sk); |
7504 | unsigned int mask; | 7488 | __poll_t mask; |
7505 | 7489 | ||
7506 | poll_wait(file, sk_sleep(sk), wait); | 7490 | poll_wait(file, sk_sleep(sk), wait); |
7507 | 7491 | ||
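
The sctp_poll() signature change above is part of a tree-wide conversion: poll handlers now return the dedicated __poll_t type so sparse can flag code that mixes poll bitmasks with ordinary integers. The same conversion appears below for smc_poll(), sock_poll() and the sunrpc cache_poll variants. A minimal sketch of the typed handler shape, with a hypothetical handler name:

#include <linux/poll.h>
#include <net/sock.h>

/* Sketch: a poll method under the __poll_t convention. The mask is
 * built only from the POLLIN/POLLOUT-style bit constants and is
 * declared and returned as __poll_t, never as int or unsigned int.
 */
static __poll_t demo_poll(struct file *file, struct socket *sock,
			  poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;

	poll_wait(file, sk_sleep(sk), wait);

	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;
	if (sk->sk_err)
		mask |= POLLERR;

	return mask;
}
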
@@ -7999,12 +7983,12 @@ void sctp_sock_rfree(struct sk_buff *skb) | |||
7999 | 7983 | ||
8000 | /* Helper function to wait for space in the sndbuf. */ | 7984 | /* Helper function to wait for space in the sndbuf. */ |
8001 | static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, | 7985 | static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, |
8002 | size_t msg_len, struct sock **orig_sk) | 7986 | size_t msg_len) |
8003 | { | 7987 | { |
8004 | struct sock *sk = asoc->base.sk; | 7988 | struct sock *sk = asoc->base.sk; |
8005 | int err = 0; | ||
8006 | long current_timeo = *timeo_p; | 7989 | long current_timeo = *timeo_p; |
8007 | DEFINE_WAIT(wait); | 7990 | DEFINE_WAIT(wait); |
7991 | int err = 0; | ||
8008 | 7992 | ||
8009 | pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, | 7993 | pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, |
8010 | *timeo_p, msg_len); | 7994 | *timeo_p, msg_len); |
@@ -8033,17 +8017,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, | |||
8033 | release_sock(sk); | 8017 | release_sock(sk); |
8034 | current_timeo = schedule_timeout(current_timeo); | 8018 | current_timeo = schedule_timeout(current_timeo); |
8035 | lock_sock(sk); | 8019 | lock_sock(sk); |
8036 | if (sk != asoc->base.sk) { | 8020 | if (sk != asoc->base.sk) |
8037 | release_sock(sk); | 8021 | goto do_error; |
8038 | sk = asoc->base.sk; | ||
8039 | lock_sock(sk); | ||
8040 | } | ||
8041 | 8022 | ||
8042 | *timeo_p = current_timeo; | 8023 | *timeo_p = current_timeo; |
8043 | } | 8024 | } |
8044 | 8025 | ||
8045 | out: | 8026 | out: |
8046 | *orig_sk = sk; | ||
8047 | finish_wait(&asoc->wait, &wait); | 8027 | finish_wait(&asoc->wait, &wait); |
8048 | 8028 | ||
8049 | /* Release the association's refcnt. */ | 8029 | /* Release the association's refcnt. */ |
diff --git a/net/sctp/stream.c b/net/sctp/stream.c index a11db21dc8a0..524dfeb94c41 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c | |||
@@ -64,7 +64,7 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, | |||
64 | */ | 64 | */ |
65 | 65 | ||
66 | /* Mark as failed send. */ | 66 | /* Mark as failed send. */ |
67 | sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); | 67 | sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM); |
68 | if (asoc->peer.prsctp_capable && | 68 | if (asoc->peer.prsctp_capable && |
69 | SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) | 69 | SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) |
70 | asoc->sent_cnt_removable--; | 70 | asoc->sent_cnt_removable--; |
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, | |||
156 | sctp_stream_outq_migrate(stream, NULL, outcnt); | 156 | sctp_stream_outq_migrate(stream, NULL, outcnt); |
157 | sched->sched_all(stream); | 157 | sched->sched_all(stream); |
158 | 158 | ||
159 | i = sctp_stream_alloc_out(stream, outcnt, gfp); | 159 | ret = sctp_stream_alloc_out(stream, outcnt, gfp); |
160 | if (i) | 160 | if (ret) |
161 | return i; | 161 | goto out; |
162 | 162 | ||
163 | stream->outcnt = outcnt; | 163 | stream->outcnt = outcnt; |
164 | for (i = 0; i < stream->outcnt; i++) | 164 | for (i = 0; i < stream->outcnt; i++) |
@@ -170,19 +170,17 @@ in: | |||
170 | if (!incnt) | 170 | if (!incnt) |
171 | goto out; | 171 | goto out; |
172 | 172 | ||
173 | i = sctp_stream_alloc_in(stream, incnt, gfp); | 173 | ret = sctp_stream_alloc_in(stream, incnt, gfp); |
174 | if (i) { | 174 | if (ret) { |
175 | ret = -ENOMEM; | 175 | sched->free(stream); |
176 | goto free; | 176 | kfree(stream->out); |
177 | stream->out = NULL; | ||
178 | stream->outcnt = 0; | ||
179 | goto out; | ||
177 | } | 180 | } |
178 | 181 | ||
179 | stream->incnt = incnt; | 182 | stream->incnt = incnt; |
180 | goto out; | ||
181 | 183 | ||
182 | free: | ||
183 | sched->free(stream); | ||
184 | kfree(stream->out); | ||
185 | stream->out = NULL; | ||
186 | out: | 184 | out: |
187 | return ret; | 185 | return ret; |
188 | } | 186 | } |
@@ -254,6 +252,30 @@ static int sctp_send_reconf(struct sctp_association *asoc, | |||
254 | return retval; | 252 | return retval; |
255 | } | 253 | } |
256 | 254 | ||
255 | static bool sctp_stream_outq_is_empty(struct sctp_stream *stream, | ||
256 | __u16 str_nums, __be16 *str_list) | ||
257 | { | ||
258 | struct sctp_association *asoc; | ||
259 | __u16 i; | ||
260 | |||
261 | asoc = container_of(stream, struct sctp_association, stream); | ||
262 | if (!asoc->outqueue.out_qlen) | ||
263 | return true; | ||
264 | |||
265 | if (!str_nums) | ||
266 | return false; | ||
267 | |||
268 | for (i = 0; i < str_nums; i++) { | ||
269 | __u16 sid = ntohs(str_list[i]); | ||
270 | |||
271 | if (stream->out[sid].ext && | ||
272 | !list_empty(&stream->out[sid].ext->outq)) | ||
273 | return false; | ||
274 | } | ||
275 | |||
276 | return true; | ||
277 | } | ||
278 | |||
257 | int sctp_send_reset_streams(struct sctp_association *asoc, | 279 | int sctp_send_reset_streams(struct sctp_association *asoc, |
258 | struct sctp_reset_streams *params) | 280 | struct sctp_reset_streams *params) |
259 | { | 281 | { |
@@ -317,6 +339,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc, | |||
317 | for (i = 0; i < str_nums; i++) | 339 | for (i = 0; i < str_nums; i++) |
318 | nstr_list[i] = htons(str_list[i]); | 340 | nstr_list[i] = htons(str_list[i]); |
319 | 341 | ||
342 | if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) { | ||
343 | retval = -EAGAIN; | ||
344 | goto out; | ||
345 | } | ||
346 | |||
320 | chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); | 347 | chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); |
321 | 348 | ||
322 | kfree(nstr_list); | 349 | kfree(nstr_list); |
@@ -377,6 +404,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) | |||
377 | if (asoc->strreset_outstanding) | 404 | if (asoc->strreset_outstanding) |
378 | return -EINPROGRESS; | 405 | return -EINPROGRESS; |
379 | 406 | ||
407 | if (!sctp_outq_is_empty(&asoc->outqueue)) | ||
408 | return -EAGAIN; | ||
409 | |||
380 | chunk = sctp_make_strreset_tsnreq(asoc); | 410 | chunk = sctp_make_strreset_tsnreq(asoc); |
381 | if (!chunk) | 411 | if (!chunk) |
382 | return -ENOMEM; | 412 | return -ENOMEM; |
@@ -563,7 +593,7 @@ struct sctp_chunk *sctp_process_strreset_outreq( | |||
563 | flags = SCTP_STREAM_RESET_INCOMING_SSN; | 593 | flags = SCTP_STREAM_RESET_INCOMING_SSN; |
564 | } | 594 | } |
565 | 595 | ||
566 | nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2; | 596 | nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); |
567 | if (nums) { | 597 | if (nums) { |
568 | str_p = outreq->list_of_streams; | 598 | str_p = outreq->list_of_streams; |
569 | for (i = 0; i < nums; i++) { | 599 | for (i = 0; i < nums; i++) { |
@@ -627,7 +657,7 @@ struct sctp_chunk *sctp_process_strreset_inreq( | |||
627 | goto out; | 657 | goto out; |
628 | } | 658 | } |
629 | 659 | ||
630 | nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2; | 660 | nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16); |
631 | str_p = inreq->list_of_streams; | 661 | str_p = inreq->list_of_streams; |
632 | for (i = 0; i < nums; i++) { | 662 | for (i = 0; i < nums; i++) { |
633 | if (ntohs(str_p[i]) >= stream->outcnt) { | 663 | if (ntohs(str_p[i]) >= stream->outcnt) { |
@@ -636,6 +666,12 @@ struct sctp_chunk *sctp_process_strreset_inreq( | |||
636 | } | 666 | } |
637 | } | 667 | } |
638 | 668 | ||
669 | if (!sctp_stream_outq_is_empty(stream, nums, str_p)) { | ||
670 | result = SCTP_STRRESET_IN_PROGRESS; | ||
671 | asoc->strreset_inseq--; | ||
672 | goto err; | ||
673 | } | ||
674 | |||
639 | chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); | 675 | chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); |
640 | if (!chunk) | 676 | if (!chunk) |
641 | goto out; | 677 | goto out; |
@@ -687,12 +723,18 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( | |||
687 | i = asoc->strreset_inseq - request_seq - 1; | 723 | i = asoc->strreset_inseq - request_seq - 1; |
688 | result = asoc->strreset_result[i]; | 724 | result = asoc->strreset_result[i]; |
689 | if (result == SCTP_STRRESET_PERFORMED) { | 725 | if (result == SCTP_STRRESET_PERFORMED) { |
690 | next_tsn = asoc->next_tsn; | 726 | next_tsn = asoc->ctsn_ack_point + 1; |
691 | init_tsn = | 727 | init_tsn = |
692 | sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; | 728 | sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; |
693 | } | 729 | } |
694 | goto err; | 730 | goto err; |
695 | } | 731 | } |
732 | |||
733 | if (!sctp_outq_is_empty(&asoc->outqueue)) { | ||
734 | result = SCTP_STRRESET_IN_PROGRESS; | ||
735 | goto err; | ||
736 | } | ||
737 | |||
696 | asoc->strreset_inseq++; | 738 | asoc->strreset_inseq++; |
697 | 739 | ||
698 | if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) | 740 | if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) |
@@ -703,9 +745,10 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( | |||
703 | goto out; | 745 | goto out; |
704 | } | 746 | } |
705 | 747 | ||
706 | /* G3: The same processing as though a SACK chunk with no gap report | 748 | /* G4: The same processing as though a FWD-TSN chunk (as defined in |
707 | * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were | 749 | * [RFC3758]) with all streams affected and a new cumulative TSN |
708 | * received MUST be performed. | 750 | * ACK of the Receiver's Next TSN minus 1 were received MUST be |
751 | * performed. | ||
709 | */ | 752 | */ |
710 | max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); | 753 | max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); |
711 | sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); | 754 | sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); |
@@ -720,10 +763,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( | |||
720 | sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, | 763 | sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, |
721 | init_tsn, GFP_ATOMIC); | 764 | init_tsn, GFP_ATOMIC); |
722 | 765 | ||
723 | /* G4: The same processing as though a FWD-TSN chunk (as defined in | 766 | /* G3: The same processing as though a SACK chunk with no gap report |
724 | * [RFC3758]) with all streams affected and a new cumulative TSN | 767 | * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were |
725 | * ACK of the Receiver's Next TSN minus 1 were received MUST be | 768 | * received MUST be performed. |
726 | * performed. | ||
727 | */ | 769 | */ |
728 | sctp_outq_free(&asoc->outqueue); | 770 | sctp_outq_free(&asoc->outqueue); |
729 | 771 | ||
@@ -927,7 +969,8 @@ struct sctp_chunk *sctp_process_strreset_resp( | |||
927 | 969 | ||
928 | outreq = (struct sctp_strreset_outreq *)req; | 970 | outreq = (struct sctp_strreset_outreq *)req; |
929 | str_p = outreq->list_of_streams; | 971 | str_p = outreq->list_of_streams; |
930 | nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2; | 972 | nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / |
973 | sizeof(__u16); | ||
931 | 974 | ||
932 | if (result == SCTP_STRRESET_PERFORMED) { | 975 | if (result == SCTP_STRRESET_PERFORMED) { |
933 | if (nums) { | 976 | if (nums) { |
@@ -956,7 +999,8 @@ struct sctp_chunk *sctp_process_strreset_resp( | |||
956 | 999 | ||
957 | inreq = (struct sctp_strreset_inreq *)req; | 1000 | inreq = (struct sctp_strreset_inreq *)req; |
958 | str_p = inreq->list_of_streams; | 1001 | str_p = inreq->list_of_streams; |
959 | nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; | 1002 | nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / |
1003 | sizeof(__u16); | ||
960 | 1004 | ||
961 | *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, | 1005 | *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, |
962 | nums, str_p, GFP_ATOMIC); | 1006 | nums, str_p, GFP_ATOMIC); |
@@ -975,6 +1019,7 @@ struct sctp_chunk *sctp_process_strreset_resp( | |||
975 | if (result == SCTP_STRRESET_PERFORMED) { | 1019 | if (result == SCTP_STRRESET_PERFORMED) { |
976 | __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( | 1020 | __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( |
977 | &asoc->peer.tsn_map); | 1021 | &asoc->peer.tsn_map); |
1022 | LIST_HEAD(temp); | ||
978 | 1023 | ||
979 | sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); | 1024 | sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); |
980 | sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); | 1025 | sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); |
@@ -983,7 +1028,13 @@ struct sctp_chunk *sctp_process_strreset_resp( | |||
983 | SCTP_TSN_MAP_INITIAL, | 1028 | SCTP_TSN_MAP_INITIAL, |
984 | stsn, GFP_ATOMIC); | 1029 | stsn, GFP_ATOMIC); |
985 | 1030 | ||
1031 | /* Clean up sacked and abandoned queues only. As the | ||
1032 | * out_chunk_list may not be empty, splice it to temp, | ||
1033 | * then get it back after sctp_outq_free is done. | ||
1034 | */ | ||
1035 | list_splice_init(&asoc->outqueue.out_chunk_list, &temp); | ||
986 | sctp_outq_free(&asoc->outqueue); | 1036 | sctp_outq_free(&asoc->outqueue); |
1037 | list_splice_init(&temp, &asoc->outqueue.out_chunk_list); | ||
987 | 1038 | ||
988 | asoc->next_tsn = rtsn; | 1039 | asoc->next_tsn = rtsn; |
989 | asoc->ctsn_ack_point = asoc->next_tsn - 1; | 1040 | asoc->ctsn_ack_point = asoc->next_tsn - 1; |
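
The response handler above needs sctp_outq_free() to drop the sacked and abandoned queues while keeping chunks that were never transmitted, so it parks out_chunk_list on a temporary head and splices it back afterwards. The splice-out/splice-back idiom in isolation, under assumed demo names:

#include <linux/list.h>

/* Sketch: preserve one sub-list across a bulk free. list_splice_init()
 * moves every entry onto the temporary head and reinitializes the
 * source, so the subsequent free sees an empty list; splicing back
 * afterwards restores the saved entries in their original order.
 */
static void demo_free_keep_pending(struct list_head *pending,
				   void (*free_all)(void))
{
	LIST_HEAD(temp);

	list_splice_init(pending, &temp); /* hide pending entries */
	free_all();                       /* frees everything it still sees */
	list_splice_init(&temp, pending); /* put them back */
}
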
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 0b83ec51e43b..d8c162a4089c 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c | |||
@@ -119,16 +119,27 @@ static struct sctp_sched_ops sctp_sched_fcfs = { | |||
119 | .unsched_all = sctp_sched_fcfs_unsched_all, | 119 | .unsched_all = sctp_sched_fcfs_unsched_all, |
120 | }; | 120 | }; |
121 | 121 | ||
122 | static void sctp_sched_ops_fcfs_init(void) | ||
123 | { | ||
124 | sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs); | ||
125 | } | ||
126 | |||
122 | /* API to other parts of the stack */ | 127 | /* API to other parts of the stack */ |
123 | 128 | ||
124 | extern struct sctp_sched_ops sctp_sched_prio; | 129 | static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1]; |
125 | extern struct sctp_sched_ops sctp_sched_rr; | ||
126 | 130 | ||
127 | static struct sctp_sched_ops *sctp_sched_ops[] = { | 131 | void sctp_sched_ops_register(enum sctp_sched_type sched, |
128 | &sctp_sched_fcfs, | 132 | struct sctp_sched_ops *sched_ops) |
129 | &sctp_sched_prio, | 133 | { |
130 | &sctp_sched_rr, | 134 | sctp_sched_ops[sched] = sched_ops; |
131 | }; | 135 | } |
136 | |||
137 | void sctp_sched_ops_init(void) | ||
138 | { | ||
139 | sctp_sched_ops_fcfs_init(); | ||
140 | sctp_sched_ops_prio_init(); | ||
141 | sctp_sched_ops_rr_init(); | ||
142 | } | ||
132 | 143 | ||
133 | int sctp_sched_set_sched(struct sctp_association *asoc, | 144 | int sctp_sched_set_sched(struct sctp_association *asoc, |
134 | enum sctp_sched_type sched) | 145 | enum sctp_sched_type sched) |
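
The scheduler table moves from a build-time array of extern symbols to explicit registration: each scheduler keeps its ops static and exposes only an init hook, and sctp_init() drives the order through sctp_sched_ops_init(). Roughly, a scheduler now self-registers as below (the ops fields and the slot here are placeholders; the real prio/rr hooks follow in the next two files):

/* Sketch of the registration scheme introduced above. Keeping the
 * table private to stream_sched.c means new schedulers only need to
 * call sctp_sched_ops_register() with their enum sctp_sched_type slot.
 */
static struct sctp_sched_ops demo_sched_ops = {
	/* .set = ..., .get = ..., .init = ..., etc. */
};

void sctp_sched_ops_demo_init(void)
{
	sctp_sched_ops_register(SCTP_SS_FCFS, &demo_sched_ops);
}
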
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 384dbf3c8760..7997d35dd0fd 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c | |||
@@ -333,7 +333,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream) | |||
333 | sctp_sched_prio_unsched(soute); | 333 | sctp_sched_prio_unsched(soute); |
334 | } | 334 | } |
335 | 335 | ||
336 | struct sctp_sched_ops sctp_sched_prio = { | 336 | static struct sctp_sched_ops sctp_sched_prio = { |
337 | .set = sctp_sched_prio_set, | 337 | .set = sctp_sched_prio_set, |
338 | .get = sctp_sched_prio_get, | 338 | .get = sctp_sched_prio_get, |
339 | .init = sctp_sched_prio_init, | 339 | .init = sctp_sched_prio_init, |
@@ -345,3 +345,8 @@ struct sctp_sched_ops sctp_sched_prio = { | |||
345 | .sched_all = sctp_sched_prio_sched_all, | 345 | .sched_all = sctp_sched_prio_sched_all, |
346 | .unsched_all = sctp_sched_prio_unsched_all, | 346 | .unsched_all = sctp_sched_prio_unsched_all, |
347 | }; | 347 | }; |
348 | |||
349 | void sctp_sched_ops_prio_init(void) | ||
350 | { | ||
351 | sctp_sched_ops_register(SCTP_SS_PRIO, &sctp_sched_prio); | ||
352 | } | ||
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index 7612a438c5b9..1155692448f1 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c | |||
@@ -187,7 +187,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) | |||
187 | sctp_sched_rr_unsched(stream, soute); | 187 | sctp_sched_rr_unsched(stream, soute); |
188 | } | 188 | } |
189 | 189 | ||
190 | struct sctp_sched_ops sctp_sched_rr = { | 190 | static struct sctp_sched_ops sctp_sched_rr = { |
191 | .set = sctp_sched_rr_set, | 191 | .set = sctp_sched_rr_set, |
192 | .get = sctp_sched_rr_get, | 192 | .get = sctp_sched_rr_get, |
193 | .init = sctp_sched_rr_init, | 193 | .init = sctp_sched_rr_init, |
@@ -199,3 +199,8 @@ struct sctp_sched_ops sctp_sched_rr = { | |||
199 | .sched_all = sctp_sched_rr_sched_all, | 199 | .sched_all = sctp_sched_rr_sched_all, |
200 | .unsched_all = sctp_sched_rr_unsched_all, | 200 | .unsched_all = sctp_sched_rr_unsched_all, |
201 | }; | 201 | }; |
202 | |||
203 | void sctp_sched_ops_rr_init(void) | ||
204 | { | ||
205 | sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr); | ||
206 | } | ||
diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1e5a22430cf5..47f82bd794d9 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c | |||
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) | |||
248 | transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; | 248 | transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; |
249 | } | 249 | } |
250 | 250 | ||
251 | void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) | 251 | bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) |
252 | { | 252 | { |
253 | struct dst_entry *dst = sctp_transport_dst_check(t); | 253 | struct dst_entry *dst = sctp_transport_dst_check(t); |
254 | bool change = true; | ||
254 | 255 | ||
255 | if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { | 256 | if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { |
256 | pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", | 257 | pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n", |
257 | __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); | 258 | __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); |
258 | /* Use default minimum segment size and disable | 259 | /* Use default minimum segment instead */ |
259 | * pmtu discovery on this transport. | 260 | pmtu = SCTP_DEFAULT_MINSEGMENT; |
260 | */ | ||
261 | t->pathmtu = SCTP_DEFAULT_MINSEGMENT; | ||
262 | } else { | ||
263 | t->pathmtu = pmtu; | ||
264 | } | 261 | } |
262 | pmtu = SCTP_TRUNC4(pmtu); | ||
265 | 263 | ||
266 | if (dst) { | 264 | if (dst) { |
267 | dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); | 265 | dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); |
268 | dst = sctp_transport_dst_check(t); | 266 | dst = sctp_transport_dst_check(t); |
269 | } | 267 | } |
270 | 268 | ||
271 | if (!dst) | 269 | if (!dst) { |
272 | t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); | 270 | t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); |
271 | dst = t->dst; | ||
272 | } | ||
273 | |||
274 | if (dst) { | ||
275 | /* Re-fetch, as under layers may have a higher minimum size */ | ||
276 | pmtu = SCTP_TRUNC4(dst_mtu(dst)); | ||
277 | change = t->pathmtu != pmtu; | ||
278 | } | ||
279 | t->pathmtu = pmtu; | ||
280 | |||
281 | return change; | ||
273 | } | 282 | } |
274 | 283 | ||
275 | /* Caches the dst entry and source address for a transport's destination | 284 | /* Caches the dst entry and source address for a transport's destination |
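
With the hunk above, sctp_transport_update_pmtu() reports whether the effective path MTU actually changed: the reported value is clamped to SCTP_DEFAULT_MINSEGMENT, truncated to a 4-byte boundary, and re-read from dst_mtu(), so "no change" is common. A hedged sketch of how a caller might consume the new return value (caller name hypothetical):

/* Sketch: only react when the path MTU really moved. */
static void demo_icmp_frag_needed(struct sctp_transport *t, u32 pmtu)
{
	if (!sctp_transport_update_pmtu(t, pmtu))
		return;   /* effective MTU unchanged, nothing to do */

	/* re-sync the association pmtu, trigger retransmission, etc. */
}
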
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index a71be33f3afe..e36ec5dd64c6 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c | |||
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, | |||
1084 | void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, | 1084 | void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, |
1085 | gfp_t gfp) | 1085 | gfp_t gfp) |
1086 | { | 1086 | { |
1087 | struct sctp_association *asoc; | 1087 | struct sctp_association *asoc = ulpq->asoc; |
1088 | __u16 needed, freed; | 1088 | __u32 freed = 0; |
1089 | 1089 | __u16 needed; | |
1090 | asoc = ulpq->asoc; | ||
1091 | 1090 | ||
1092 | if (chunk) { | 1091 | needed = ntohs(chunk->chunk_hdr->length) - |
1093 | needed = ntohs(chunk->chunk_hdr->length); | 1092 | sizeof(struct sctp_data_chunk); |
1094 | needed -= sizeof(struct sctp_data_chunk); | ||
1095 | } else | ||
1096 | needed = SCTP_DEFAULT_MAXWINDOW; | ||
1097 | |||
1098 | freed = 0; | ||
1099 | 1093 | ||
1100 | if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { | 1094 | if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { |
1101 | freed = sctp_ulpq_renege_order(ulpq, needed); | 1095 | freed = sctp_ulpq_renege_order(ulpq, needed); |
1102 | if (freed < needed) { | 1096 | if (freed < needed) |
1103 | freed += sctp_ulpq_renege_frags(ulpq, needed - freed); | 1097 | freed += sctp_ulpq_renege_frags(ulpq, needed - freed); |
1104 | } | ||
1105 | } | 1098 | } |
1106 | /* If able to free enough room, accept this chunk. */ | 1099 | /* If able to free enough room, accept this chunk. */ |
1107 | if (chunk && (freed >= needed)) { | 1100 | if (freed >= needed) { |
1108 | int retval; | 1101 | int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); |
1109 | retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); | ||
1110 | /* | 1102 | /* |
1111 | * Enter partial delivery if chunk has not been | 1103 | * Enter partial delivery if chunk has not been |
1112 | * delivered; otherwise, drain the reassembly queue. | 1104 | * delivered; otherwise, drain the reassembly queue. |
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6451c5013e06..449f62e1e270 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c | |||
@@ -1107,7 +1107,7 @@ out: | |||
1107 | return rc; | 1107 | return rc; |
1108 | } | 1108 | } |
1109 | 1109 | ||
1110 | static unsigned int smc_accept_poll(struct sock *parent) | 1110 | static __poll_t smc_accept_poll(struct sock *parent) |
1111 | { | 1111 | { |
1112 | struct smc_sock *isk; | 1112 | struct smc_sock *isk; |
1113 | struct sock *sk; | 1113 | struct sock *sk; |
@@ -1126,11 +1126,11 @@ static unsigned int smc_accept_poll(struct sock *parent) | |||
1126 | return 0; | 1126 | return 0; |
1127 | } | 1127 | } |
1128 | 1128 | ||
1129 | static unsigned int smc_poll(struct file *file, struct socket *sock, | 1129 | static __poll_t smc_poll(struct file *file, struct socket *sock, |
1130 | poll_table *wait) | 1130 | poll_table *wait) |
1131 | { | 1131 | { |
1132 | struct sock *sk = sock->sk; | 1132 | struct sock *sk = sock->sk; |
1133 | unsigned int mask = 0; | 1133 | __poll_t mask = 0; |
1134 | struct smc_sock *smc; | 1134 | struct smc_sock *smc; |
1135 | int rc; | 1135 | int rc; |
1136 | 1136 | ||
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 1800e16b2a02..511548085d16 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c | |||
@@ -35,7 +35,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, | |||
35 | struct smc_clc_msg_hdr *clcm = buf; | 35 | struct smc_clc_msg_hdr *clcm = buf; |
36 | struct msghdr msg = {NULL, 0}; | 36 | struct msghdr msg = {NULL, 0}; |
37 | int reason_code = 0; | 37 | int reason_code = 0; |
38 | struct kvec vec; | 38 | struct kvec vec = {buf, buflen}; |
39 | int len, datlen; | 39 | int len, datlen; |
40 | int krflags; | 40 | int krflags; |
41 | 41 | ||
@@ -43,12 +43,15 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, | |||
43 | * so we don't consume any subsequent CLC message or payload data | 43 | * so we don't consume any subsequent CLC message or payload data |
44 | * in the TCP byte stream | 44 | * in the TCP byte stream |
45 | */ | 45 | */ |
46 | vec.iov_base = buf; | 46 | /* |
47 | vec.iov_len = buflen; | 47 | * Caller must make sure that buflen is no less than |
48 | * sizeof(struct smc_clc_msg_hdr) | ||
49 | */ | ||
48 | krflags = MSG_PEEK | MSG_WAITALL; | 50 | krflags = MSG_PEEK | MSG_WAITALL; |
49 | smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; | 51 | smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; |
50 | len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, | 52 | iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, |
51 | sizeof(struct smc_clc_msg_hdr), krflags); | 53 | sizeof(struct smc_clc_msg_hdr)); |
54 | len = sock_recvmsg(smc->clcsock, &msg, krflags); | ||
52 | if (signal_pending(current)) { | 55 | if (signal_pending(current)) { |
53 | reason_code = -EINTR; | 56 | reason_code = -EINTR; |
54 | clc_sk->sk_err = EINTR; | 57 | clc_sk->sk_err = EINTR; |
@@ -83,12 +86,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, | |||
83 | } | 86 | } |
84 | 87 | ||
85 | /* receive the complete CLC message */ | 88 | /* receive the complete CLC message */ |
86 | vec.iov_base = buf; | ||
87 | vec.iov_len = buflen; | ||
88 | memset(&msg, 0, sizeof(struct msghdr)); | 89 | memset(&msg, 0, sizeof(struct msghdr)); |
90 | iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen); | ||
89 | krflags = MSG_WAITALL; | 91 | krflags = MSG_WAITALL; |
90 | smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; | 92 | smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; |
91 | len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); | 93 | len = sock_recvmsg(smc->clcsock, &msg, krflags); |
92 | if (len < datlen) { | 94 | if (len < datlen) { |
93 | smc->sk.sk_err = EPROTO; | 95 | smc->sk.sk_err = EPROTO; |
94 | reason_code = -EPROTO; | 96 | reason_code = -EPROTO; |
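
smc_clc_wait_msg() drops kernel_recvmsg() in favor of loading the kvec into msg.msg_iter and calling sock_recvmsg(); initializing the kvec once at declaration lets both the MSG_PEEK header read and the full-message read reuse it, since each call re-primes the iterator anyway. The two-phase peek-then-consume shape, reduced to a sketch with illustrative names:

#include <linux/errno.h>
#include <linux/net.h>
#include <linux/string.h>
#include <linux/uio.h>

/* Sketch: peek at a fixed-size header without consuming stream bytes,
 * then read the whole message. msg_iter must be re-primed before the
 * second call because the first one advanced it.
 */
static int demo_recv(struct socket *sock, void *buf, size_t buflen,
		     size_t hdrlen)
{
	struct kvec vec = {buf, buflen};
	struct msghdr msg = {NULL, 0};
	int len;

	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, hdrlen);
	len = sock_recvmsg(sock, &msg, MSG_PEEK | MSG_WAITALL);
	if (len < (int)hdrlen)
		return -EPROTO;

	/* ... parse the header, learn the real message length ... */

	memset(&msg, 0, sizeof(msg));
	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
	return sock_recvmsg(sock, &msg, MSG_WAITALL);
}
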
diff --git a/net/socket.c b/net/socket.c index 42d8e9c9ccd5..2f378449bc1b 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -118,7 +118,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); | |||
118 | static int sock_mmap(struct file *file, struct vm_area_struct *vma); | 118 | static int sock_mmap(struct file *file, struct vm_area_struct *vma); |
119 | 119 | ||
120 | static int sock_close(struct inode *inode, struct file *file); | 120 | static int sock_close(struct inode *inode, struct file *file); |
121 | static unsigned int sock_poll(struct file *file, | 121 | static __poll_t sock_poll(struct file *file, |
122 | struct poll_table_struct *wait); | 122 | struct poll_table_struct *wait); |
123 | static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 123 | static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
124 | #ifdef CONFIG_COMPAT | 124 | #ifdef CONFIG_COMPAT |
@@ -406,8 +406,10 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) | |||
406 | name.len = strlen(name.name); | 406 | name.len = strlen(name.name); |
407 | } | 407 | } |
408 | path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); | 408 | path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); |
409 | if (unlikely(!path.dentry)) | 409 | if (unlikely(!path.dentry)) { |
410 | sock_release(sock); | ||
410 | return ERR_PTR(-ENOMEM); | 411 | return ERR_PTR(-ENOMEM); |
412 | } | ||
411 | path.mnt = mntget(sock_mnt); | 413 | path.mnt = mntget(sock_mnt); |
412 | 414 | ||
413 | d_instantiate(path.dentry, SOCK_INODE(sock)); | 415 | d_instantiate(path.dentry, SOCK_INODE(sock)); |
@@ -415,9 +417,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) | |||
415 | file = alloc_file(&path, FMODE_READ | FMODE_WRITE, | 417 | file = alloc_file(&path, FMODE_READ | FMODE_WRITE, |
416 | &socket_file_ops); | 418 | &socket_file_ops); |
417 | if (IS_ERR(file)) { | 419 | if (IS_ERR(file)) { |
418 | /* drop dentry, keep inode */ | 420 | /* drop dentry, keep inode for a bit */ |
419 | ihold(d_inode(path.dentry)); | 421 | ihold(d_inode(path.dentry)); |
420 | path_put(&path); | 422 | path_put(&path); |
423 | /* ... and now kill it properly */ | ||
424 | sock_release(sock); | ||
421 | return file; | 425 | return file; |
422 | } | 426 | } |
423 | 427 | ||
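
After this hunk, sock_alloc_file() consumes the socket on every failure path, so callers (sock_map_fd and accept4 below, the sctp peeloff hunk above) must no longer call sock_release() after an error; doing so would now be a double free. The new calling convention, sketched with a demo caller:

#include <linux/err.h>
#include <linux/file.h>
#include <linux/net.h>

/* Sketch: post-patch convention. On error sock_alloc_file() has
 * already released the socket; the caller only unwinds resources it
 * still owns (here, the reserved fd slot).
 */
static int demo_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd = get_unused_fd_flags(flags);

	if (fd < 0) {
		sock_release(sock);   /* still ours: alloc_file never ran */
		return fd;
	}

	newfile = sock_alloc_file(sock, flags, NULL);
	if (IS_ERR(newfile)) {
		put_unused_fd(fd);    /* no sock_release(): already done */
		return PTR_ERR(newfile);
	}

	fd_install(fd, newfile);
	return fd;
}
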
@@ -432,8 +436,10 @@ static int sock_map_fd(struct socket *sock, int flags) | |||
432 | { | 436 | { |
433 | struct file *newfile; | 437 | struct file *newfile; |
434 | int fd = get_unused_fd_flags(flags); | 438 | int fd = get_unused_fd_flags(flags); |
435 | if (unlikely(fd < 0)) | 439 | if (unlikely(fd < 0)) { |
440 | sock_release(sock); | ||
436 | return fd; | 441 | return fd; |
442 | } | ||
437 | 443 | ||
438 | newfile = sock_alloc_file(sock, flags, NULL); | 444 | newfile = sock_alloc_file(sock, flags, NULL); |
439 | if (likely(!IS_ERR(newfile))) { | 445 | if (likely(!IS_ERR(newfile))) { |
@@ -1091,9 +1097,9 @@ out_release: | |||
1091 | EXPORT_SYMBOL(sock_create_lite); | 1097 | EXPORT_SYMBOL(sock_create_lite); |
1092 | 1098 | ||
1093 | /* No kernel lock held - perfect */ | 1099 | /* No kernel lock held - perfect */ |
1094 | static unsigned int sock_poll(struct file *file, poll_table *wait) | 1100 | static __poll_t sock_poll(struct file *file, poll_table *wait) |
1095 | { | 1101 | { |
1096 | unsigned int busy_flag = 0; | 1102 | __poll_t busy_flag = 0; |
1097 | struct socket *sock; | 1103 | struct socket *sock; |
1098 | 1104 | ||
1099 | /* | 1105 | /* |
@@ -1330,19 +1336,9 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) | |||
1330 | 1336 | ||
1331 | retval = sock_create(family, type, protocol, &sock); | 1337 | retval = sock_create(family, type, protocol, &sock); |
1332 | if (retval < 0) | 1338 | if (retval < 0) |
1333 | goto out; | 1339 | return retval; |
1334 | |||
1335 | retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); | ||
1336 | if (retval < 0) | ||
1337 | goto out_release; | ||
1338 | |||
1339 | out: | ||
1340 | /* It may be already another descriptor 8) Not kernel problem. */ | ||
1341 | return retval; | ||
1342 | 1340 | ||
1343 | out_release: | 1341 | return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); |
1344 | sock_release(sock); | ||
1345 | return retval; | ||
1346 | } | 1342 | } |
1347 | 1343 | ||
1348 | /* | 1344 | /* |
@@ -1366,87 +1362,72 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, | |||
1366 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; | 1362 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; |
1367 | 1363 | ||
1368 | /* | 1364 | /* |
1365 | * reserve descriptors and make sure we won't fail | ||
1366 | * to return them to userland. | ||
1367 | */ | ||
1368 | fd1 = get_unused_fd_flags(flags); | ||
1369 | if (unlikely(fd1 < 0)) | ||
1370 | return fd1; | ||
1371 | |||
1372 | fd2 = get_unused_fd_flags(flags); | ||
1373 | if (unlikely(fd2 < 0)) { | ||
1374 | put_unused_fd(fd1); | ||
1375 | return fd2; | ||
1376 | } | ||
1377 | |||
1378 | err = put_user(fd1, &usockvec[0]); | ||
1379 | if (err) | ||
1380 | goto out; | ||
1381 | |||
1382 | err = put_user(fd2, &usockvec[1]); | ||
1383 | if (err) | ||
1384 | goto out; | ||
1385 | |||
1386 | /* | ||
1369 | * Obtain the first socket and check if the underlying protocol | 1387 | * Obtain the first socket and check if the underlying protocol |
1370 | * supports the socketpair call. | 1388 | * supports the socketpair call. |
1371 | */ | 1389 | */ |
1372 | 1390 | ||
1373 | err = sock_create(family, type, protocol, &sock1); | 1391 | err = sock_create(family, type, protocol, &sock1); |
1374 | if (err < 0) | 1392 | if (unlikely(err < 0)) |
1375 | goto out; | 1393 | goto out; |
1376 | 1394 | ||
1377 | err = sock_create(family, type, protocol, &sock2); | 1395 | err = sock_create(family, type, protocol, &sock2); |
1378 | if (err < 0) | 1396 | if (unlikely(err < 0)) { |
1379 | goto out_release_1; | 1397 | sock_release(sock1); |
1380 | 1398 | goto out; | |
1381 | err = sock1->ops->socketpair(sock1, sock2); | ||
1382 | if (err < 0) | ||
1383 | goto out_release_both; | ||
1384 | |||
1385 | fd1 = get_unused_fd_flags(flags); | ||
1386 | if (unlikely(fd1 < 0)) { | ||
1387 | err = fd1; | ||
1388 | goto out_release_both; | ||
1389 | } | 1399 | } |
1390 | 1400 | ||
1391 | fd2 = get_unused_fd_flags(flags); | 1401 | err = sock1->ops->socketpair(sock1, sock2); |
1392 | if (unlikely(fd2 < 0)) { | 1402 | if (unlikely(err < 0)) { |
1393 | err = fd2; | 1403 | sock_release(sock2); |
1394 | goto out_put_unused_1; | 1404 | sock_release(sock1); |
1405 | goto out; | ||
1395 | } | 1406 | } |
1396 | 1407 | ||
1397 | newfile1 = sock_alloc_file(sock1, flags, NULL); | 1408 | newfile1 = sock_alloc_file(sock1, flags, NULL); |
1398 | if (IS_ERR(newfile1)) { | 1409 | if (IS_ERR(newfile1)) { |
1399 | err = PTR_ERR(newfile1); | 1410 | err = PTR_ERR(newfile1); |
1400 | goto out_put_unused_both; | 1411 | sock_release(sock2); |
1412 | goto out; | ||
1401 | } | 1413 | } |
1402 | 1414 | ||
1403 | newfile2 = sock_alloc_file(sock2, flags, NULL); | 1415 | newfile2 = sock_alloc_file(sock2, flags, NULL); |
1404 | if (IS_ERR(newfile2)) { | 1416 | if (IS_ERR(newfile2)) { |
1405 | err = PTR_ERR(newfile2); | 1417 | err = PTR_ERR(newfile2); |
1406 | goto out_fput_1; | 1418 | fput(newfile1); |
1419 | goto out; | ||
1407 | } | 1420 | } |
1408 | 1421 | ||
1409 | err = put_user(fd1, &usockvec[0]); | ||
1410 | if (err) | ||
1411 | goto out_fput_both; | ||
1412 | |||
1413 | err = put_user(fd2, &usockvec[1]); | ||
1414 | if (err) | ||
1415 | goto out_fput_both; | ||
1416 | |||
1417 | audit_fd_pair(fd1, fd2); | 1422 | audit_fd_pair(fd1, fd2); |
1418 | 1423 | ||
1419 | fd_install(fd1, newfile1); | 1424 | fd_install(fd1, newfile1); |
1420 | fd_install(fd2, newfile2); | 1425 | fd_install(fd2, newfile2); |
1421 | /* fd1 and fd2 may be already another descriptors. | ||
1422 | * Not kernel problem. | ||
1423 | */ | ||
1424 | |||
1425 | return 0; | 1426 | return 0; |
1426 | 1427 | ||
1427 | out_fput_both: | 1428 | out: |
1428 | fput(newfile2); | ||
1429 | fput(newfile1); | ||
1430 | put_unused_fd(fd2); | ||
1431 | put_unused_fd(fd1); | ||
1432 | goto out; | ||
1433 | |||
1434 | out_fput_1: | ||
1435 | fput(newfile1); | ||
1436 | put_unused_fd(fd2); | ||
1437 | put_unused_fd(fd1); | ||
1438 | sock_release(sock2); | ||
1439 | goto out; | ||
1440 | |||
1441 | out_put_unused_both: | ||
1442 | put_unused_fd(fd2); | 1429 | put_unused_fd(fd2); |
1443 | out_put_unused_1: | ||
1444 | put_unused_fd(fd1); | 1430 | put_unused_fd(fd1); |
1445 | out_release_both: | ||
1446 | sock_release(sock2); | ||
1447 | out_release_1: | ||
1448 | sock_release(sock1); | ||
1449 | out: | ||
1450 | return err; | 1431 | return err; |
1451 | } | 1432 | } |
1452 | 1433 | ||
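
The socketpair() rewrite above reserves both descriptors and writes them to userspace before creating any socket, so every later failure unwinds with just put_unused_fd() and local releases instead of the old five-label goto chain. The ordering principle, condensed under demo names: acquire the cheap-to-unwind resources first, then the objects with heavier cleanup.

#include <linux/file.h>
#include <linux/uaccess.h>

/* Sketch: a failed put_user() here costs two put_unused_fd() calls
 * instead of tearing down live sockets and files.
 */
static int demo_pair(int __user *uvec, int flags)
{
	int fd1, fd2, err;

	fd1 = get_unused_fd_flags(flags);
	if (fd1 < 0)
		return fd1;
	fd2 = get_unused_fd_flags(flags);
	if (fd2 < 0) {
		put_unused_fd(fd1);
		return fd2;
	}

	err = put_user(fd1, &uvec[0]);
	if (!err)
		err = put_user(fd2, &uvec[1]);
	if (err)
		goto out_fds;

	/* ... create the sockets/files, fd_install(fd1/fd2, ...) ... */
	return 0;

out_fds:
	put_unused_fd(fd2);
	put_unused_fd(fd1);
	return err;
}
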
@@ -1562,7 +1543,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, | |||
1562 | if (IS_ERR(newfile)) { | 1543 | if (IS_ERR(newfile)) { |
1563 | err = PTR_ERR(newfile); | 1544 | err = PTR_ERR(newfile); |
1564 | put_unused_fd(newfd); | 1545 | put_unused_fd(newfd); |
1565 | sock_release(newsock); | ||
1566 | goto out_put; | 1546 | goto out_put; |
1567 | } | 1547 | } |
1568 | 1548 | ||
@@ -2641,6 +2621,15 @@ out_fs: | |||
2641 | 2621 | ||
2642 | core_initcall(sock_init); /* early initcall */ | 2622 | core_initcall(sock_init); /* early initcall */ |
2643 | 2623 | ||
2624 | static int __init jit_init(void) | ||
2625 | { | ||
2626 | #ifdef CONFIG_BPF_JIT_ALWAYS_ON | ||
2627 | bpf_jit_enable = 1; | ||
2628 | #endif | ||
2629 | return 0; | ||
2630 | } | ||
2631 | pure_initcall(jit_init); | ||
2632 | |||
2644 | #ifdef CONFIG_PROC_FS | 2633 | #ifdef CONFIG_PROC_FS |
2645 | void socket_seq_show(struct seq_file *seq) | 2634 | void socket_seq_show(struct seq_file *seq) |
2646 | { | 2635 | { |
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index c5fda15ba319..1fdab5c4eda8 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c | |||
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp) | |||
401 | * allows a thread in BH context to safely check if the process | 401 | * allows a thread in BH context to safely check if the process |
402 | * lock is held. In this case, if the lock is held, queue work. | 402 | * lock is held. In this case, if the lock is held, queue work. |
403 | */ | 403 | */ |
404 | if (sock_owned_by_user(strp->sk)) { | 404 | if (sock_owned_by_user_nocheck(strp->sk)) { |
405 | queue_work(strp_wq, &strp->work); | 405 | queue_work(strp_wq, &strp->work); |
406 | return; | 406 | return; |
407 | } | 407 | } |
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index c4778cae58ef..444380f968f1 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c | |||
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, | |||
231 | goto out_free_groups; | 231 | goto out_free_groups; |
232 | creds->cr_group_info->gid[i] = kgid; | 232 | creds->cr_group_info->gid[i] = kgid; |
233 | } | 233 | } |
234 | groups_sort(creds->cr_group_info); | ||
234 | 235 | ||
235 | return 0; | 236 | return 0; |
236 | out_free_groups: | 237 | out_free_groups: |
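
This hunk and the matching svcauth_gss change below add a groups_sort() call after populating cr_group_info: the group-membership checks bisect the gid array, so an unsorted list parsed from an external source would make lookups unreliable. The fill-then-sort pattern, sketched with an illustrative helper:

#include <linux/cred.h>

/* Sketch: build a group_info from externally supplied gids, then
 * sort. groups_search() (used by the permission checks) performs a
 * binary search over gi->gid, so the array must be sorted before the
 * credential is ever consulted.
 */
static struct group_info *demo_build_groups(const kgid_t *src, int n)
{
	struct group_info *gi = groups_alloc(n);
	int i;

	if (!gi)
		return NULL;

	for (i = 0; i < n; i++)
		gi->gid[i] = src[i];

	groups_sort(gi);   /* required before any lookup */
	return gi;
}
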
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73165e9ca5bf..26531193fce4 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
@@ -264,7 +264,7 @@ out: | |||
264 | return status; | 264 | return status; |
265 | } | 265 | } |
266 | 266 | ||
267 | static struct cache_detail rsi_cache_template = { | 267 | static const struct cache_detail rsi_cache_template = { |
268 | .owner = THIS_MODULE, | 268 | .owner = THIS_MODULE, |
269 | .hash_size = RSI_HASHMAX, | 269 | .hash_size = RSI_HASHMAX, |
270 | .name = "auth.rpcsec.init", | 270 | .name = "auth.rpcsec.init", |
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd, | |||
481 | goto out; | 481 | goto out; |
482 | rsci.cred.cr_group_info->gid[i] = kgid; | 482 | rsci.cred.cr_group_info->gid[i] = kgid; |
483 | } | 483 | } |
484 | groups_sort(rsci.cred.cr_group_info); | ||
484 | 485 | ||
485 | /* mech name */ | 486 | /* mech name */ |
486 | len = qword_get(&mesg, buf, mlen); | 487 | len = qword_get(&mesg, buf, mlen); |
@@ -524,7 +525,7 @@ out: | |||
524 | return status; | 525 | return status; |
525 | } | 526 | } |
526 | 527 | ||
527 | static struct cache_detail rsc_cache_template = { | 528 | static const struct cache_detail rsc_cache_template = { |
528 | .owner = THIS_MODULE, | 529 | .owner = THIS_MODULE, |
529 | .hash_size = RSC_HASHMAX, | 530 | .hash_size = RSC_HASHMAX, |
530 | .name = "auth.rpcsec.context", | 531 | .name = "auth.rpcsec.context", |
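The rsi/rsc templates can become const because cache_create_net() (see the net/sunrpc/cache.c hunk below) copies the template into a per-network-namespace instance before anything writes to it. A userspace sketch of that copy-from-const-template pattern, with assumed field names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cache_tmpl {
        const char *name;
        int hash_size;
};

static const struct cache_tmpl rsi_template = {
        .name      = "auth.rpcsec.init",
        .hash_size = 64,
};

static struct cache_tmpl *cache_create(const struct cache_tmpl *tmpl)
{
        struct cache_tmpl *cd = malloc(sizeof(*cd));

        if (cd)
                memcpy(cd, tmpl, sizeof(*cd));  /* instance is writable */
        return cd;
}

int main(void)
{
        struct cache_tmpl *cd = cache_create(&rsi_template);

        if (!cd)
                return 1;
        printf("%s (%d buckets)\n", cd->name, cd->hash_size);
        free(cd);
        return 0;
}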
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 79d55d949d9a..aa36dad32db1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c | |||
@@ -930,10 +930,10 @@ out: | |||
930 | 930 | ||
931 | static DECLARE_WAIT_QUEUE_HEAD(queue_wait); | 931 | static DECLARE_WAIT_QUEUE_HEAD(queue_wait); |
932 | 932 | ||
933 | static unsigned int cache_poll(struct file *filp, poll_table *wait, | 933 | static __poll_t cache_poll(struct file *filp, poll_table *wait, |
934 | struct cache_detail *cd) | 934 | struct cache_detail *cd) |
935 | { | 935 | { |
936 | unsigned int mask; | 936 | __poll_t mask; |
937 | struct cache_reader *rp = filp->private_data; | 937 | struct cache_reader *rp = filp->private_data; |
938 | struct cache_queue *cq; | 938 | struct cache_queue *cq; |
939 | 939 | ||
@@ -1501,7 +1501,7 @@ static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, | |||
1501 | return cache_write(filp, buf, count, ppos, cd); | 1501 | return cache_write(filp, buf, count, ppos, cd); |
1502 | } | 1502 | } |
1503 | 1503 | ||
1504 | static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) | 1504 | static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait) |
1505 | { | 1505 | { |
1506 | struct cache_detail *cd = PDE_DATA(file_inode(filp)); | 1506 | struct cache_detail *cd = PDE_DATA(file_inode(filp)); |
1507 | 1507 | ||
@@ -1674,7 +1674,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) | |||
1674 | } | 1674 | } |
1675 | EXPORT_SYMBOL_GPL(cache_unregister_net); | 1675 | EXPORT_SYMBOL_GPL(cache_unregister_net); |
1676 | 1676 | ||
1677 | struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) | 1677 | struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net) |
1678 | { | 1678 | { |
1679 | struct cache_detail *cd; | 1679 | struct cache_detail *cd; |
1680 | int i; | 1680 | int i; |
@@ -1720,7 +1720,7 @@ static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, | |||
1720 | return cache_write(filp, buf, count, ppos, cd); | 1720 | return cache_write(filp, buf, count, ppos, cd); |
1721 | } | 1721 | } |
1722 | 1722 | ||
1723 | static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) | 1723 | static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait) |
1724 | { | 1724 | { |
1725 | struct cache_detail *cd = RPC_I(file_inode(filp))->private; | 1725 | struct cache_detail *cd = RPC_I(file_inode(filp))->private; |
1726 | 1726 | ||
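The cache_poll changes above are type-only: the value is still a POLL* bitmask, but declaring it __poll_t lets sparse verify that poll masks and plain integer error codes are not mixed. For reference, a userspace poll(2) sketch of composing and testing such a mask:

#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };
        short mask;

        if (poll(&pfd, 1, 0) < 0)
                return 1;
        mask = pfd.revents;                     /* the bitmask */
        printf("readable=%d error=%d\n",
               !!(mask & POLLIN), !!(mask & POLLERR));
        return 0;
}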
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a801da812f86..6e432ecd7f99 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1376,22 +1376,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize | |||
1376 | EXPORT_SYMBOL_GPL(rpc_setbufsize); | 1376 | EXPORT_SYMBOL_GPL(rpc_setbufsize); |
1377 | 1377 | ||
1378 | /** | 1378 | /** |
1379 | * rpc_protocol - Get transport protocol number for an RPC client | ||
1380 | * @clnt: RPC client to query | ||
1381 | * | ||
1382 | */ | ||
1383 | int rpc_protocol(struct rpc_clnt *clnt) | ||
1384 | { | ||
1385 | int protocol; | ||
1386 | |||
1387 | rcu_read_lock(); | ||
1388 | protocol = rcu_dereference(clnt->cl_xprt)->prot; | ||
1389 | rcu_read_unlock(); | ||
1390 | return protocol; | ||
1391 | } | ||
1392 | EXPORT_SYMBOL_GPL(rpc_protocol); | ||
1393 | |||
1394 | /** | ||
1395 | * rpc_net_ns - Get the network namespace for this RPC client | 1379 | * rpc_net_ns - Get the network namespace for this RPC client |
1396 | * @clnt: RPC client to query | 1380 | * @clnt: RPC client to query |
1397 | * | 1381 | * |
@@ -1841,6 +1825,7 @@ call_bind_status(struct rpc_task *task) | |||
1841 | case -ECONNABORTED: | 1825 | case -ECONNABORTED: |
1842 | case -ENOTCONN: | 1826 | case -ENOTCONN: |
1843 | case -EHOSTDOWN: | 1827 | case -EHOSTDOWN: |
1828 | case -ENETDOWN: | ||
1844 | case -EHOSTUNREACH: | 1829 | case -EHOSTUNREACH: |
1845 | case -ENETUNREACH: | 1830 | case -ENETUNREACH: |
1846 | case -ENOBUFS: | 1831 | case -ENOBUFS: |
@@ -1917,6 +1902,7 @@ call_connect_status(struct rpc_task *task) | |||
1917 | /* fall through */ | 1902 | /* fall through */ |
1918 | case -ECONNRESET: | 1903 | case -ECONNRESET: |
1919 | case -ECONNABORTED: | 1904 | case -ECONNABORTED: |
1905 | case -ENETDOWN: | ||
1920 | case -ENETUNREACH: | 1906 | case -ENETUNREACH: |
1921 | case -EHOSTUNREACH: | 1907 | case -EHOSTUNREACH: |
1922 | case -EADDRINUSE: | 1908 | case -EADDRINUSE: |
@@ -2022,6 +2008,7 @@ call_transmit_status(struct rpc_task *task) | |||
2022 | */ | 2008 | */ |
2023 | case -ECONNREFUSED: | 2009 | case -ECONNREFUSED: |
2024 | case -EHOSTDOWN: | 2010 | case -EHOSTDOWN: |
2011 | case -ENETDOWN: | ||
2025 | case -EHOSTUNREACH: | 2012 | case -EHOSTUNREACH: |
2026 | case -ENETUNREACH: | 2013 | case -ENETUNREACH: |
2027 | case -EPERM: | 2014 | case -EPERM: |
@@ -2071,6 +2058,7 @@ call_bc_transmit(struct rpc_task *task) | |||
2071 | switch (task->tk_status) { | 2058 | switch (task->tk_status) { |
2072 | case 0: | 2059 | case 0: |
2073 | /* Success */ | 2060 | /* Success */ |
2061 | case -ENETDOWN: | ||
2074 | case -EHOSTDOWN: | 2062 | case -EHOSTDOWN: |
2075 | case -EHOSTUNREACH: | 2063 | case -EHOSTUNREACH: |
2076 | case -ENETUNREACH: | 2064 | case -ENETUNREACH: |
@@ -2139,6 +2127,7 @@ call_status(struct rpc_task *task) | |||
2139 | task->tk_status = 0; | 2127 | task->tk_status = 0; |
2140 | switch(status) { | 2128 | switch(status) { |
2141 | case -EHOSTDOWN: | 2129 | case -EHOSTDOWN: |
2130 | case -ENETDOWN: | ||
2142 | case -EHOSTUNREACH: | 2131 | case -EHOSTUNREACH: |
2143 | case -ENETUNREACH: | 2132 | case -ENETUNREACH: |
2144 | case -EPERM: | 2133 | case -EPERM: |
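Each hunk above adds -ENETDOWN beside -ENETUNREACH and -EHOSTUNREACH, so a downed local interface is handled like any other transient network failure instead of falling through to the hard-error path. A compact userspace classifier with the same shape:

#include <errno.h>
#include <stdio.h>

static const char *classify(int err)
{
        switch (err) {
        case ENETDOWN:          /* newly grouped with the cases below */
        case ENETUNREACH:
        case EHOSTUNREACH:
        case EHOSTDOWN:
                return "transient: retry or fail soft";
        case ECONNREFUSED:
                return "rebind and retry";
        default:
                return "hard error";
        }
}

int main(void)
{
        printf("ENETDOWN -> %s\n", classify(ENETDOWN));
        return 0;
}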
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7803f3b6aa53..5c4330325787 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -340,12 +340,12 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of | |||
340 | return res; | 340 | return res; |
341 | } | 341 | } |
342 | 342 | ||
343 | static unsigned int | 343 | static __poll_t |
344 | rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) | 344 | rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) |
345 | { | 345 | { |
346 | struct inode *inode = file_inode(filp); | 346 | struct inode *inode = file_inode(filp); |
347 | struct rpc_inode *rpci = RPC_I(inode); | 347 | struct rpc_inode *rpci = RPC_I(inode); |
348 | unsigned int mask = POLLOUT | POLLWRNORM; | 348 | __poll_t mask = POLLOUT | POLLWRNORM; |
349 | 349 | ||
350 | poll_wait(filp, &rpci->waitq, wait); | 350 | poll_wait(filp, &rpci->waitq, wait); |
351 | 351 | ||
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b1b49edd7c4d..896691afbb1a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task) | |||
755 | void (*do_action)(struct rpc_task *); | 755 | void (*do_action)(struct rpc_task *); |
756 | 756 | ||
757 | /* | 757 | /* |
758 | * Execute any pending callback first. | 758 | * Perform the next FSM step or a pending callback. |
759 | * | ||
760 | * tk_action may be NULL if the task has been killed. | ||
761 | * In particular, note that rpc_killall_tasks may | ||
762 | * do this at any time, so beware when dereferencing. | ||
759 | */ | 763 | */ |
760 | do_action = task->tk_callback; | 764 | do_action = task->tk_action; |
761 | task->tk_callback = NULL; | 765 | if (task->tk_callback) { |
762 | if (do_action == NULL) { | 766 | do_action = task->tk_callback; |
763 | /* | 767 | task->tk_callback = NULL; |
764 | * Perform the next FSM step. | ||
765 | * tk_action may be NULL if the task has been killed. | ||
766 | * In particular, note that rpc_killall_tasks may | ||
767 | * do this at any time, so beware when dereferencing. | ||
768 | */ | ||
769 | do_action = task->tk_action; | ||
770 | if (do_action == NULL) | ||
771 | break; | ||
772 | } | 768 | } |
773 | trace_rpc_task_run_action(task->tk_client, task, task->tk_action); | 769 | if (!do_action) |
770 | break; | ||
771 | trace_rpc_task_run_action(task->tk_client, task, do_action); | ||
774 | do_action(task); | 772 | do_action(task); |
775 | 773 | ||
776 | /* | 774 | /* |
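The __rpc_execute() rework flattens the step selection: a pending one-shot callback takes precedence, otherwise the next FSM action runs, and the loop ends when neither is set; the trace point now reports the function actually invoked rather than always logging tk_action. A self-contained model of that loop:

#include <stdio.h>

struct task {
        void (*tk_callback)(struct task *);     /* one-shot, may be NULL */
        void (*tk_action)(struct task *);       /* next FSM step, may be NULL */
};

static void step_done(struct task *t) { puts("action"); t->tk_action = NULL; }
static void cb(struct task *t)        { (void)t; puts("callback"); }

static void execute(struct task *t)
{
        for (;;) {
                void (*do_action)(struct task *) = t->tk_action;

                if (t->tk_callback) {           /* callback wins */
                        do_action = t->tk_callback;
                        t->tk_callback = NULL;  /* consume it */
                }
                if (!do_action)                 /* finished or killed */
                        break;
                do_action(t);                   /* what the trace logs */
        }
}

int main(void)
{
        struct task t = { .tk_callback = cb, .tk_action = step_done };

        execute(&t);
        return 0;
}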
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index f81eaa8e0888..af7f28fb8102 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c | |||
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd, | |||
520 | ug.gi->gid[i] = kgid; | 520 | ug.gi->gid[i] = kgid; |
521 | } | 521 | } |
522 | 522 | ||
523 | groups_sort(ug.gi); | ||
523 | ugp = unix_gid_lookup(cd, uid); | 524 | ugp = unix_gid_lookup(cd, uid); |
524 | if (ugp) { | 525 | if (ugp) { |
525 | struct cache_head *ch; | 526 | struct cache_head *ch; |
@@ -569,7 +570,7 @@ static int unix_gid_show(struct seq_file *m, | |||
569 | return 0; | 570 | return 0; |
570 | } | 571 | } |
571 | 572 | ||
572 | static struct cache_detail unix_gid_cache_template = { | 573 | static const struct cache_detail unix_gid_cache_template = { |
573 | .owner = THIS_MODULE, | 574 | .owner = THIS_MODULE, |
574 | .hash_size = GID_HASHMAX, | 575 | .hash_size = GID_HASHMAX, |
575 | .name = "auth.unix.gid", | 576 | .name = "auth.unix.gid", |
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
819 | kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); | 820 | kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); |
820 | cred->cr_group_info->gid[i] = kgid; | 821 | cred->cr_group_info->gid[i] = kgid; |
821 | } | 822 | } |
823 | groups_sort(cred->cr_group_info); | ||
822 | if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { | 824 | if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { |
823 | *authp = rpc_autherr_badverf; | 825 | *authp = rpc_autherr_badverf; |
824 | return SVC_DENIED; | 826 | return SVC_DENIED; |
@@ -862,7 +864,7 @@ struct auth_ops svcauth_unix = { | |||
862 | .set_client = svcauth_unix_set_client, | 864 | .set_client = svcauth_unix_set_client, |
863 | }; | 865 | }; |
864 | 866 | ||
865 | static struct cache_detail ip_map_cache_template = { | 867 | static const struct cache_detail ip_map_cache_template = { |
866 | .owner = THIS_MODULE, | 868 | .owner = THIS_MODULE, |
867 | .hash_size = IP_HASHMAX, | 869 | .hash_size = IP_HASHMAX, |
868 | .name = "auth.unix.ip", | 870 | .name = "auth.unix.ip", |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ff8e06cd067e..5570719e4787 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -338,8 +338,8 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, | |||
338 | rqstp->rq_xprt_hlen = 0; | 338 | rqstp->rq_xprt_hlen = 0; |
339 | 339 | ||
340 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); | 340 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
341 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, | 341 | iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen); |
342 | msg.msg_flags); | 342 | len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags); |
343 | /* If we read a full record, then assume there may be more | 343 | /* If we read a full record, then assume there may be more |
344 | * data to read (stream based sockets only!) | 344 | * data to read (stream based sockets only!) |
345 | */ | 345 | */ |
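svc_recvfrom() now builds the kvec iterator explicitly and calls sock_recvmsg() instead of going through kernel_recvmsg(). A userspace recvmsg(2) sketch of the same scatter-read-into-an-iovec idea:

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

static ssize_t recv_into(int fd, void *hdr, size_t hlen,
                         void *body, size_t blen)
{
        struct iovec iov[2] = {
                { .iov_base = hdr,  .iov_len = hlen },
                { .iov_base = body, .iov_len = blen },
        };
        struct msghdr msg;

        memset(&msg, 0, sizeof(msg));
        msg.msg_iov = iov;              /* plays the role of msg.msg_iter */
        msg.msg_iovlen = 2;
        return recvmsg(fd, &msg, MSG_DONTWAIT);
}

int main(void)
{
        int sv[2];
        char hdr[4], body[16];

        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0)
                return 1;
        if (write(sv[1], "HEADpayload", 11) != 11)
                return 1;
        return recv_into(sv[0], hdr, sizeof(hdr),
                         body, sizeof(body)) == 11 ? 0 : 1;
}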
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 333b9d697ae5..2436fd1125fc 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task) | |||
940 | 940 | ||
941 | if (task->tk_status != -ETIMEDOUT) | 941 | if (task->tk_status != -ETIMEDOUT) |
942 | return; | 942 | return; |
943 | dprintk("RPC: %5u xprt_timer\n", task->tk_pid); | ||
944 | 943 | ||
944 | trace_xprt_timer(xprt, req->rq_xid, task->tk_status); | ||
945 | if (!req->rq_reply_bytes_recvd) { | 945 | if (!req->rq_reply_bytes_recvd) { |
946 | if (xprt->ops->timer) | 946 | if (xprt->ops->timer) |
947 | xprt->ops->timer(xprt, task); | 947 | xprt->ops->timer(xprt, task); |
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task) | |||
1001 | { | 1001 | { |
1002 | struct rpc_rqst *req = task->tk_rqstp; | 1002 | struct rpc_rqst *req = task->tk_rqstp; |
1003 | struct rpc_xprt *xprt = req->rq_xprt; | 1003 | struct rpc_xprt *xprt = req->rq_xprt; |
1004 | unsigned int connect_cookie; | ||
1004 | int status, numreqs; | 1005 | int status, numreqs; |
1005 | 1006 | ||
1006 | dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); | 1007 | dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); |
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task) | |||
1024 | } else if (!req->rq_bytes_sent) | 1025 | } else if (!req->rq_bytes_sent) |
1025 | return; | 1026 | return; |
1026 | 1027 | ||
1028 | connect_cookie = xprt->connect_cookie; | ||
1027 | req->rq_xtime = ktime_get(); | 1029 | req->rq_xtime = ktime_get(); |
1028 | status = xprt->ops->send_request(task); | 1030 | status = xprt->ops->send_request(task); |
1029 | trace_xprt_transmit(xprt, req->rq_xid, status); | 1031 | trace_xprt_transmit(xprt, req->rq_xid, status); |
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task) | |||
1047 | xprt->stat.bklog_u += xprt->backlog.qlen; | 1049 | xprt->stat.bklog_u += xprt->backlog.qlen; |
1048 | xprt->stat.sending_u += xprt->sending.qlen; | 1050 | xprt->stat.sending_u += xprt->sending.qlen; |
1049 | xprt->stat.pending_u += xprt->pending.qlen; | 1051 | xprt->stat.pending_u += xprt->pending.qlen; |
1052 | spin_unlock_bh(&xprt->transport_lock); | ||
1050 | 1053 | ||
1051 | /* Don't race with disconnect */ | 1054 | req->rq_connect_cookie = connect_cookie; |
1052 | if (!xprt_connected(xprt)) | 1055 | if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) { |
1053 | task->tk_status = -ENOTCONN; | ||
1054 | else { | ||
1055 | /* | 1056 | /* |
1056 | * Sleep on the pending queue since | 1057 | * Sleep on the pending queue if we're expecting a reply. |
1057 | * we're expecting a reply. | 1058 | * The spinlock ensures atomicity between the test of |
1059 | * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on(). | ||
1058 | */ | 1060 | */ |
1059 | if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) | 1061 | spin_lock(&xprt->recv_lock); |
1062 | if (!req->rq_reply_bytes_recvd) { | ||
1060 | rpc_sleep_on(&xprt->pending, task, xprt_timer); | 1063 | rpc_sleep_on(&xprt->pending, task, xprt_timer); |
1061 | req->rq_connect_cookie = xprt->connect_cookie; | 1064 | /* |
1065 | * Send an extra queue wakeup call if the | ||
1066 | * connection was dropped in case the call to | ||
1067 | * rpc_sleep_on() raced. | ||
1068 | */ | ||
1069 | if (!xprt_connected(xprt)) | ||
1070 | xprt_wake_pending_tasks(xprt, -ENOTCONN); | ||
1071 | } | ||
1072 | spin_unlock(&xprt->recv_lock); | ||
1062 | } | 1073 | } |
1063 | spin_unlock_bh(&xprt->transport_lock); | ||
1064 | } | 1074 | } |
1065 | 1075 | ||
1066 | static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) | 1076 | static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) |
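The reworked xprt_transmit() tail snapshots connect_cookie before the send and re-tests rq_reply_bytes_recvd under recv_lock before sleeping, waking the queue again if the connection dropped meanwhile; that closes the window in which a reply or disconnect could slip in between the test and the sleep. A pthread sketch of the underlying check-under-the-same-lock-before-sleeping discipline, with assumed names:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t recv_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  pending   = PTHREAD_COND_INITIALIZER;
static bool reply_received;
static bool connected = true;

static void wait_for_reply(void)                /* transmit path */
{
        pthread_mutex_lock(&recv_lock);
        while (!reply_received) {
                if (!connected)                 /* extra wakeup path */
                        break;
                pthread_cond_wait(&pending, &recv_lock);
        }
        pthread_mutex_unlock(&recv_lock);
}

static void reply_arrived(void)                 /* receive path */
{
        pthread_mutex_lock(&recv_lock);
        reply_received = true;
        pthread_cond_signal(&pending);
        pthread_mutex_unlock(&recv_lock);
}

static void *rx(void *arg) { (void)arg; reply_arrived(); return NULL; }

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, rx, NULL);
        wait_for_reply();               /* cannot miss the wakeup */
        pthread_join(t, NULL);
        return 0;
}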
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 8b818bb3518a..ed1a4a3065ee 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c | |||
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, | |||
43 | req = rpcrdma_create_req(r_xprt); | 43 | req = rpcrdma_create_req(r_xprt); |
44 | if (IS_ERR(req)) | 44 | if (IS_ERR(req)) |
45 | return PTR_ERR(req); | 45 | return PTR_ERR(req); |
46 | __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags); | ||
47 | 46 | ||
48 | rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, | 47 | rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, |
49 | DMA_TO_DEVICE, GFP_KERNEL); | 48 | DMA_TO_DEVICE, GFP_KERNEL); |
@@ -74,21 +73,13 @@ out_fail: | |||
74 | static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, | 73 | static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, |
75 | unsigned int count) | 74 | unsigned int count) |
76 | { | 75 | { |
77 | struct rpcrdma_rep *rep; | ||
78 | int rc = 0; | 76 | int rc = 0; |
79 | 77 | ||
80 | while (count--) { | 78 | while (count--) { |
81 | rep = rpcrdma_create_rep(r_xprt); | 79 | rc = rpcrdma_create_rep(r_xprt); |
82 | if (IS_ERR(rep)) { | 80 | if (rc) |
83 | pr_err("RPC: %s: reply buffer alloc failed\n", | ||
84 | __func__); | ||
85 | rc = PTR_ERR(rep); | ||
86 | break; | 81 | break; |
87 | } | ||
88 | |||
89 | rpcrdma_recv_buffer_put(rep); | ||
90 | } | 82 | } |
91 | |||
92 | return rc; | 83 | return rc; |
93 | } | 84 | } |
94 | 85 | ||
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) | |||
129 | rqst->rq_xprt = &r_xprt->rx_xprt; | 120 | rqst->rq_xprt = &r_xprt->rx_xprt; |
130 | INIT_LIST_HEAD(&rqst->rq_list); | 121 | INIT_LIST_HEAD(&rqst->rq_list); |
131 | INIT_LIST_HEAD(&rqst->rq_bc_list); | 122 | INIT_LIST_HEAD(&rqst->rq_bc_list); |
123 | __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); | ||
132 | 124 | ||
133 | if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) | 125 | if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) |
134 | goto out_free; | 126 | goto out_free; |
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) | |||
148 | 140 | ||
149 | buffer->rb_bc_srv_max_requests = reqs; | 141 | buffer->rb_bc_srv_max_requests = reqs; |
150 | request_module("svcrdma"); | 142 | request_module("svcrdma"); |
151 | 143 | trace_xprtrdma_cb_setup(r_xprt, reqs); | |
152 | return 0; | 144 | return 0; |
153 | 145 | ||
154 | out_free: | 146 | out_free: |
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) | |||
196 | return maxmsg - RPCRDMA_HDRLEN_MIN; | 188 | return maxmsg - RPCRDMA_HDRLEN_MIN; |
197 | } | 189 | } |
198 | 190 | ||
199 | /** | 191 | static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) |
200 | * rpcrdma_bc_marshal_reply - Send backwards direction reply | ||
201 | * @rqst: buffer containing RPC reply data | ||
202 | * | ||
203 | * Returns zero on success. | ||
204 | */ | ||
205 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) | ||
206 | { | 192 | { |
207 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 193 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
208 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 194 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) | |||
226 | if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, | 212 | if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, |
227 | &rqst->rq_snd_buf, rpcrdma_noch)) | 213 | &rqst->rq_snd_buf, rpcrdma_noch)) |
228 | return -EIO; | 214 | return -EIO; |
215 | |||
216 | trace_xprtrdma_cb_reply(rqst); | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | /** | ||
221 | * xprt_rdma_bc_send_reply - marshal and send a backchannel reply | ||
222 | * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf | ||
223 | * | ||
224 | * Caller holds the transport's write lock. | ||
225 | * | ||
226 | * Returns: | ||
227 | * %0 if the RPC message has been sent | ||
228 | * %-ENOTCONN if the caller should reconnect and call again | ||
229 | * %-EIO if a permanent error occurred and the request was not | ||
230 | * sent. Do not try to send this message again. | ||
231 | */ | ||
232 | int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) | ||
233 | { | ||
234 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | ||
235 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
236 | int rc; | ||
237 | |||
238 | if (!xprt_connected(rqst->rq_xprt)) | ||
239 | goto drop_connection; | ||
240 | |||
241 | rc = rpcrdma_bc_marshal_reply(rqst); | ||
242 | if (rc < 0) | ||
243 | goto failed_marshal; | ||
244 | |||
245 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | ||
246 | goto drop_connection; | ||
229 | return 0; | 247 | return 0; |
248 | |||
249 | failed_marshal: | ||
250 | if (rc != -ENOTCONN) | ||
251 | return rc; | ||
252 | drop_connection: | ||
253 | xprt_disconnect_done(rqst->rq_xprt); | ||
254 | return -ENOTCONN; | ||
230 | } | 255 | } |
231 | 256 | ||
232 | /** | 257 | /** |
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) | |||
262 | dprintk("RPC: %s: freeing rqst %p (req %p)\n", | 287 | dprintk("RPC: %s: freeing rqst %p (req %p)\n", |
263 | __func__, rqst, rpcr_to_rdmar(rqst)); | 288 | __func__, rqst, rpcr_to_rdmar(rqst)); |
264 | 289 | ||
265 | smp_mb__before_atomic(); | ||
266 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); | ||
267 | clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); | ||
268 | smp_mb__after_atomic(); | ||
269 | |||
270 | spin_lock_bh(&xprt->bc_pa_lock); | 290 | spin_lock_bh(&xprt->bc_pa_lock); |
271 | list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); | 291 | list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); |
272 | spin_unlock_bh(&xprt->bc_pa_lock); | 292 | spin_unlock_bh(&xprt->bc_pa_lock); |
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) | |||
274 | 294 | ||
275 | /** | 295 | /** |
276 | * rpcrdma_bc_receive_call - Handle a backward direction call | 296 | * rpcrdma_bc_receive_call - Handle a backward direction call |
277 | * @xprt: transport receiving the call | 297 | * @r_xprt: transport receiving the call |
278 | * @rep: receive buffer containing the call | 298 | * @rep: receive buffer containing the call |
279 | * | 299 | * |
280 | * Operational assumptions: | 300 | * Operational assumptions: |
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
313 | struct rpc_rqst, rq_bc_pa_list); | 333 | struct rpc_rqst, rq_bc_pa_list); |
314 | list_del(&rqst->rq_bc_pa_list); | 334 | list_del(&rqst->rq_bc_pa_list); |
315 | spin_unlock(&xprt->bc_pa_lock); | 335 | spin_unlock(&xprt->bc_pa_lock); |
316 | dprintk("RPC: %s: using rqst %p\n", __func__, rqst); | ||
317 | 336 | ||
318 | /* Prepare rqst */ | 337 | /* Prepare rqst */ |
319 | rqst->rq_reply_bytes_recvd = 0; | 338 | rqst->rq_reply_bytes_recvd = 0; |
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
321 | rqst->rq_xid = *p; | 340 | rqst->rq_xid = *p; |
322 | 341 | ||
323 | rqst->rq_private_buf.len = size; | 342 | rqst->rq_private_buf.len = size; |
324 | set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); | ||
325 | 343 | ||
326 | buf = &rqst->rq_rcv_buf; | 344 | buf = &rqst->rq_rcv_buf; |
327 | memset(buf, 0, sizeof(*buf)); | 345 | memset(buf, 0, sizeof(*buf)); |
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
335 | * the Upper Layer is done decoding it. | 353 | * the Upper Layer is done decoding it. |
336 | */ | 354 | */ |
337 | req = rpcr_to_rdmar(rqst); | 355 | req = rpcr_to_rdmar(rqst); |
338 | dprintk("RPC: %s: attaching rep %p to req %p\n", | ||
339 | __func__, rep, req); | ||
340 | req->rl_reply = rep; | 356 | req->rl_reply = rep; |
341 | 357 | trace_xprtrdma_cb_call(rqst); | |
342 | /* Defeat the retransmit detection logic in send_request */ | ||
343 | req->rl_connect_cookie = 0; | ||
344 | 358 | ||
345 | /* Queue rqst for ULP's callback service */ | 359 | /* Queue rqst for ULP's callback service */ |
346 | bc_serv = xprt->bc_serv; | 360 | bc_serv = xprt->bc_serv; |
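xprt_rdma_bc_send_reply() documents a three-way contract: 0 means sent, -ENOTCONN means reconnect and try again, -EIO means the reply can never be sent. A sketch of a caller honouring that contract; send_reply_once() and reconnect() are stand-ins, not kernel symbols:

#include <errno.h>
#include <stdio.h>

static int send_reply_once(void)
{
        static int attempts;

        return attempts++ ? 0 : -ENOTCONN;      /* fail once, then send */
}

static void reconnect(void) { puts("reconnecting"); }

static int send_reply(void)
{
        int rc = send_reply_once();

        if (rc == -ENOTCONN) {          /* transient: reconnect, retry */
                reconnect();
                rc = send_reply_once();
        }
        if (rc == -EIO)                 /* permanent: drop the reply */
                fprintf(stderr, "reply dropped\n");
        return rc;
}

int main(void) { return send_reply() ? 1 : 0; }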
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 29fc84c7ff98..d5f95bb39300 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -1,6 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | 2 | /* |
3 | * Copyright (c) 2015 Oracle. All rights reserved. | 3 | * Copyright (c) 2015, 2017 Oracle. All rights reserved. |
4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | 4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
5 | */ | 5 | */ |
6 | 6 | ||
@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia) | |||
47 | } | 47 | } |
48 | 48 | ||
49 | static int | 49 | static int |
50 | fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) | 50 | fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) |
51 | { | 51 | { |
52 | static struct ib_fmr_attr fmr_attr = { | 52 | static struct ib_fmr_attr fmr_attr = { |
53 | .max_pages = RPCRDMA_MAX_FMR_SGES, | 53 | .max_pages = RPCRDMA_MAX_FMR_SGES, |
@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) | |||
55 | .page_shift = PAGE_SHIFT | 55 | .page_shift = PAGE_SHIFT |
56 | }; | 56 | }; |
57 | 57 | ||
58 | mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, | 58 | mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, |
59 | sizeof(u64), GFP_KERNEL); | 59 | sizeof(u64), GFP_KERNEL); |
60 | if (!mw->fmr.fm_physaddrs) | 60 | if (!mr->fmr.fm_physaddrs) |
61 | goto out_free; | 61 | goto out_free; |
62 | 62 | ||
63 | mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, | 63 | mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, |
64 | sizeof(*mw->mw_sg), GFP_KERNEL); | 64 | sizeof(*mr->mr_sg), GFP_KERNEL); |
65 | if (!mw->mw_sg) | 65 | if (!mr->mr_sg) |
66 | goto out_free; | 66 | goto out_free; |
67 | 67 | ||
68 | sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); | 68 | sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES); |
69 | 69 | ||
70 | mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, | 70 | mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, |
71 | &fmr_attr); | 71 | &fmr_attr); |
72 | if (IS_ERR(mw->fmr.fm_mr)) | 72 | if (IS_ERR(mr->fmr.fm_mr)) |
73 | goto out_fmr_err; | 73 | goto out_fmr_err; |
74 | 74 | ||
75 | return 0; | 75 | return 0; |
76 | 76 | ||
77 | out_fmr_err: | 77 | out_fmr_err: |
78 | dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, | 78 | dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, |
79 | PTR_ERR(mw->fmr.fm_mr)); | 79 | PTR_ERR(mr->fmr.fm_mr)); |
80 | 80 | ||
81 | out_free: | 81 | out_free: |
82 | kfree(mw->mw_sg); | 82 | kfree(mr->mr_sg); |
83 | kfree(mw->fmr.fm_physaddrs); | 83 | kfree(mr->fmr.fm_physaddrs); |
84 | return -ENOMEM; | 84 | return -ENOMEM; |
85 | } | 85 | } |
86 | 86 | ||
87 | static int | 87 | static int |
88 | __fmr_unmap(struct rpcrdma_mw *mw) | 88 | __fmr_unmap(struct rpcrdma_mr *mr) |
89 | { | 89 | { |
90 | LIST_HEAD(l); | 90 | LIST_HEAD(l); |
91 | int rc; | 91 | int rc; |
92 | 92 | ||
93 | list_add(&mw->fmr.fm_mr->list, &l); | 93 | list_add(&mr->fmr.fm_mr->list, &l); |
94 | rc = ib_unmap_fmr(&l); | 94 | rc = ib_unmap_fmr(&l); |
95 | list_del(&mw->fmr.fm_mr->list); | 95 | list_del(&mr->fmr.fm_mr->list); |
96 | return rc; | 96 | return rc; |
97 | } | 97 | } |
98 | 98 | ||
99 | static void | 99 | static void |
100 | fmr_op_release_mr(struct rpcrdma_mw *r) | 100 | fmr_op_release_mr(struct rpcrdma_mr *mr) |
101 | { | 101 | { |
102 | LIST_HEAD(unmap_list); | 102 | LIST_HEAD(unmap_list); |
103 | int rc; | 103 | int rc; |
104 | 104 | ||
105 | /* Ensure MW is not on any rl_registered list */ | 105 | /* Ensure MW is not on any rl_registered list */ |
106 | if (!list_empty(&r->mw_list)) | 106 | if (!list_empty(&mr->mr_list)) |
107 | list_del(&r->mw_list); | 107 | list_del(&mr->mr_list); |
108 | 108 | ||
109 | kfree(r->fmr.fm_physaddrs); | 109 | kfree(mr->fmr.fm_physaddrs); |
110 | kfree(r->mw_sg); | 110 | kfree(mr->mr_sg); |
111 | 111 | ||
112 | /* In case this one was left mapped, try to unmap it | 112 | /* In case this one was left mapped, try to unmap it |
113 | * to prevent dealloc_fmr from failing with EBUSY | 113 | * to prevent dealloc_fmr from failing with EBUSY |
114 | */ | 114 | */ |
115 | rc = __fmr_unmap(r); | 115 | rc = __fmr_unmap(mr); |
116 | if (rc) | 116 | if (rc) |
117 | pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", | 117 | pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", |
118 | r, rc); | 118 | mr, rc); |
119 | 119 | ||
120 | rc = ib_dealloc_fmr(r->fmr.fm_mr); | 120 | rc = ib_dealloc_fmr(mr->fmr.fm_mr); |
121 | if (rc) | 121 | if (rc) |
122 | pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", | 122 | pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", |
123 | r, rc); | 123 | mr, rc); |
124 | 124 | ||
125 | kfree(r); | 125 | kfree(mr); |
126 | } | 126 | } |
127 | 127 | ||
128 | /* Reset of a single FMR. | 128 | /* Reset of a single FMR. |
129 | */ | 129 | */ |
130 | static void | 130 | static void |
131 | fmr_op_recover_mr(struct rpcrdma_mw *mw) | 131 | fmr_op_recover_mr(struct rpcrdma_mr *mr) |
132 | { | 132 | { |
133 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | 133 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
134 | int rc; | 134 | int rc; |
135 | 135 | ||
136 | /* ORDER: invalidate first */ | 136 | /* ORDER: invalidate first */ |
137 | rc = __fmr_unmap(mw); | 137 | rc = __fmr_unmap(mr); |
138 | |||
139 | /* ORDER: then DMA unmap */ | ||
140 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | ||
141 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
142 | if (rc) | 138 | if (rc) |
143 | goto out_release; | 139 | goto out_release; |
144 | 140 | ||
145 | rpcrdma_put_mw(r_xprt, mw); | 141 | /* ORDER: then DMA unmap */ |
142 | rpcrdma_mr_unmap_and_put(mr); | ||
143 | |||
146 | r_xprt->rx_stats.mrs_recovered++; | 144 | r_xprt->rx_stats.mrs_recovered++; |
147 | return; | 145 | return; |
148 | 146 | ||
149 | out_release: | 147 | out_release: |
150 | pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); | 148 | pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr); |
151 | r_xprt->rx_stats.mrs_orphaned++; | 149 | r_xprt->rx_stats.mrs_orphaned++; |
152 | 150 | ||
153 | spin_lock(&r_xprt->rx_buf.rb_mwlock); | 151 | trace_xprtrdma_dma_unmap(mr); |
154 | list_del(&mw->mw_all); | 152 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
155 | spin_unlock(&r_xprt->rx_buf.rb_mwlock); | 153 | mr->mr_sg, mr->mr_nents, mr->mr_dir); |
154 | |||
155 | spin_lock(&r_xprt->rx_buf.rb_mrlock); | ||
156 | list_del(&mr->mr_all); | ||
157 | spin_unlock(&r_xprt->rx_buf.rb_mrlock); | ||
156 | 158 | ||
157 | fmr_op_release_mr(mw); | 159 | fmr_op_release_mr(mr); |
158 | } | 160 | } |
159 | 161 | ||
160 | static int | 162 | static int |
@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | |||
180 | */ | 182 | */ |
181 | static struct rpcrdma_mr_seg * | 183 | static struct rpcrdma_mr_seg * |
182 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | 184 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, |
183 | int nsegs, bool writing, struct rpcrdma_mw **out) | 185 | int nsegs, bool writing, struct rpcrdma_mr **out) |
184 | { | 186 | { |
185 | struct rpcrdma_mr_seg *seg1 = seg; | 187 | struct rpcrdma_mr_seg *seg1 = seg; |
186 | int len, pageoff, i, rc; | 188 | int len, pageoff, i, rc; |
187 | struct rpcrdma_mw *mw; | 189 | struct rpcrdma_mr *mr; |
188 | u64 *dma_pages; | 190 | u64 *dma_pages; |
189 | 191 | ||
190 | mw = rpcrdma_get_mw(r_xprt); | 192 | mr = rpcrdma_mr_get(r_xprt); |
191 | if (!mw) | 193 | if (!mr) |
192 | return ERR_PTR(-ENOBUFS); | 194 | return ERR_PTR(-ENOBUFS); |
193 | 195 | ||
194 | pageoff = offset_in_page(seg1->mr_offset); | 196 | pageoff = offset_in_page(seg1->mr_offset); |
@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
199 | nsegs = RPCRDMA_MAX_FMR_SGES; | 201 | nsegs = RPCRDMA_MAX_FMR_SGES; |
200 | for (i = 0; i < nsegs;) { | 202 | for (i = 0; i < nsegs;) { |
201 | if (seg->mr_page) | 203 | if (seg->mr_page) |
202 | sg_set_page(&mw->mw_sg[i], | 204 | sg_set_page(&mr->mr_sg[i], |
203 | seg->mr_page, | 205 | seg->mr_page, |
204 | seg->mr_len, | 206 | seg->mr_len, |
205 | offset_in_page(seg->mr_offset)); | 207 | offset_in_page(seg->mr_offset)); |
206 | else | 208 | else |
207 | sg_set_buf(&mw->mw_sg[i], seg->mr_offset, | 209 | sg_set_buf(&mr->mr_sg[i], seg->mr_offset, |
208 | seg->mr_len); | 210 | seg->mr_len); |
209 | len += seg->mr_len; | 211 | len += seg->mr_len; |
210 | ++seg; | 212 | ++seg; |
@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
214 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 216 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) |
215 | break; | 217 | break; |
216 | } | 218 | } |
217 | mw->mw_dir = rpcrdma_data_dir(writing); | 219 | mr->mr_dir = rpcrdma_data_dir(writing); |
218 | 220 | ||
219 | mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, | 221 | mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, |
220 | mw->mw_sg, i, mw->mw_dir); | 222 | mr->mr_sg, i, mr->mr_dir); |
221 | if (!mw->mw_nents) | 223 | if (!mr->mr_nents) |
222 | goto out_dmamap_err; | 224 | goto out_dmamap_err; |
223 | 225 | ||
224 | for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) | 226 | for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) |
225 | dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); | 227 | dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); |
226 | rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, | 228 | rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents, |
227 | dma_pages[0]); | 229 | dma_pages[0]); |
228 | if (rc) | 230 | if (rc) |
229 | goto out_maperr; | 231 | goto out_maperr; |
230 | 232 | ||
231 | mw->mw_handle = mw->fmr.fm_mr->rkey; | 233 | mr->mr_handle = mr->fmr.fm_mr->rkey; |
232 | mw->mw_length = len; | 234 | mr->mr_length = len; |
233 | mw->mw_offset = dma_pages[0] + pageoff; | 235 | mr->mr_offset = dma_pages[0] + pageoff; |
234 | 236 | ||
235 | *out = mw; | 237 | *out = mr; |
236 | return seg; | 238 | return seg; |
237 | 239 | ||
238 | out_dmamap_err: | 240 | out_dmamap_err: |
239 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", | 241 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", |
240 | mw->mw_sg, i); | 242 | mr->mr_sg, i); |
241 | rpcrdma_put_mw(r_xprt, mw); | 243 | rpcrdma_mr_put(mr); |
242 | return ERR_PTR(-EIO); | 244 | return ERR_PTR(-EIO); |
243 | 245 | ||
244 | out_maperr: | 246 | out_maperr: |
245 | pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", | 247 | pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", |
246 | len, (unsigned long long)dma_pages[0], | 248 | len, (unsigned long long)dma_pages[0], |
247 | pageoff, mw->mw_nents, rc); | 249 | pageoff, mr->mr_nents, rc); |
248 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | 250 | rpcrdma_mr_unmap_and_put(mr); |
249 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
250 | rpcrdma_put_mw(r_xprt, mw); | ||
251 | return ERR_PTR(-EIO); | 251 | return ERR_PTR(-EIO); |
252 | } | 252 | } |
253 | 253 | ||
@@ -256,13 +256,13 @@ out_maperr: | |||
256 | * Sleeps until it is safe for the host CPU to access the | 256 | * Sleeps until it is safe for the host CPU to access the |
257 | * previously mapped memory regions. | 257 | * previously mapped memory regions. |
258 | * | 258 | * |
259 | * Caller ensures that @mws is not empty before the call. This | 259 | * Caller ensures that @mrs is not empty before the call. This |
260 | * function empties the list. | 260 | * function empties the list. |
261 | */ | 261 | */ |
262 | static void | 262 | static void |
263 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | 263 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) |
264 | { | 264 | { |
265 | struct rpcrdma_mw *mw; | 265 | struct rpcrdma_mr *mr; |
266 | LIST_HEAD(unmap_list); | 266 | LIST_HEAD(unmap_list); |
267 | int rc; | 267 | int rc; |
268 | 268 | ||
@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
271 | * ib_unmap_fmr() is slow, so use a single call instead | 271 | * ib_unmap_fmr() is slow, so use a single call instead |
272 | * of one call per mapped FMR. | 272 | * of one call per mapped FMR. |
273 | */ | 273 | */ |
274 | list_for_each_entry(mw, mws, mw_list) { | 274 | list_for_each_entry(mr, mrs, mr_list) { |
275 | dprintk("RPC: %s: unmapping fmr %p\n", | 275 | dprintk("RPC: %s: unmapping fmr %p\n", |
276 | __func__, &mw->fmr); | 276 | __func__, &mr->fmr); |
277 | list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); | 277 | trace_xprtrdma_localinv(mr); |
278 | list_add_tail(&mr->fmr.fm_mr->list, &unmap_list); | ||
278 | } | 279 | } |
279 | r_xprt->rx_stats.local_inv_needed++; | 280 | r_xprt->rx_stats.local_inv_needed++; |
280 | rc = ib_unmap_fmr(&unmap_list); | 281 | rc = ib_unmap_fmr(&unmap_list); |
@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
284 | /* ORDER: Now DMA unmap all of the req's MRs, and return | 285 | /* ORDER: Now DMA unmap all of the req's MRs, and return |
285 | * them to the free MW list. | 286 | * them to the free MW list. |
286 | */ | 287 | */ |
287 | while (!list_empty(mws)) { | 288 | while (!list_empty(mrs)) { |
288 | mw = rpcrdma_pop_mw(mws); | 289 | mr = rpcrdma_mr_pop(mrs); |
289 | dprintk("RPC: %s: DMA unmapping fmr %p\n", | 290 | list_del(&mr->fmr.fm_mr->list); |
290 | __func__, &mw->fmr); | 291 | rpcrdma_mr_unmap_and_put(mr); |
291 | list_del(&mw->fmr.fm_mr->list); | ||
292 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | ||
293 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
294 | rpcrdma_put_mw(r_xprt, mw); | ||
295 | } | 292 | } |
296 | 293 | ||
297 | return; | 294 | return; |
@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
299 | out_reset: | 296 | out_reset: |
300 | pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); | 297 | pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); |
301 | 298 | ||
302 | while (!list_empty(mws)) { | 299 | while (!list_empty(mrs)) { |
303 | mw = rpcrdma_pop_mw(mws); | 300 | mr = rpcrdma_mr_pop(mrs); |
304 | list_del(&mw->fmr.fm_mr->list); | 301 | list_del(&mr->fmr.fm_mr->list); |
305 | fmr_op_recover_mr(mw); | 302 | fmr_op_recover_mr(mr); |
306 | } | 303 | } |
307 | } | 304 | } |
308 | 305 | ||
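fmr_op_unmap_sync() keeps two orderings visible in the code: all FMRs are gathered onto one list so the slow ib_unmap_fmr() is issued once per batch, and only after that invalidation does each MR get DMA-unmapped and recycled. A plain-C model of that batch-then-cleanup idiom; invalidate_batch() is an assumed stand-in for the single slow call:

#include <stdio.h>

#define NR 3

static void invalidate_batch(const int *ids, int n)     /* one slow call */
{
        (void)ids;
        printf("invalidating %d regions\n", n);
}

static void unmap_and_put(int id)                       /* per-entry cleanup */
{
        printf("unmap + recycle %d\n", id);
}

int main(void)
{
        int batch[NR] = { 7, 8, 9 };
        int i;

        invalidate_batch(batch, NR);    /* ORDER: invalidate first */
        for (i = 0; i < NR; i++)
                unmap_and_put(batch[i]);        /* ORDER: then DMA unmap */
        return 0;
}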
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 773e66e10a15..90f688f19783 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -1,11 +1,11 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | 2 | /* |
3 | * Copyright (c) 2015 Oracle. All rights reserved. | 3 | * Copyright (c) 2015, 2017 Oracle. All rights reserved. |
4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | 4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | /* Lightweight memory registration using Fast Registration Work | 7 | /* Lightweight memory registration using Fast Registration Work |
8 | * Requests (FRWR). Also referred to sometimes as FRMR mode. | 8 | * Requests (FRWR). |
9 | * | 9 | * |
10 | * FRWR features ordered asynchronous registration and deregistration | 10 | * FRWR features ordered asynchronous registration and deregistration |
11 | * of arbitrarily sized memory regions. This is the fastest and safest | 11 | * of arbitrarily sized memory regions. This is the fastest and safest |
@@ -15,9 +15,9 @@ | |||
15 | /* Normal operation | 15 | /* Normal operation |
16 | * | 16 | * |
17 | * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG | 17 | * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG |
18 | * Work Request (frmr_op_map). When the RDMA operation is finished, this | 18 | * Work Request (frwr_op_map). When the RDMA operation is finished, this |
19 | * Memory Region is invalidated using a LOCAL_INV Work Request | 19 | * Memory Region is invalidated using a LOCAL_INV Work Request |
20 | * (frmr_op_unmap). | 20 | * (frwr_op_unmap_sync). |
21 | * | 21 | * |
22 | * Typically these Work Requests are not signaled, and neither are RDMA | 22 | * Typically these Work Requests are not signaled, and neither are RDMA |
23 | * SEND Work Requests (with the exception of signaling occasionally to | 23 | * SEND Work Requests (with the exception of signaling occasionally to |
@@ -26,7 +26,7 @@ | |||
26 | * | 26 | * |
27 | * As an optimization, frwr_op_unmap marks MRs INVALID before the | 27 | * As an optimization, frwr_op_unmap marks MRs INVALID before the |
28 | * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on | 28 | * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on |
29 | * rb_mws immediately so that no work (like managing a linked list | 29 | * rb_mrs immediately so that no work (like managing a linked list |
30 | * under a spinlock) is needed in the completion upcall. | 30 | * under a spinlock) is needed in the completion upcall. |
31 | * | 31 | * |
32 | * But this means that frwr_op_map() can occasionally encounter an MR | 32 | * But this means that frwr_op_map() can occasionally encounter an MR |
@@ -60,7 +60,7 @@ | |||
60 | * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered | 60 | * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered |
61 | * with ib_dereg_mr and then are re-initialized. Because MR recovery | 61 | * with ib_dereg_mr and then are re-initialized. Because MR recovery |
62 | * allocates fresh resources, it is deferred to a workqueue, and the | 62 | * allocates fresh resources, it is deferred to a workqueue, and the |
63 | * recovered MRs are placed back on the rb_mws list when recovery is | 63 | * recovered MRs are placed back on the rb_mrs list when recovery is |
64 | * complete. frwr_op_map allocates another MR for the current RPC while | 64 | * complete. frwr_op_map allocates another MR for the current RPC while |
65 | * the broken MR is reset. | 65 | * the broken MR is reset. |
66 | * | 66 | * |
@@ -96,26 +96,26 @@ out_not_supported: | |||
96 | } | 96 | } |
97 | 97 | ||
98 | static int | 98 | static int |
99 | frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) | 99 | frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) |
100 | { | 100 | { |
101 | unsigned int depth = ia->ri_max_frmr_depth; | 101 | unsigned int depth = ia->ri_max_frwr_depth; |
102 | struct rpcrdma_frmr *f = &r->frmr; | 102 | struct rpcrdma_frwr *frwr = &mr->frwr; |
103 | int rc; | 103 | int rc; |
104 | 104 | ||
105 | f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); | 105 | frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); |
106 | if (IS_ERR(f->fr_mr)) | 106 | if (IS_ERR(frwr->fr_mr)) |
107 | goto out_mr_err; | 107 | goto out_mr_err; |
108 | 108 | ||
109 | r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); | 109 | mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL); |
110 | if (!r->mw_sg) | 110 | if (!mr->mr_sg) |
111 | goto out_list_err; | 111 | goto out_list_err; |
112 | 112 | ||
113 | sg_init_table(r->mw_sg, depth); | 113 | sg_init_table(mr->mr_sg, depth); |
114 | init_completion(&f->fr_linv_done); | 114 | init_completion(&frwr->fr_linv_done); |
115 | return 0; | 115 | return 0; |
116 | 116 | ||
117 | out_mr_err: | 117 | out_mr_err: |
118 | rc = PTR_ERR(f->fr_mr); | 118 | rc = PTR_ERR(frwr->fr_mr); |
119 | dprintk("RPC: %s: ib_alloc_mr status %i\n", | 119 | dprintk("RPC: %s: ib_alloc_mr status %i\n", |
120 | __func__, rc); | 120 | __func__, rc); |
121 | return rc; | 121 | return rc; |
@@ -124,83 +124,85 @@ out_list_err: | |||
124 | rc = -ENOMEM; | 124 | rc = -ENOMEM; |
125 | dprintk("RPC: %s: sg allocation failure\n", | 125 | dprintk("RPC: %s: sg allocation failure\n", |
126 | __func__); | 126 | __func__); |
127 | ib_dereg_mr(f->fr_mr); | 127 | ib_dereg_mr(frwr->fr_mr); |
128 | return rc; | 128 | return rc; |
129 | } | 129 | } |
130 | 130 | ||
131 | static void | 131 | static void |
132 | frwr_op_release_mr(struct rpcrdma_mw *r) | 132 | frwr_op_release_mr(struct rpcrdma_mr *mr) |
133 | { | 133 | { |
134 | int rc; | 134 | int rc; |
135 | 135 | ||
136 | /* Ensure MW is not on any rl_registered list */ | 136 | /* Ensure MR is not on any rl_registered list */ |
137 | if (!list_empty(&r->mw_list)) | 137 | if (!list_empty(&mr->mr_list)) |
138 | list_del(&r->mw_list); | 138 | list_del(&mr->mr_list); |
139 | 139 | ||
140 | rc = ib_dereg_mr(r->frmr.fr_mr); | 140 | rc = ib_dereg_mr(mr->frwr.fr_mr); |
141 | if (rc) | 141 | if (rc) |
142 | pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", | 142 | pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", |
143 | r, rc); | 143 | mr, rc); |
144 | kfree(r->mw_sg); | 144 | kfree(mr->mr_sg); |
145 | kfree(r); | 145 | kfree(mr); |
146 | } | 146 | } |
147 | 147 | ||
148 | static int | 148 | static int |
149 | __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) | 149 | __frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) |
150 | { | 150 | { |
151 | struct rpcrdma_frmr *f = &r->frmr; | 151 | struct rpcrdma_frwr *frwr = &mr->frwr; |
152 | int rc; | 152 | int rc; |
153 | 153 | ||
154 | rc = ib_dereg_mr(f->fr_mr); | 154 | rc = ib_dereg_mr(frwr->fr_mr); |
155 | if (rc) { | 155 | if (rc) { |
156 | pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", | 156 | pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", |
157 | rc, r); | 157 | rc, mr); |
158 | return rc; | 158 | return rc; |
159 | } | 159 | } |
160 | 160 | ||
161 | f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, | 161 | frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, |
162 | ia->ri_max_frmr_depth); | 162 | ia->ri_max_frwr_depth); |
163 | if (IS_ERR(f->fr_mr)) { | 163 | if (IS_ERR(frwr->fr_mr)) { |
164 | pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", | 164 | pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", |
165 | PTR_ERR(f->fr_mr), r); | 165 | PTR_ERR(frwr->fr_mr), mr); |
166 | return PTR_ERR(f->fr_mr); | 166 | return PTR_ERR(frwr->fr_mr); |
167 | } | 167 | } |
168 | 168 | ||
169 | dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); | 169 | dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr); |
170 | f->fr_state = FRMR_IS_INVALID; | 170 | frwr->fr_state = FRWR_IS_INVALID; |
171 | return 0; | 171 | return 0; |
172 | } | 172 | } |
173 | 173 | ||
174 | /* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. | 174 | /* Reset of a single FRWR. Generate a fresh rkey by replacing the MR. |
175 | */ | 175 | */ |
176 | static void | 176 | static void |
177 | frwr_op_recover_mr(struct rpcrdma_mw *mw) | 177 | frwr_op_recover_mr(struct rpcrdma_mr *mr) |
178 | { | 178 | { |
179 | enum rpcrdma_frmr_state state = mw->frmr.fr_state; | 179 | enum rpcrdma_frwr_state state = mr->frwr.fr_state; |
180 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | 180 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
181 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 181 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
182 | int rc; | 182 | int rc; |
183 | 183 | ||
184 | rc = __frwr_reset_mr(ia, mw); | 184 | rc = __frwr_mr_reset(ia, mr); |
185 | if (state != FRMR_FLUSHED_LI) | 185 | if (state != FRWR_FLUSHED_LI) { |
186 | trace_xprtrdma_dma_unmap(mr); | ||
186 | ib_dma_unmap_sg(ia->ri_device, | 187 | ib_dma_unmap_sg(ia->ri_device, |
187 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | 188 | mr->mr_sg, mr->mr_nents, mr->mr_dir); |
189 | } | ||
188 | if (rc) | 190 | if (rc) |
189 | goto out_release; | 191 | goto out_release; |
190 | 192 | ||
191 | rpcrdma_put_mw(r_xprt, mw); | 193 | rpcrdma_mr_put(mr); |
192 | r_xprt->rx_stats.mrs_recovered++; | 194 | r_xprt->rx_stats.mrs_recovered++; |
193 | return; | 195 | return; |
194 | 196 | ||
195 | out_release: | 197 | out_release: |
196 | pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw); | 198 | pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr); |
197 | r_xprt->rx_stats.mrs_orphaned++; | 199 | r_xprt->rx_stats.mrs_orphaned++; |
198 | 200 | ||
199 | spin_lock(&r_xprt->rx_buf.rb_mwlock); | 201 | spin_lock(&r_xprt->rx_buf.rb_mrlock); |
200 | list_del(&mw->mw_all); | 202 | list_del(&mr->mr_all); |
201 | spin_unlock(&r_xprt->rx_buf.rb_mwlock); | 203 | spin_unlock(&r_xprt->rx_buf.rb_mrlock); |
202 | 204 | ||
203 | frwr_op_release_mr(mw); | 205 | frwr_op_release_mr(mr); |
204 | } | 206 | } |
205 | 207 | ||
206 | static int | 208 | static int |
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
214 | if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) | 216 | if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) |
215 | ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; | 217 | ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; |
216 | 218 | ||
217 | ia->ri_max_frmr_depth = | 219 | ia->ri_max_frwr_depth = |
218 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 220 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
219 | attrs->max_fast_reg_page_list_len); | 221 | attrs->max_fast_reg_page_list_len); |
220 | dprintk("RPC: %s: device's max FR page list len = %u\n", | 222 | dprintk("RPC: %s: device's max FR page list len = %u\n", |
221 | __func__, ia->ri_max_frmr_depth); | 223 | __func__, ia->ri_max_frwr_depth); |
222 | 224 | ||
223 | /* Add room for frmr register and invalidate WRs. | 225 | /* Add room for frwr register and invalidate WRs. |
224 | * 1. FRMR reg WR for head | 226 | * 1. FRWR reg WR for head |
225 | * 2. FRMR invalidate WR for head | 227 | * 2. FRWR invalidate WR for head |
226 | * 3. N FRMR reg WRs for pagelist | 228 | * 3. N FRWR reg WRs for pagelist |
227 | * 4. N FRMR invalidate WRs for pagelist | 229 | * 4. N FRWR invalidate WRs for pagelist |
228 | * 5. FRMR reg WR for tail | 230 | * 5. FRWR reg WR for tail |
229 | * 6. FRMR invalidate WR for tail | 231 | * 6. FRWR invalidate WR for tail |
230 | * 7. The RDMA_SEND WR | 232 | * 7. The RDMA_SEND WR |
231 | */ | 233 | */ |
232 | depth = 7; | 234 | depth = 7; |
233 | 235 | ||
234 | /* Calculate N if the device max FRMR depth is smaller than | 236 | /* Calculate N if the device max FRWR depth is smaller than |
235 | * RPCRDMA_MAX_DATA_SEGS. | 237 | * RPCRDMA_MAX_DATA_SEGS. |
236 | */ | 238 | */ |
237 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | 239 | if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) { |
238 | delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; | 240 | delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth; |
239 | do { | 241 | do { |
240 | depth += 2; /* FRMR reg + invalidate */ | 242 | depth += 2; /* FRWR reg + invalidate */ |
241 | delta -= ia->ri_max_frmr_depth; | 243 | delta -= ia->ri_max_frwr_depth; |
242 | } while (delta > 0); | 244 | } while (delta > 0); |
243 | } | 245 | } |
244 | 246 | ||
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
252 | } | 254 | } |
253 | 255 | ||
254 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / | 256 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / |
255 | ia->ri_max_frmr_depth); | 257 | ia->ri_max_frwr_depth); |
256 | return 0; | 258 | return 0; |
257 | } | 259 | } |
258 | 260 | ||
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | |||
265 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 267 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
266 | 268 | ||
267 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 269 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
268 | RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth); | 270 | RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth); |
269 | } | 271 | } |
270 | 272 | ||
271 | static void | 273 | static void |
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) | |||
286 | static void | 288 | static void |
287 | frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) | 289 | frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) |
288 | { | 290 | { |
289 | struct rpcrdma_frmr *frmr; | 291 | struct ib_cqe *cqe = wc->wr_cqe; |
290 | struct ib_cqe *cqe; | 292 | struct rpcrdma_frwr *frwr = |
293 | container_of(cqe, struct rpcrdma_frwr, fr_cqe); | ||
291 | 294 | ||
292 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 295 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
293 | if (wc->status != IB_WC_SUCCESS) { | 296 | if (wc->status != IB_WC_SUCCESS) { |
294 | cqe = wc->wr_cqe; | 297 | frwr->fr_state = FRWR_FLUSHED_FR; |
295 | frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); | ||
296 | frmr->fr_state = FRMR_FLUSHED_FR; | ||
297 | __frwr_sendcompletion_flush(wc, "fastreg"); | 298 | __frwr_sendcompletion_flush(wc, "fastreg"); |
298 | } | 299 | } |
300 | trace_xprtrdma_wc_fastreg(wc, frwr); | ||
299 | } | 301 | } |
300 | 302 | ||
301 | /** | 303 | /** |
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) | |||
307 | static void | 309 | static void |
308 | frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) | 310 | frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) |
309 | { | 311 | { |
310 | struct rpcrdma_frmr *frmr; | 312 | struct ib_cqe *cqe = wc->wr_cqe; |
311 | struct ib_cqe *cqe; | 313 | struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr, |
314 | fr_cqe); | ||
312 | 315 | ||
313 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 316 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
314 | if (wc->status != IB_WC_SUCCESS) { | 317 | if (wc->status != IB_WC_SUCCESS) { |
315 | cqe = wc->wr_cqe; | 318 | frwr->fr_state = FRWR_FLUSHED_LI; |
316 | frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); | ||
317 | frmr->fr_state = FRMR_FLUSHED_LI; | ||
318 | __frwr_sendcompletion_flush(wc, "localinv"); | 319 | __frwr_sendcompletion_flush(wc, "localinv"); |
319 | } | 320 | } |
321 | trace_xprtrdma_wc_li(wc, frwr); | ||
320 | } | 322 | } |
321 | 323 | ||
322 | /** | 324 | /** |
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) | |||
329 | static void | 331 | static void |
330 | frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) | 332 | frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) |
331 | { | 333 | { |
332 | struct rpcrdma_frmr *frmr; | 334 | struct ib_cqe *cqe = wc->wr_cqe; |
333 | struct ib_cqe *cqe; | 335 | struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr, |
336 | fr_cqe); | ||
334 | 337 | ||
335 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 338 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
336 | cqe = wc->wr_cqe; | ||
337 | frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); | ||
338 | if (wc->status != IB_WC_SUCCESS) { | 339 | if (wc->status != IB_WC_SUCCESS) { |
339 | frmr->fr_state = FRMR_FLUSHED_LI; | 340 | frwr->fr_state = FRWR_FLUSHED_LI; |
340 | __frwr_sendcompletion_flush(wc, "localinv"); | 341 | __frwr_sendcompletion_flush(wc, "localinv"); |
341 | } | 342 | } |
342 | complete(&frmr->fr_linv_done); | 343 | complete(&frwr->fr_linv_done); |
344 | trace_xprtrdma_wc_li_wake(wc, frwr); | ||
343 | } | 345 | } |
344 | 346 | ||
345 | /* Post a REG_MR Work Request to register a memory region | 347 | /* Post a REG_MR Work Request to register a memory region |
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) | |||
347 | */ | 349 | */ |
348 | static struct rpcrdma_mr_seg * | 350 | static struct rpcrdma_mr_seg * |
349 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | 351 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, |
350 | int nsegs, bool writing, struct rpcrdma_mw **out) | 352 | int nsegs, bool writing, struct rpcrdma_mr **out) |
351 | { | 353 | { |
352 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 354 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
353 | bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; | 355 | bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; |
354 | struct rpcrdma_mw *mw; | 356 | struct rpcrdma_frwr *frwr; |
355 | struct rpcrdma_frmr *frmr; | 357 | struct rpcrdma_mr *mr; |
356 | struct ib_mr *mr; | 358 | struct ib_mr *ibmr; |
357 | struct ib_reg_wr *reg_wr; | 359 | struct ib_reg_wr *reg_wr; |
358 | struct ib_send_wr *bad_wr; | 360 | struct ib_send_wr *bad_wr; |
359 | int rc, i, n; | 361 | int rc, i, n; |
360 | u8 key; | 362 | u8 key; |
361 | 363 | ||
362 | mw = NULL; | 364 | mr = NULL; |
363 | do { | 365 | do { |
364 | if (mw) | 366 | if (mr) |
365 | rpcrdma_defer_mr_recovery(mw); | 367 | rpcrdma_mr_defer_recovery(mr); |
366 | mw = rpcrdma_get_mw(r_xprt); | 368 | mr = rpcrdma_mr_get(r_xprt); |
367 | if (!mw) | 369 | if (!mr) |
368 | return ERR_PTR(-ENOBUFS); | 370 | return ERR_PTR(-ENOBUFS); |
369 | } while (mw->frmr.fr_state != FRMR_IS_INVALID); | 371 | } while (mr->frwr.fr_state != FRWR_IS_INVALID); |
370 | frmr = &mw->frmr; | 372 | frwr = &mr->frwr; |
371 | frmr->fr_state = FRMR_IS_VALID; | 373 | frwr->fr_state = FRWR_IS_VALID; |
372 | mr = frmr->fr_mr; | 374 | |
373 | reg_wr = &frmr->fr_regwr; | 375 | if (nsegs > ia->ri_max_frwr_depth) |
374 | 376 | nsegs = ia->ri_max_frwr_depth; | |
375 | if (nsegs > ia->ri_max_frmr_depth) | ||
376 | nsegs = ia->ri_max_frmr_depth; | ||
377 | for (i = 0; i < nsegs;) { | 377 | for (i = 0; i < nsegs;) { |
378 | if (seg->mr_page) | 378 | if (seg->mr_page) |
379 | sg_set_page(&mw->mw_sg[i], | 379 | sg_set_page(&mr->mr_sg[i], |
380 | seg->mr_page, | 380 | seg->mr_page, |
381 | seg->mr_len, | 381 | seg->mr_len, |
382 | offset_in_page(seg->mr_offset)); | 382 | offset_in_page(seg->mr_offset)); |
383 | else | 383 | else |
384 | sg_set_buf(&mw->mw_sg[i], seg->mr_offset, | 384 | sg_set_buf(&mr->mr_sg[i], seg->mr_offset, |
385 | seg->mr_len); | 385 | seg->mr_len); |
386 | 386 | ||
387 | ++seg; | 387 | ++seg; |
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
392 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 392 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) |
393 | break; | 393 | break; |
394 | } | 394 | } |
395 | mw->mw_dir = rpcrdma_data_dir(writing); | 395 | mr->mr_dir = rpcrdma_data_dir(writing); |
396 | 396 | ||
397 | mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir); | 397 | mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); |
398 | if (!mw->mw_nents) | 398 | if (!mr->mr_nents) |
399 | goto out_dmamap_err; | 399 | goto out_dmamap_err; |
400 | 400 | ||
401 | n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE); | 401 | ibmr = frwr->fr_mr; |
402 | if (unlikely(n != mw->mw_nents)) | 402 | n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); |
403 | if (unlikely(n != mr->mr_nents)) | ||
403 | goto out_mapmr_err; | 404 | goto out_mapmr_err; |
404 | 405 | ||
405 | dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", | 406 | key = (u8)(ibmr->rkey & 0x000000FF); |
406 | __func__, frmr, mw->mw_nents, mr->length); | 407 | ib_update_fast_reg_key(ibmr, ++key); |
407 | |||
408 | key = (u8)(mr->rkey & 0x000000FF); | ||
409 | ib_update_fast_reg_key(mr, ++key); | ||
410 | 408 | ||
409 | reg_wr = &frwr->fr_regwr; | ||
411 | reg_wr->wr.next = NULL; | 410 | reg_wr->wr.next = NULL; |
412 | reg_wr->wr.opcode = IB_WR_REG_MR; | 411 | reg_wr->wr.opcode = IB_WR_REG_MR; |
413 | frmr->fr_cqe.done = frwr_wc_fastreg; | 412 | frwr->fr_cqe.done = frwr_wc_fastreg; |
414 | reg_wr->wr.wr_cqe = &frmr->fr_cqe; | 413 | reg_wr->wr.wr_cqe = &frwr->fr_cqe; |
415 | reg_wr->wr.num_sge = 0; | 414 | reg_wr->wr.num_sge = 0; |
416 | reg_wr->wr.send_flags = 0; | 415 | reg_wr->wr.send_flags = 0; |
417 | reg_wr->mr = mr; | 416 | reg_wr->mr = ibmr; |
418 | reg_wr->key = mr->rkey; | 417 | reg_wr->key = ibmr->rkey; |
419 | reg_wr->access = writing ? | 418 | reg_wr->access = writing ? |
420 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | 419 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
421 | IB_ACCESS_REMOTE_READ; | 420 | IB_ACCESS_REMOTE_READ; |
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
424 | if (rc) | 423 | if (rc) |
425 | goto out_senderr; | 424 | goto out_senderr; |
426 | 425 | ||
427 | mw->mw_handle = mr->rkey; | 426 | mr->mr_handle = ibmr->rkey; |
428 | mw->mw_length = mr->length; | 427 | mr->mr_length = ibmr->length; |
429 | mw->mw_offset = mr->iova; | 428 | mr->mr_offset = ibmr->iova; |
430 | 429 | ||
431 | *out = mw; | 430 | *out = mr; |
432 | return seg; | 431 | return seg; |
433 | 432 | ||
434 | out_dmamap_err: | 433 | out_dmamap_err: |
435 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", | 434 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", |
436 | mw->mw_sg, i); | 435 | mr->mr_sg, i); |
437 | frmr->fr_state = FRMR_IS_INVALID; | 436 | frwr->fr_state = FRWR_IS_INVALID; |
438 | rpcrdma_put_mw(r_xprt, mw); | 437 | rpcrdma_mr_put(mr); |
439 | return ERR_PTR(-EIO); | 438 | return ERR_PTR(-EIO); |
440 | 439 | ||
441 | out_mapmr_err: | 440 | out_mapmr_err: |
442 | pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", | 441 | pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", |
443 | frmr->fr_mr, n, mw->mw_nents); | 442 | frwr->fr_mr, n, mr->mr_nents); |
444 | rpcrdma_defer_mr_recovery(mw); | 443 | rpcrdma_mr_defer_recovery(mr); |
445 | return ERR_PTR(-EIO); | 444 | return ERR_PTR(-EIO); |
446 | 445 | ||
447 | out_senderr: | 446 | out_senderr: |
448 | pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); | 447 | pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc); |
449 | rpcrdma_defer_mr_recovery(mw); | 448 | rpcrdma_mr_defer_recovery(mr); |
450 | return ERR_PTR(-ENOTCONN); | 449 | return ERR_PTR(-ENOTCONN); |
451 | } | 450 | } |
452 | 451 | ||
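The hunk above is the registration half of the frmr-to-frwr rename. Reduced to its essentials, the FRWR sequence is: DMA-map the scatterlist, bind it to the ib_mr, rotate the MR's 8-bit key so a stale rkey cannot be replayed, then post a REG_MR Work Request. A condensed sketch under the diff's own field names (segment assembly and error unwinding elided; this is an illustration, not a drop-in replacement):

	/* Sketch: core FRWR registration steps from frwr_op_map() */
	static int frwr_register_sketch(struct rpcrdma_ia *ia,
					struct rpcrdma_mr *mr, int nsegs,
					bool writing)
	{
		struct rpcrdma_frwr *frwr = &mr->frwr;
		struct ib_mr *ibmr = frwr->fr_mr;
		struct ib_reg_wr *reg_wr = &frwr->fr_regwr;
		struct ib_send_wr *bad_wr;
		u8 key;
		int n;

		/* Make the pages visible to the device */
		mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg,
					     nsegs, mr->mr_dir);
		if (!mr->mr_nents)
			return -EIO;

		/* Attach the SG list to the fast-registration MR */
		n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
		if (n != mr->mr_nents)
			return -EIO;

		/* Rotate the low byte of the rkey before re-use */
		key = (u8)(ibmr->rkey & 0x000000FF);
		ib_update_fast_reg_key(ibmr, ++key);

		/* One REG_MR WR makes the new rkey valid on the wire */
		reg_wr->wr.opcode = IB_WR_REG_MR;
		reg_wr->wr.wr_cqe = &frwr->fr_cqe;
		reg_wr->mr = ibmr;
		reg_wr->key = ibmr->rkey;
		reg_wr->access = writing ?
			IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			IB_ACCESS_REMOTE_READ;
		return ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
	}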
452 | /* Handle a remotely invalidated mr on the @mrs list | ||
453 | */ | ||
454 | static void | ||
455 | frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) | ||
456 | { | ||
457 | struct rpcrdma_mr *mr; | ||
458 | |||
459 | list_for_each_entry(mr, mrs, mr_list) | ||
460 | if (mr->mr_handle == rep->rr_inv_rkey) { | ||
461 | list_del(&mr->mr_list); | ||
462 | trace_xprtrdma_remoteinv(mr); | ||
463 | mr->frwr.fr_state = FRWR_IS_INVALID; | ||
464 | rpcrdma_mr_unmap_and_put(mr); | ||
465 | break; /* only one invalidated MR per RPC */ | ||
466 | } | ||
467 | } | ||
468 | |||
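frwr_op_reminv() is new: when the server invalidates an rkey remotely (Send With Invalidate), the client can skip its own LOCAL_INV for that MR. For the list walk above to work, the receive side must first capture the relevant completion fields; a minimal sketch using the ib_wc fields the diff relies on (the verbs.c hunk later in this commit records them the same way):

	/* Sketch: record Remote Invalidation details from a receive WC */
	static void rep_capture_invalidation(struct rpcrdma_rep *rep,
					     struct ib_wc *wc)
	{
		rep->rr_wc_flags = wc->wc_flags;
		if (wc->wc_flags & IB_WC_WITH_INVALIDATE)
			rep->rr_inv_rkey = wc->ex.invalidate_rkey;
		else
			rep->rr_inv_rkey = 0;
	}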
453 | /* Invalidate all memory regions that were registered for "req". | 469 | /* Invalidate all memory regions that were registered for "req". |
454 | * | 470 | * |
455 | * Sleeps until it is safe for the host CPU to access the | 471 | * Sleeps until it is safe for the host CPU to access the |
456 | * previously mapped memory regions. | 472 | * previously mapped memory regions. |
457 | * | 473 | * |
458 | * Caller ensures that @mws is not empty before the call. This | 474 | * Caller ensures that @mrs is not empty before the call. This |
459 | * function empties the list. | 475 | * function empties the list. |
460 | */ | 476 | */ |
461 | static void | 477 | static void |
462 | frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | 478 | frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) |
463 | { | 479 | { |
464 | struct ib_send_wr *first, **prev, *last, *bad_wr; | 480 | struct ib_send_wr *first, **prev, *last, *bad_wr; |
465 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 481 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
466 | struct rpcrdma_frmr *f; | 482 | struct rpcrdma_frwr *frwr; |
467 | struct rpcrdma_mw *mw; | 483 | struct rpcrdma_mr *mr; |
468 | int count, rc; | 484 | int count, rc; |
469 | 485 | ||
470 | /* ORDER: Invalidate all of the MRs first | 486 | /* ORDER: Invalidate all of the MRs first |
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
472 | * Chain the LOCAL_INV Work Requests and post them with | 488 | * Chain the LOCAL_INV Work Requests and post them with |
473 | * a single ib_post_send() call. | 489 | * a single ib_post_send() call. |
474 | */ | 490 | */ |
475 | f = NULL; | 491 | frwr = NULL; |
476 | count = 0; | 492 | count = 0; |
477 | prev = &first; | 493 | prev = &first; |
478 | list_for_each_entry(mw, mws, mw_list) { | 494 | list_for_each_entry(mr, mrs, mr_list) { |
479 | mw->frmr.fr_state = FRMR_IS_INVALID; | 495 | mr->frwr.fr_state = FRWR_IS_INVALID; |
480 | 496 | ||
481 | if (mw->mw_flags & RPCRDMA_MW_F_RI) | 497 | frwr = &mr->frwr; |
482 | continue; | 498 | trace_xprtrdma_localinv(mr); |
483 | 499 | ||
484 | f = &mw->frmr; | 500 | frwr->fr_cqe.done = frwr_wc_localinv; |
485 | dprintk("RPC: %s: invalidating frmr %p\n", | 501 | last = &frwr->fr_invwr; |
486 | __func__, f); | ||
487 | |||
488 | f->fr_cqe.done = frwr_wc_localinv; | ||
489 | last = &f->fr_invwr; | ||
490 | memset(last, 0, sizeof(*last)); | 502 | memset(last, 0, sizeof(*last)); |
491 | last->wr_cqe = &f->fr_cqe; | 503 | last->wr_cqe = &frwr->fr_cqe; |
492 | last->opcode = IB_WR_LOCAL_INV; | 504 | last->opcode = IB_WR_LOCAL_INV; |
493 | last->ex.invalidate_rkey = mw->mw_handle; | 505 | last->ex.invalidate_rkey = mr->mr_handle; |
494 | count++; | 506 | count++; |
495 | 507 | ||
496 | *prev = last; | 508 | *prev = last; |
497 | prev = &last->next; | 509 | prev = &last->next; |
498 | } | 510 | } |
499 | if (!f) | 511 | if (!frwr) |
500 | goto unmap; | 512 | goto unmap; |
501 | 513 | ||
502 | /* Strong send queue ordering guarantees that when the | 514 | /* Strong send queue ordering guarantees that when the |
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
504 | * are complete. | 516 | * are complete. |
505 | */ | 517 | */ |
506 | last->send_flags = IB_SEND_SIGNALED; | 518 | last->send_flags = IB_SEND_SIGNALED; |
507 | f->fr_cqe.done = frwr_wc_localinv_wake; | 519 | frwr->fr_cqe.done = frwr_wc_localinv_wake; |
508 | reinit_completion(&f->fr_linv_done); | 520 | reinit_completion(&frwr->fr_linv_done); |
509 | 521 | ||
510 | /* Transport disconnect drains the receive CQ before it | 522 | /* Transport disconnect drains the receive CQ before it |
511 | * replaces the QP. The RPC reply handler won't call us | 523 | * replaces the QP. The RPC reply handler won't call us |
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
515 | bad_wr = NULL; | 527 | bad_wr = NULL; |
516 | rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); | 528 | rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); |
517 | if (bad_wr != first) | 529 | if (bad_wr != first) |
518 | wait_for_completion(&f->fr_linv_done); | 530 | wait_for_completion(&frwr->fr_linv_done); |
519 | if (rc) | 531 | if (rc) |
520 | goto reset_mrs; | 532 | goto reset_mrs; |
521 | 533 | ||
522 | /* ORDER: Now DMA unmap all of the MRs, and return | 534 | /* ORDER: Now DMA unmap all of the MRs, and return |
523 | * them to the free MW list. | 535 | * them to the free MR list. |
524 | */ | 536 | */ |
525 | unmap: | 537 | unmap: |
526 | while (!list_empty(mws)) { | 538 | while (!list_empty(mrs)) { |
527 | mw = rpcrdma_pop_mw(mws); | 539 | mr = rpcrdma_mr_pop(mrs); |
528 | dprintk("RPC: %s: DMA unmapping frmr %p\n", | 540 | rpcrdma_mr_unmap_and_put(mr); |
529 | __func__, &mw->frmr); | ||
530 | ib_dma_unmap_sg(ia->ri_device, | ||
531 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
532 | rpcrdma_put_mw(r_xprt, mw); | ||
533 | } | 541 | } |
534 | return; | 542 | return; |
535 | 543 | ||
536 | reset_mrs: | 544 | reset_mrs: |
537 | pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc); | 545 | pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc); |
538 | 546 | ||
539 | /* Find and reset the MRs in the LOCAL_INV WRs that did not | 547 | /* Find and reset the MRs in the LOCAL_INV WRs that did not |
540 | * get posted. | 548 | * get posted. |
541 | */ | 549 | */ |
542 | while (bad_wr) { | 550 | while (bad_wr) { |
543 | f = container_of(bad_wr, struct rpcrdma_frmr, | 551 | frwr = container_of(bad_wr, struct rpcrdma_frwr, |
544 | fr_invwr); | 552 | fr_invwr); |
545 | mw = container_of(f, struct rpcrdma_mw, frmr); | 553 | mr = container_of(frwr, struct rpcrdma_mr, frwr); |
546 | 554 | ||
547 | __frwr_reset_mr(ia, mw); | 555 | __frwr_mr_reset(ia, mr); |
548 | 556 | ||
549 | bad_wr = bad_wr->next; | 557 | bad_wr = bad_wr->next; |
550 | } | 558 | } |
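The invalidation path above builds one LOCAL_INV Work Request per MR, links them through wr.next, and posts the whole chain with a single ib_post_send(). Only the last WR carries IB_SEND_SIGNALED; strong send-queue ordering means its completion implies every earlier invalidation has finished. The skeleton of that pattern, assuming the fr_invwr and fr_linv_done fields shown in the diff:

	/* Sketch: chain LOCAL_INV WRs and wait on a single completion */
	struct ib_send_wr *first, **prev = &first, *last = NULL, *bad_wr = NULL;
	struct rpcrdma_frwr *frwr = NULL;
	struct rpcrdma_mr *mr;

	list_for_each_entry(mr, mrs, mr_list) {
		frwr = &mr->frwr;
		frwr->fr_cqe.done = frwr_wc_localinv;
		last = &frwr->fr_invwr;
		memset(last, 0, sizeof(*last));
		last->wr_cqe = &frwr->fr_cqe;		/* completion context */
		last->opcode = IB_WR_LOCAL_INV;
		last->ex.invalidate_rkey = mr->mr_handle;
		*prev = last;				/* link into the chain */
		prev = &last->next;
	}
	if (frwr) {
		/* only the final WR generates a completion */
		last->send_flags = IB_SEND_SIGNALED;
		frwr->fr_cqe.done = frwr_wc_localinv_wake;  /* calls complete() */
		reinit_completion(&frwr->fr_linv_done);
		if (!ib_post_send(ia->ri_id->qp, first, &bad_wr))
			wait_for_completion(&frwr->fr_linv_done);
	}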
@@ -553,6 +561,7 @@ reset_mrs: | |||
553 | 561 | ||
554 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | 562 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { |
555 | .ro_map = frwr_op_map, | 563 | .ro_map = frwr_op_map, |
564 | .ro_reminv = frwr_op_reminv, | ||
556 | .ro_unmap_sync = frwr_op_unmap_sync, | 565 | .ro_unmap_sync = frwr_op_unmap_sync, |
557 | .ro_recover_mr = frwr_op_recover_mr, | 566 | .ro_recover_mr = frwr_op_recover_mr, |
558 | .ro_open = frwr_op_open, | 567 | .ro_open = frwr_op_open, |
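The file closes by adding ro_reminv to the rpcrdma_memreg_ops table. That table is a per-strategy vtable: each registration mode exports the same entry points and callers dispatch through ri_ops, so Remote Invalidation support slots in without disturbing the other modes. The dispatch, as the reply path later in this commit uses it:

	/* Sketch: reply handling dispatches through the strategy table,
	 * guarded by the completion flag captured at receive time.
	 */
	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
		r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);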
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c index 560712bd9fa2..a762d192372b 100644 --- a/net/sunrpc/xprtrdma/module.c +++ b/net/sunrpc/xprtrdma/module.c | |||
@@ -1,18 +1,20 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2015 Oracle. All rights reserved. | 2 | * Copyright (c) 2015, 2017 Oracle. All rights reserved. |
3 | */ | 3 | */ |
4 | 4 | ||
5 | /* rpcrdma.ko module initialization | 5 | /* rpcrdma.ko module initialization |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/types.h> | ||
9 | #include <linux/compiler.h> | ||
8 | #include <linux/module.h> | 10 | #include <linux/module.h> |
9 | #include <linux/init.h> | 11 | #include <linux/init.h> |
10 | #include <linux/sunrpc/svc_rdma.h> | 12 | #include <linux/sunrpc/svc_rdma.h> |
11 | #include "xprt_rdma.h" | ||
12 | 13 | ||
13 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 14 | #include <asm/swab.h> |
14 | # define RPCDBG_FACILITY RPCDBG_TRANS | 15 | |
15 | #endif | 16 | #define CREATE_TRACE_POINTS |
17 | #include "xprt_rdma.h" | ||
16 | 18 | ||
17 | MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); | 19 | MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); |
18 | MODULE_DESCRIPTION("RPC/RDMA Transport"); | 20 | MODULE_DESCRIPTION("RPC/RDMA Transport"); |
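module.c now defines CREATE_TRACE_POINTS before including xprt_rdma.h, which presumably pulls in the new rpcrdma trace header. The kernel convention is that exactly one compilation unit defines this macro, so the TRACE_EVENT() declarations expand into tracepoint definitions once; every other includer sees only declarations. Schematically (the trace header path is an assumption, not shown in this diff):

	/* module.c -- the ONE file that instantiates the tracepoints */
	#define CREATE_TRACE_POINTS
	#include <trace/events/rpcrdma.h>  /* TRACE_EVENT()s become definitions */

	/* any other xprtrdma .c file -- declarations only */
	#include <trace/events/rpcrdma.h>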
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index ed34dc0f144c..162e5dd82466 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr) | |||
292 | } | 292 | } |
293 | 293 | ||
294 | static void | 294 | static void |
295 | xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) | 295 | xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) |
296 | { | 296 | { |
297 | *iptr++ = cpu_to_be32(mw->mw_handle); | 297 | *iptr++ = cpu_to_be32(mr->mr_handle); |
298 | *iptr++ = cpu_to_be32(mw->mw_length); | 298 | *iptr++ = cpu_to_be32(mr->mr_length); |
299 | xdr_encode_hyper(iptr, mw->mw_offset); | 299 | xdr_encode_hyper(iptr, mr->mr_offset); |
300 | } | 300 | } |
301 | 301 | ||
302 | static int | 302 | static int |
303 | encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) | 303 | encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr) |
304 | { | 304 | { |
305 | __be32 *p; | 305 | __be32 *p; |
306 | 306 | ||
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) | |||
308 | if (unlikely(!p)) | 308 | if (unlikely(!p)) |
309 | return -EMSGSIZE; | 309 | return -EMSGSIZE; |
310 | 310 | ||
311 | xdr_encode_rdma_segment(p, mw); | 311 | xdr_encode_rdma_segment(p, mr); |
312 | return 0; | 312 | return 0; |
313 | } | 313 | } |
314 | 314 | ||
315 | static int | 315 | static int |
316 | encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, | 316 | encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr, |
317 | u32 position) | 317 | u32 position) |
318 | { | 318 | { |
319 | __be32 *p; | 319 | __be32 *p; |
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, | |||
324 | 324 | ||
325 | *p++ = xdr_one; /* Item present */ | 325 | *p++ = xdr_one; /* Item present */ |
326 | *p++ = cpu_to_be32(position); | 326 | *p++ = cpu_to_be32(position); |
327 | xdr_encode_rdma_segment(p, mw); | 327 | xdr_encode_rdma_segment(p, mr); |
328 | return 0; | 328 | return 0; |
329 | } | 329 | } |
330 | 330 | ||
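xdr_encode_rdma_segment() above shows the on-the-wire shape of a plain RDMA segment, and encode_read_segment() the extra framing a read segment carries. For reference, the layout as RPC-over-RDMA version 1 (RFC 8166) defines it, sketched as a C comment:

	/*
	 * Plain RDMA segment -- four XDR words:
	 *
	 *   +-------------+-------------+------------------------+
	 *   |   handle    |   length    |         offset         |
	 *   |  4 bytes    |  4 bytes    |  8 bytes (xdr hyper)   |
	 *   +-------------+-------------+------------------------+
	 *
	 * Read segment -- two extra words in front:
	 *
	 *   +--------------+-----------+---- plain segment ----+
	 *   | item present | position  | handle/length/offset  |
	 *   +--------------+-----------+------------------------+
	 */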
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
348 | { | 348 | { |
349 | struct xdr_stream *xdr = &req->rl_stream; | 349 | struct xdr_stream *xdr = &req->rl_stream; |
350 | struct rpcrdma_mr_seg *seg; | 350 | struct rpcrdma_mr_seg *seg; |
351 | struct rpcrdma_mw *mw; | 351 | struct rpcrdma_mr *mr; |
352 | unsigned int pos; | 352 | unsigned int pos; |
353 | int nsegs; | 353 | int nsegs; |
354 | 354 | ||
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
363 | 363 | ||
364 | do { | 364 | do { |
365 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 365 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
366 | false, &mw); | 366 | false, &mr); |
367 | if (IS_ERR(seg)) | 367 | if (IS_ERR(seg)) |
368 | return PTR_ERR(seg); | 368 | return PTR_ERR(seg); |
369 | rpcrdma_push_mw(mw, &req->rl_registered); | 369 | rpcrdma_mr_push(mr, &req->rl_registered); |
370 | 370 | ||
371 | if (encode_read_segment(xdr, mw, pos) < 0) | 371 | if (encode_read_segment(xdr, mr, pos) < 0) |
372 | return -EMSGSIZE; | 372 | return -EMSGSIZE; |
373 | 373 | ||
374 | dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", | 374 | trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs); |
375 | rqst->rq_task->tk_pid, __func__, pos, | ||
376 | mw->mw_length, (unsigned long long)mw->mw_offset, | ||
377 | mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); | ||
378 | |||
379 | r_xprt->rx_stats.read_chunk_count++; | 375 | r_xprt->rx_stats.read_chunk_count++; |
380 | nsegs -= mw->mw_nents; | 376 | nsegs -= mr->mr_nents; |
381 | } while (nsegs); | 377 | } while (nsegs); |
382 | 378 | ||
383 | return 0; | 379 | return 0; |
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
404 | { | 400 | { |
405 | struct xdr_stream *xdr = &req->rl_stream; | 401 | struct xdr_stream *xdr = &req->rl_stream; |
406 | struct rpcrdma_mr_seg *seg; | 402 | struct rpcrdma_mr_seg *seg; |
407 | struct rpcrdma_mw *mw; | 403 | struct rpcrdma_mr *mr; |
408 | int nsegs, nchunks; | 404 | int nsegs, nchunks; |
409 | __be32 *segcount; | 405 | __be32 *segcount; |
410 | 406 | ||
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
425 | nchunks = 0; | 421 | nchunks = 0; |
426 | do { | 422 | do { |
427 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 423 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
428 | true, &mw); | 424 | true, &mr); |
429 | if (IS_ERR(seg)) | 425 | if (IS_ERR(seg)) |
430 | return PTR_ERR(seg); | 426 | return PTR_ERR(seg); |
431 | rpcrdma_push_mw(mw, &req->rl_registered); | 427 | rpcrdma_mr_push(mr, &req->rl_registered); |
432 | 428 | ||
433 | if (encode_rdma_segment(xdr, mw) < 0) | 429 | if (encode_rdma_segment(xdr, mr) < 0) |
434 | return -EMSGSIZE; | 430 | return -EMSGSIZE; |
435 | 431 | ||
436 | dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", | 432 | trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs); |
437 | rqst->rq_task->tk_pid, __func__, | ||
438 | mw->mw_length, (unsigned long long)mw->mw_offset, | ||
439 | mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); | ||
440 | |||
441 | r_xprt->rx_stats.write_chunk_count++; | 433 | r_xprt->rx_stats.write_chunk_count++; |
442 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | 434 | r_xprt->rx_stats.total_rdma_request += mr->mr_length; |
443 | nchunks++; | 435 | nchunks++; |
444 | nsegs -= mw->mw_nents; | 436 | nsegs -= mr->mr_nents; |
445 | } while (nsegs); | 437 | } while (nsegs); |
446 | 438 | ||
447 | /* Update count of segments in this Write chunk */ | 439 | /* Update count of segments in this Write chunk */ |
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
468 | { | 460 | { |
469 | struct xdr_stream *xdr = &req->rl_stream; | 461 | struct xdr_stream *xdr = &req->rl_stream; |
470 | struct rpcrdma_mr_seg *seg; | 462 | struct rpcrdma_mr_seg *seg; |
471 | struct rpcrdma_mw *mw; | 463 | struct rpcrdma_mr *mr; |
472 | int nsegs, nchunks; | 464 | int nsegs, nchunks; |
473 | __be32 *segcount; | 465 | __be32 *segcount; |
474 | 466 | ||
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
487 | nchunks = 0; | 479 | nchunks = 0; |
488 | do { | 480 | do { |
489 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 481 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
490 | true, &mw); | 482 | true, &mr); |
491 | if (IS_ERR(seg)) | 483 | if (IS_ERR(seg)) |
492 | return PTR_ERR(seg); | 484 | return PTR_ERR(seg); |
493 | rpcrdma_push_mw(mw, &req->rl_registered); | 485 | rpcrdma_mr_push(mr, &req->rl_registered); |
494 | 486 | ||
495 | if (encode_rdma_segment(xdr, mw) < 0) | 487 | if (encode_rdma_segment(xdr, mr) < 0) |
496 | return -EMSGSIZE; | 488 | return -EMSGSIZE; |
497 | 489 | ||
498 | dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", | 490 | trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs); |
499 | rqst->rq_task->tk_pid, __func__, | ||
500 | mw->mw_length, (unsigned long long)mw->mw_offset, | ||
501 | mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); | ||
502 | |||
503 | r_xprt->rx_stats.reply_chunk_count++; | 491 | r_xprt->rx_stats.reply_chunk_count++; |
504 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | 492 | r_xprt->rx_stats.total_rdma_request += mr->mr_length; |
505 | nchunks++; | 493 | nchunks++; |
506 | nsegs -= mw->mw_nents; | 494 | nsegs -= mr->mr_nents; |
507 | } while (nsegs); | 495 | } while (nsegs); |
508 | 496 | ||
509 | /* Update count of segments in the Reply chunk */ | 497 | /* Update count of segments in the Reply chunk */ |
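Both the Write list and Reply chunk encoders use the same reserve-then-backfill idiom for the segment count: grab one XDR word before the loop, then fill it in once nchunks is known. In outline:

	/* Sketch: reserve the segcount slot up front, backfill after */
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	nchunks = 0;
	do {
		/* ... register and encode one segment, nchunks++ ... */
	} while (nsegs);
	*segcount = cpu_to_be32(nchunks);	/* now the count is known */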
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) | |||
524 | struct ib_sge *sge; | 512 | struct ib_sge *sge; |
525 | unsigned int count; | 513 | unsigned int count; |
526 | 514 | ||
527 | dprintk("RPC: %s: unmapping %u sges for sc=%p\n", | ||
528 | __func__, sc->sc_unmap_count, sc); | ||
529 | |||
530 | /* The first two SGEs contain the transport header and | 515 | /* The first two SGEs contain the transport header and |
531 | * the inline buffer. These are always left mapped so | 516 | * the inline buffer. These are always left mapped so |
532 | * they can be cheaply re-used. | 517 | * they can be cheaply re-used. |
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) | |||
754 | __be32 *p; | 739 | __be32 *p; |
755 | int ret; | 740 | int ret; |
756 | 741 | ||
757 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | ||
758 | if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) | ||
759 | return rpcrdma_bc_marshal_reply(rqst); | ||
760 | #endif | ||
761 | |||
762 | rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); | 742 | rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); |
763 | xdr_init_encode(xdr, &req->rl_hdrbuf, | 743 | xdr_init_encode(xdr, &req->rl_hdrbuf, |
764 | req->rl_rdmabuf->rg_base); | 744 | req->rl_rdmabuf->rg_base); |
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) | |||
821 | rtype = rpcrdma_areadch; | 801 | rtype = rpcrdma_areadch; |
822 | } | 802 | } |
823 | 803 | ||
804 | /* If this is a retransmit, discard previously registered | ||
805 | * chunks. Very likely the connection has been replaced, | ||
806 | * so these registrations are invalid and unusable. | ||
807 | */ | ||
808 | while (unlikely(!list_empty(&req->rl_registered))) { | ||
809 | struct rpcrdma_mr *mr; | ||
810 | |||
811 | mr = rpcrdma_mr_pop(&req->rl_registered); | ||
812 | rpcrdma_mr_defer_recovery(mr); | ||
813 | } | ||
814 | |||
824 | /* This implementation supports the following combinations | 815 | /* This implementation supports the following combinations |
825 | * of chunk lists in one RPC-over-RDMA Call message: | 816 | * of chunk lists in one RPC-over-RDMA Call message: |
826 | * | 817 | * |
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) | |||
868 | if (ret) | 859 | if (ret) |
869 | goto out_err; | 860 | goto out_err; |
870 | 861 | ||
871 | dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", | 862 | trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype); |
872 | rqst->rq_task->tk_pid, __func__, | ||
873 | transfertypes[rtype], transfertypes[wtype], | ||
874 | xdr_stream_pos(xdr)); | ||
875 | 863 | ||
876 | ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), | 864 | ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), |
877 | &rqst->rq_snd_buf, rtype); | 865 | &rqst->rq_snd_buf, rtype); |
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
926 | curlen = rqst->rq_rcv_buf.head[0].iov_len; | 914 | curlen = rqst->rq_rcv_buf.head[0].iov_len; |
927 | if (curlen > copy_len) | 915 | if (curlen > copy_len) |
928 | curlen = copy_len; | 916 | curlen = copy_len; |
929 | dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", | 917 | trace_xprtrdma_fixup(rqst, copy_len, curlen); |
930 | __func__, srcp, copy_len, curlen); | ||
931 | srcp += curlen; | 918 | srcp += curlen; |
932 | copy_len -= curlen; | 919 | copy_len -= curlen; |
933 | 920 | ||
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
947 | if (curlen > pagelist_len) | 934 | if (curlen > pagelist_len) |
948 | curlen = pagelist_len; | 935 | curlen = pagelist_len; |
949 | 936 | ||
950 | dprintk("RPC: %s: page %d" | 937 | trace_xprtrdma_fixup_pg(rqst, i, srcp, |
951 | " srcp 0x%p len %d curlen %d\n", | 938 | copy_len, curlen); |
952 | __func__, i, srcp, copy_len, curlen); | ||
953 | destp = kmap_atomic(ppages[i]); | 939 | destp = kmap_atomic(ppages[i]); |
954 | memcpy(destp + page_base, srcp, curlen); | 940 | memcpy(destp + page_base, srcp, curlen); |
955 | flush_dcache_page(ppages[i]); | 941 | flush_dcache_page(ppages[i]); |
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
984 | return fixup_copy_count; | 970 | return fixup_copy_count; |
985 | } | 971 | } |
986 | 972 | ||
987 | /* Caller must guarantee @rep remains stable during this call. | ||
988 | */ | ||
989 | static void | ||
990 | rpcrdma_mark_remote_invalidation(struct list_head *mws, | ||
991 | struct rpcrdma_rep *rep) | ||
992 | { | ||
993 | struct rpcrdma_mw *mw; | ||
994 | |||
995 | if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)) | ||
996 | return; | ||
997 | |||
998 | list_for_each_entry(mw, mws, mw_list) | ||
999 | if (mw->mw_handle == rep->rr_inv_rkey) { | ||
1000 | mw->mw_flags = RPCRDMA_MW_F_RI; | ||
1001 | break; /* only one invalidated MR per RPC */ | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | /* By convention, backchannel calls arrive via rdma_msg type | 973 | /* By convention, backchannel calls arrive via rdma_msg type |
1006 | * messages, and never populate the chunk lists. This makes | 974 | * messages, and never populate the chunk lists. This makes |
1007 | * the RPC/RDMA header small and fixed in size, so it is | 975 | * the RPC/RDMA header small and fixed in size, so it is |
@@ -1058,26 +1026,19 @@ out_short: | |||
1058 | 1026 | ||
1059 | static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) | 1027 | static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) |
1060 | { | 1028 | { |
1029 | u32 handle; | ||
1030 | u64 offset; | ||
1061 | __be32 *p; | 1031 | __be32 *p; |
1062 | 1032 | ||
1063 | p = xdr_inline_decode(xdr, 4 * sizeof(*p)); | 1033 | p = xdr_inline_decode(xdr, 4 * sizeof(*p)); |
1064 | if (unlikely(!p)) | 1034 | if (unlikely(!p)) |
1065 | return -EIO; | 1035 | return -EIO; |
1066 | 1036 | ||
1067 | ifdebug(FACILITY) { | 1037 | handle = be32_to_cpup(p++); |
1068 | u64 offset; | 1038 | *length = be32_to_cpup(p++); |
1069 | u32 handle; | 1039 | xdr_decode_hyper(p, &offset); |
1070 | |||
1071 | handle = be32_to_cpup(p++); | ||
1072 | *length = be32_to_cpup(p++); | ||
1073 | xdr_decode_hyper(p, &offset); | ||
1074 | dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n", | ||
1075 | __func__, *length, (unsigned long long)offset, | ||
1076 | handle); | ||
1077 | } else { | ||
1078 | *length = be32_to_cpup(p + 1); | ||
1079 | } | ||
1080 | 1040 | ||
1041 | trace_xprtrdma_decode_seg(handle, *length, offset); | ||
1081 | return 0; | 1042 | return 0; |
1082 | } | 1043 | } |
1083 | 1044 | ||
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length) | |||
1098 | *length += seglength; | 1059 | *length += seglength; |
1099 | } | 1060 | } |
1100 | 1061 | ||
1101 | dprintk("RPC: %s: segcount=%u, %u bytes\n", | ||
1102 | __func__, be32_to_cpup(p), *length); | ||
1103 | return 0; | 1062 | return 0; |
1104 | } | 1063 | } |
1105 | 1064 | ||
@@ -1296,8 +1255,7 @@ out: | |||
1296 | * being marshaled. | 1255 | * being marshaled. |
1297 | */ | 1256 | */ |
1298 | out_badheader: | 1257 | out_badheader: |
1299 | dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", | 1258 | trace_xprtrdma_reply_hdr(rep); |
1300 | rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc)); | ||
1301 | r_xprt->rx_stats.bad_reply_count++; | 1259 | r_xprt->rx_stats.bad_reply_count++; |
1302 | status = -EIO; | 1260 | status = -EIO; |
1303 | goto out; | 1261 | goto out; |
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work) | |||
1339 | struct rpcrdma_rep *rep = | 1297 | struct rpcrdma_rep *rep = |
1340 | container_of(work, struct rpcrdma_rep, rr_work); | 1298 | container_of(work, struct rpcrdma_rep, rr_work); |
1341 | struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); | 1299 | struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); |
1300 | struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; | ||
1342 | 1301 | ||
1343 | rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); | 1302 | trace_xprtrdma_defer_cmp(rep); |
1344 | rpcrdma_release_rqst(rep->rr_rxprt, req); | 1303 | if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) |
1304 | r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered); | ||
1305 | rpcrdma_release_rqst(r_xprt, req); | ||
1345 | rpcrdma_complete_rqst(rep); | 1306 | rpcrdma_complete_rqst(rep); |
1346 | } | 1307 | } |
1347 | 1308 | ||
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
1360 | u32 credits; | 1321 | u32 credits; |
1361 | __be32 *p; | 1322 | __be32 *p; |
1362 | 1323 | ||
1363 | dprintk("RPC: %s: incoming rep %p\n", __func__, rep); | ||
1364 | |||
1365 | if (rep->rr_hdrbuf.head[0].iov_len == 0) | 1324 | if (rep->rr_hdrbuf.head[0].iov_len == 0) |
1366 | goto out_badstatus; | 1325 | goto out_badstatus; |
1367 | 1326 | ||
@@ -1405,14 +1364,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
1405 | rep->rr_rqst = rqst; | 1364 | rep->rr_rqst = rqst; |
1406 | clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); | 1365 | clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); |
1407 | 1366 | ||
1408 | dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", | 1367 | trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); |
1409 | __func__, rep, req, be32_to_cpu(rep->rr_xid)); | ||
1410 | 1368 | ||
1411 | if (list_empty(&req->rl_registered) && | 1369 | queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); |
1412 | !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) | ||
1413 | rpcrdma_complete_rqst(rep); | ||
1414 | else | ||
1415 | queue_work(rpcrdma_receive_wq, &rep->rr_work); | ||
1416 | return; | 1370 | return; |
1417 | 1371 | ||
1418 | out_badstatus: | 1372 | out_badstatus: |
@@ -1424,8 +1378,7 @@ out_badstatus: | |||
1424 | return; | 1378 | return; |
1425 | 1379 | ||
1426 | out_badversion: | 1380 | out_badversion: |
1427 | dprintk("RPC: %s: invalid version %d\n", | 1381 | trace_xprtrdma_reply_vers(rep); |
1428 | __func__, be32_to_cpu(rep->rr_vers)); | ||
1429 | goto repost; | 1382 | goto repost; |
1430 | 1383 | ||
1431 | /* The RPC transaction has already been terminated, or the header | 1384 | /* The RPC transaction has already been terminated, or the header |
@@ -1433,12 +1386,11 @@ out_badversion: | |||
1433 | */ | 1386 | */ |
1434 | out_norqst: | 1387 | out_norqst: |
1435 | spin_unlock(&xprt->recv_lock); | 1388 | spin_unlock(&xprt->recv_lock); |
1436 | dprintk("RPC: %s: no match for incoming xid 0x%08x\n", | 1389 | trace_xprtrdma_reply_rqst(rep); |
1437 | __func__, be32_to_cpu(rep->rr_xid)); | ||
1438 | goto repost; | 1390 | goto repost; |
1439 | 1391 | ||
1440 | out_shortreply: | 1392 | out_shortreply: |
1441 | dprintk("RPC: %s: short/invalid reply\n", __func__); | 1393 | trace_xprtrdma_reply_short(rep); |
1442 | 1394 | ||
1443 | /* If no pending RPC transaction was matched, post a replacement | 1395 | /* If no pending RPC transaction was matched, post a replacement |
1444 | * receive buffer before returning. | 1396 | * receive buffer before returning. |
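rpcrdma_reply_handler() now always defers completion via queue_work_on(), targeting the CPU saved in req->rl_cpu (set at allocation time in transport.c below); together with dropping WQ_UNBOUND from the receive workqueue in verbs.c, reply processing runs on the CPU that issued the RPC, keeping the req and rep structures cache-warm. The two halves of that hand-off:

	/* Sketch: pin deferred reply work to the submitting CPU */

	/* submit side, in xprt_rdma_allocate(): */
	req->rl_cpu = smp_processor_id();

	/* completion side, in rpcrdma_reply_handler(): */
	queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);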
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 646c24494ea7..4b1ecfe979cf 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <linux/slab.h> | 52 | #include <linux/slab.h> |
53 | #include <linux/seq_file.h> | 53 | #include <linux/seq_file.h> |
54 | #include <linux/sunrpc/addr.h> | 54 | #include <linux/sunrpc/addr.h> |
55 | #include <linux/smp.h> | ||
55 | 56 | ||
56 | #include "xprt_rdma.h" | 57 | #include "xprt_rdma.h" |
57 | 58 | ||
@@ -66,8 +67,7 @@ | |||
66 | static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | 67 | static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; |
67 | unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; | 68 | unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
68 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; | 69 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
69 | static unsigned int xprt_rdma_inline_write_padding; | 70 | unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; |
70 | unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; | ||
71 | int xprt_rdma_pad_optimize; | 71 | int xprt_rdma_pad_optimize; |
72 | 72 | ||
73 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 73 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
@@ -80,6 +80,7 @@ static unsigned int zero; | |||
80 | static unsigned int max_padding = PAGE_SIZE; | 80 | static unsigned int max_padding = PAGE_SIZE; |
81 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; | 81 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; |
82 | static unsigned int max_memreg = RPCRDMA_LAST - 1; | 82 | static unsigned int max_memreg = RPCRDMA_LAST - 1; |
83 | static unsigned int dummy; | ||
83 | 84 | ||
84 | static struct ctl_table_header *sunrpc_table_header; | 85 | static struct ctl_table_header *sunrpc_table_header; |
85 | 86 | ||
@@ -113,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = { | |||
113 | }, | 114 | }, |
114 | { | 115 | { |
115 | .procname = "rdma_inline_write_padding", | 116 | .procname = "rdma_inline_write_padding", |
116 | .data = &xprt_rdma_inline_write_padding, | 117 | .data = &dummy, |
117 | .maxlen = sizeof(unsigned int), | 118 | .maxlen = sizeof(unsigned int), |
118 | .mode = 0644, | 119 | .mode = 0644, |
119 | .proc_handler = proc_dointvec_minmax, | 120 | .proc_handler = proc_dointvec_minmax, |
@@ -258,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work) | |||
258 | 259 | ||
259 | xprt_clear_connected(xprt); | 260 | xprt_clear_connected(xprt); |
260 | 261 | ||
261 | dprintk("RPC: %s: %sconnect\n", __func__, | ||
262 | r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); | ||
263 | rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); | 262 | rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
264 | if (rc) | 263 | if (rc) |
265 | xprt_wake_pending_tasks(xprt, rc); | 264 | xprt_wake_pending_tasks(xprt, rc); |
266 | 265 | ||
267 | dprintk("RPC: %s: exit\n", __func__); | ||
268 | xprt_clear_connecting(xprt); | 266 | xprt_clear_connecting(xprt); |
269 | } | 267 | } |
270 | 268 | ||
@@ -274,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) | |||
274 | struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, | 272 | struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, |
275 | rx_xprt); | 273 | rx_xprt); |
276 | 274 | ||
277 | pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); | 275 | trace_xprtrdma_inject_dsc(r_xprt); |
278 | rdma_disconnect(r_xprt->rx_ia.ri_id); | 276 | rdma_disconnect(r_xprt->rx_ia.ri_id); |
279 | } | 277 | } |
280 | 278 | ||
@@ -294,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) | |||
294 | { | 292 | { |
295 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 293 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
296 | 294 | ||
297 | dprintk("RPC: %s: called\n", __func__); | 295 | trace_xprtrdma_destroy(r_xprt); |
298 | 296 | ||
299 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); | 297 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); |
300 | 298 | ||
@@ -305,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) | |||
305 | rpcrdma_ia_close(&r_xprt->rx_ia); | 303 | rpcrdma_ia_close(&r_xprt->rx_ia); |
306 | 304 | ||
307 | xprt_rdma_free_addresses(xprt); | 305 | xprt_rdma_free_addresses(xprt); |
308 | |||
309 | xprt_free(xprt); | 306 | xprt_free(xprt); |
310 | 307 | ||
311 | dprintk("RPC: %s: returning\n", __func__); | ||
312 | |||
313 | module_put(THIS_MODULE); | 308 | module_put(THIS_MODULE); |
314 | } | 309 | } |
315 | 310 | ||
@@ -360,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args) | |||
360 | /* | 355 | /* |
361 | * Set up RDMA-specific connect data. | 356 | * Set up RDMA-specific connect data. |
362 | */ | 357 | */ |
363 | 358 | sap = args->dstaddr; | |
364 | sap = (struct sockaddr *)&cdata.addr; | ||
365 | memcpy(sap, args->dstaddr, args->addrlen); | ||
366 | 359 | ||
367 | /* Ensure xprt->addr holds valid server TCP (not RDMA) | 360 | /* Ensure xprt->addr holds valid server TCP (not RDMA) |
368 | * address, for any side protocols which peek at it */ | 361 | * address, for any side protocols which peek at it */ |
@@ -372,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args) | |||
372 | 365 | ||
373 | if (rpc_get_port(sap)) | 366 | if (rpc_get_port(sap)) |
374 | xprt_set_bound(xprt); | 367 | xprt_set_bound(xprt); |
368 | xprt_rdma_format_addresses(xprt, sap); | ||
375 | 369 | ||
376 | cdata.max_requests = xprt->max_reqs; | 370 | cdata.max_requests = xprt->max_reqs; |
377 | 371 | ||
@@ -386,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args) | |||
386 | if (cdata.inline_rsize > cdata.rsize) | 380 | if (cdata.inline_rsize > cdata.rsize) |
387 | cdata.inline_rsize = cdata.rsize; | 381 | cdata.inline_rsize = cdata.rsize; |
388 | 382 | ||
389 | cdata.padding = xprt_rdma_inline_write_padding; | ||
390 | |||
391 | /* | 383 | /* |
392 | * Create new transport instance, which includes initialized | 384 | * Create new transport instance, which includes initialized |
393 | * o ia | 385 | * o ia |
@@ -397,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args) | |||
397 | 389 | ||
398 | new_xprt = rpcx_to_rdmax(xprt); | 390 | new_xprt = rpcx_to_rdmax(xprt); |
399 | 391 | ||
400 | rc = rpcrdma_ia_open(new_xprt, sap); | 392 | rc = rpcrdma_ia_open(new_xprt); |
401 | if (rc) | 393 | if (rc) |
402 | goto out1; | 394 | goto out1; |
403 | 395 | ||
@@ -406,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args) | |||
406 | */ | 398 | */ |
407 | new_xprt->rx_data = cdata; | 399 | new_xprt->rx_data = cdata; |
408 | new_ep = &new_xprt->rx_ep; | 400 | new_ep = &new_xprt->rx_ep; |
409 | new_ep->rep_remote_addr = cdata.addr; | ||
410 | 401 | ||
411 | rc = rpcrdma_ep_create(&new_xprt->rx_ep, | 402 | rc = rpcrdma_ep_create(&new_xprt->rx_ep, |
412 | &new_xprt->rx_ia, &new_xprt->rx_data); | 403 | &new_xprt->rx_ia, &new_xprt->rx_data); |
413 | if (rc) | 404 | if (rc) |
414 | goto out2; | 405 | goto out2; |
415 | 406 | ||
416 | /* | ||
417 | * Allocate pre-registered send and receive buffers for headers and | ||
418 | * any inline data. Also specify any padding which will be provided | ||
419 | * from a preregistered zero buffer. | ||
420 | */ | ||
421 | rc = rpcrdma_buffer_create(new_xprt); | 407 | rc = rpcrdma_buffer_create(new_xprt); |
422 | if (rc) | 408 | if (rc) |
423 | goto out3; | 409 | goto out3; |
424 | 410 | ||
425 | /* | ||
426 | * Register a callback for connection events. This is necessary because | ||
427 | * connection loss notification is async. We also catch connection loss | ||
428 | * when reaping receives. | ||
429 | */ | ||
430 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, | 411 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, |
431 | xprt_rdma_connect_worker); | 412 | xprt_rdma_connect_worker); |
432 | 413 | ||
433 | xprt_rdma_format_addresses(xprt, sap); | ||
434 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); | 414 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); |
435 | if (xprt->max_payload == 0) | 415 | if (xprt->max_payload == 0) |
436 | goto out4; | 416 | goto out4; |
@@ -444,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args) | |||
444 | dprintk("RPC: %s: %s:%s\n", __func__, | 424 | dprintk("RPC: %s: %s:%s\n", __func__, |
445 | xprt->address_strings[RPC_DISPLAY_ADDR], | 425 | xprt->address_strings[RPC_DISPLAY_ADDR], |
446 | xprt->address_strings[RPC_DISPLAY_PORT]); | 426 | xprt->address_strings[RPC_DISPLAY_PORT]); |
427 | trace_xprtrdma_create(new_xprt); | ||
447 | return xprt; | 428 | return xprt; |
448 | 429 | ||
449 | out4: | 430 | out4: |
450 | xprt_rdma_free_addresses(xprt); | 431 | rpcrdma_buffer_destroy(&new_xprt->rx_buf); |
451 | rc = -EINVAL; | 432 | rc = -ENODEV; |
452 | out3: | 433 | out3: |
453 | rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); | 434 | rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); |
454 | out2: | 435 | out2: |
455 | rpcrdma_ia_close(&new_xprt->rx_ia); | 436 | rpcrdma_ia_close(&new_xprt->rx_ia); |
456 | out1: | 437 | out1: |
438 | trace_xprtrdma_destroy(new_xprt); | ||
439 | xprt_rdma_free_addresses(xprt); | ||
457 | xprt_free(xprt); | 440 | xprt_free(xprt); |
458 | return ERR_PTR(rc); | 441 | return ERR_PTR(rc); |
459 | } | 442 | } |
@@ -487,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt) | |||
487 | rpcrdma_ep_disconnect(ep, ia); | 470 | rpcrdma_ep_disconnect(ep, ia); |
488 | } | 471 | } |
489 | 472 | ||
473 | /** | ||
474 | * xprt_rdma_set_port - update server port with rpcbind result | ||
475 | * @xprt: controlling RPC transport | ||
476 | * @port: new port value | ||
477 | * | ||
478 | * Transport connect status is unchanged. | ||
479 | */ | ||
490 | static void | 480 | static void |
491 | xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) | 481 | xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) |
492 | { | 482 | { |
493 | struct sockaddr_in *sap; | 483 | struct sockaddr *sap = (struct sockaddr *)&xprt->addr; |
484 | char buf[8]; | ||
494 | 485 | ||
495 | sap = (struct sockaddr_in *)&xprt->addr; | 486 | dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", |
496 | sap->sin_port = htons(port); | 487 | __func__, xprt, |
497 | sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; | 488 | xprt->address_strings[RPC_DISPLAY_ADDR], |
498 | sap->sin_port = htons(port); | 489 | xprt->address_strings[RPC_DISPLAY_PORT], |
499 | dprintk("RPC: %s: %u\n", __func__, port); | 490 | port); |
491 | |||
492 | rpc_set_port(sap, port); | ||
493 | |||
494 | kfree(xprt->address_strings[RPC_DISPLAY_PORT]); | ||
495 | snprintf(buf, sizeof(buf), "%u", port); | ||
496 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); | ||
497 | |||
498 | kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); | ||
499 | snprintf(buf, sizeof(buf), "%4hx", port); | ||
500 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); | ||
500 | } | 501 | } |
501 | 502 | ||
502 | /** | 503 | /** |
@@ -515,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) | |||
515 | static void | 516 | static void |
516 | xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) | 517 | xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) |
517 | { | 518 | { |
518 | dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt); | ||
519 | |||
520 | xprt_force_disconnect(xprt); | 519 | xprt_force_disconnect(xprt); |
521 | } | 520 | } |
522 | 521 | ||
@@ -639,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task) | |||
639 | 638 | ||
640 | req = rpcrdma_buffer_get(&r_xprt->rx_buf); | 639 | req = rpcrdma_buffer_get(&r_xprt->rx_buf); |
641 | if (req == NULL) | 640 | if (req == NULL) |
642 | return -ENOMEM; | 641 | goto out_get; |
643 | 642 | ||
644 | flags = RPCRDMA_DEF_GFP; | 643 | flags = RPCRDMA_DEF_GFP; |
645 | if (RPC_IS_SWAPPER(task)) | 644 | if (RPC_IS_SWAPPER(task)) |
@@ -652,18 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task) | |||
652 | if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) | 651 | if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) |
653 | goto out_fail; | 652 | goto out_fail; |
654 | 653 | ||
655 | dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n", | 654 | req->rl_cpu = smp_processor_id(); |
656 | task->tk_pid, __func__, rqst->rq_callsize, | ||
657 | rqst->rq_rcvsize, req); | ||
658 | |||
659 | req->rl_connect_cookie = 0; /* our reserved value */ | 655 | req->rl_connect_cookie = 0; /* our reserved value */ |
660 | rpcrdma_set_xprtdata(rqst, req); | 656 | rpcrdma_set_xprtdata(rqst, req); |
661 | rqst->rq_buffer = req->rl_sendbuf->rg_base; | 657 | rqst->rq_buffer = req->rl_sendbuf->rg_base; |
662 | rqst->rq_rbuffer = req->rl_recvbuf->rg_base; | 658 | rqst->rq_rbuffer = req->rl_recvbuf->rg_base; |
659 | trace_xprtrdma_allocate(task, req); | ||
663 | return 0; | 660 | return 0; |
664 | 661 | ||
665 | out_fail: | 662 | out_fail: |
666 | rpcrdma_buffer_put(req); | 663 | rpcrdma_buffer_put(req); |
664 | out_get: | ||
665 | trace_xprtrdma_allocate(task, NULL); | ||
667 | return -ENOMEM; | 666 | return -ENOMEM; |
668 | } | 667 | } |
669 | 668 | ||
@@ -680,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task) | |||
680 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 679 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
681 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 680 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
682 | 681 | ||
683 | if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags)) | ||
684 | return; | ||
685 | |||
686 | dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); | ||
687 | |||
688 | if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) | 682 | if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) |
689 | rpcrdma_release_rqst(r_xprt, req); | 683 | rpcrdma_release_rqst(r_xprt, req); |
684 | trace_xprtrdma_rpc_done(task, req); | ||
690 | rpcrdma_buffer_put(req); | 685 | rpcrdma_buffer_put(req); |
691 | } | 686 | } |
692 | 687 | ||
@@ -696,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task) | |||
696 | * | 691 | * |
697 | * Caller holds the transport's write lock. | 692 | * Caller holds the transport's write lock. |
698 | * | 693 | * |
699 | * Return values: | 694 | * Returns: |
700 | * 0: The request has been sent | 695 | * %0 if the RPC message has been sent |
701 | * ENOTCONN: Caller needs to invoke connect logic then call again | 696 | * %-ENOTCONN if the caller should reconnect and call again |
702 | * ENOBUFS: Call again later to send the request | 697 | * %-ENOBUFS if the caller should call again later |
703 | * EIO: A permanent error occurred. The request was not sent, | 698 | * %-EIO if a permanent error occurred and the request was not |
704 | * and don't try it again | 699 | * sent. Do not try to send this message again. |
705 | * | ||
706 | * send_request invokes the meat of RPC RDMA. It must do the following: | ||
707 | * | ||
708 | * 1. Marshal the RPC request into an RPC RDMA request, which means | ||
709 | * putting a header in front of data, and creating IOVs for RDMA | ||
710 | * from those in the request. | ||
711 | * 2. In marshaling, detect opportunities for RDMA, and use them. | ||
712 | * 3. Post a recv message to set up asynch completion, then send | ||
713 | * the request (rpcrdma_ep_post). | ||
714 | * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). | ||
715 | */ | 700 | */ |
716 | static int | 701 | static int |
717 | xprt_rdma_send_request(struct rpc_task *task) | 702 | xprt_rdma_send_request(struct rpc_task *task) |
@@ -722,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
722 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 707 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
723 | int rc = 0; | 708 | int rc = 0; |
724 | 709 | ||
710 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | ||
711 | if (unlikely(!rqst->rq_buffer)) | ||
712 | return xprt_rdma_bc_send_reply(rqst); | ||
713 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | ||
714 | |||
725 | if (!xprt_connected(xprt)) | 715 | if (!xprt_connected(xprt)) |
726 | goto drop_connection; | 716 | goto drop_connection; |
727 | 717 | ||
728 | /* On retransmit, remove any previously registered chunks */ | ||
729 | if (unlikely(!list_empty(&req->rl_registered))) | ||
730 | r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, | ||
731 | &req->rl_registered); | ||
732 | |||
733 | rc = rpcrdma_marshal_req(r_xprt, rqst); | 718 | rc = rpcrdma_marshal_req(r_xprt, rqst); |
734 | if (rc < 0) | 719 | if (rc < 0) |
735 | goto failed_marshal; | 720 | goto failed_marshal; |
@@ -742,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
742 | goto drop_connection; | 727 | goto drop_connection; |
743 | req->rl_connect_cookie = xprt->connect_cookie; | 728 | req->rl_connect_cookie = xprt->connect_cookie; |
744 | 729 | ||
745 | set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); | 730 | __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); |
746 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | 731 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) |
747 | goto drop_connection; | 732 | goto drop_connection; |
748 | 733 | ||
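The backchannel test moves from the marshaling path into xprt_rdma_send_request() and changes its discriminator: a backchannel reply apparently never passes through xprt_rdma_allocate(), so its rq_buffer stays NULL and the rq_bc_pa_state bit is no longer needed. As it lands in the hunk above:

	#if defined(CONFIG_SUNRPC_BACKCHANNEL)
		/* backchannel replies have no send buffer of their own */
		if (unlikely(!rqst->rq_buffer))
			return xprt_rdma_bc_send_reply(rqst);
	#endif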
@@ -902,8 +887,7 @@ int xprt_rdma_init(void) | |||
902 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", | 887 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", |
903 | xprt_rdma_slot_table_entries, | 888 | xprt_rdma_slot_table_entries, |
904 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); | 889 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); |
905 | dprintk("\tPadding %d\n\tMemreg %d\n", | 890 | dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy); |
906 | xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); | ||
907 | 891 | ||
908 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 892 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
909 | if (!sunrpc_table_header) | 893 | if (!sunrpc_table_header) |
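The tightened kernel-doc earlier in this file spells out xprt_rdma_send_request()'s return contract. A hedged sketch of how a generic caller is expected to act on each value (illustration only, not the RPC core's actual code):

	/* Sketch: caller-side handling of the documented return codes */
	switch (rc) {
	case 0:		/* the RPC message is on the wire */
		break;
	case -ENOTCONN:	/* reconnect, then send again */
		break;
	case -ENOBUFS:	/* transient; try again later */
		break;
	case -EIO:	/* permanent; do not retransmit this message */
		break;
	}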
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 710b3f77db82..f4eb63e8e689 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -71,8 +71,8 @@ | |||
71 | /* | 71 | /* |
72 | * internal functions | 72 | * internal functions |
73 | */ | 73 | */ |
74 | static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); | 74 | static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); |
75 | static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); | 75 | static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); |
76 | static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); | 76 | static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); |
77 | 77 | ||
78 | struct workqueue_struct *rpcrdma_receive_wq __read_mostly; | 78 | struct workqueue_struct *rpcrdma_receive_wq __read_mostly; |
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void) | |||
83 | struct workqueue_struct *recv_wq; | 83 | struct workqueue_struct *recv_wq; |
84 | 84 | ||
85 | recv_wq = alloc_workqueue("xprtrdma_receive", | 85 | recv_wq = alloc_workqueue("xprtrdma_receive", |
86 | WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI, | 86 | WQ_MEM_RECLAIM | WQ_HIGHPRI, |
87 | 0); | 87 | 0); |
88 | if (!recv_wq) | 88 | if (!recv_wq) |
89 | return -ENOMEM; | 89 | return -ENOMEM; |
@@ -108,7 +108,10 @@ static void | |||
108 | rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) | 108 | rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) |
109 | { | 109 | { |
110 | struct rpcrdma_ep *ep = context; | 110 | struct rpcrdma_ep *ep = context; |
111 | struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, | ||
112 | rx_ep); | ||
111 | 113 | ||
114 | trace_xprtrdma_qp_error(r_xprt, event); | ||
112 | pr_err("rpcrdma: %s on device %s ep %p\n", | 115 | pr_err("rpcrdma: %s on device %s ep %p\n", |
113 | ib_event_msg(event->event), event->device->name, context); | 116 | ib_event_msg(event->event), event->device->name, context); |
114 | 117 | ||
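The QP async handler above registered only the rpcrdma_ep as its context, so the new tracepoint recovers the owning transport with container_of(); that is safe because rx_ep is embedded directly in rpcrdma_xprt rather than pointed to. The idiom in isolation (struct shape abbreviated):

	/* Sketch: recover the enclosing transport from an embedded member */
	struct rpcrdma_xprt {
		/* ... other fields ... */
		struct rpcrdma_ep rx_ep;	/* embedded, not a pointer */
	};

	static void upcall(struct ib_event *event, void *context)
	{
		struct rpcrdma_ep *ep = context;
		struct rpcrdma_xprt *r_xprt =
			container_of(ep, struct rpcrdma_xprt, rx_ep);

		trace_xprtrdma_qp_error(r_xprt, event);
	}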
@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) | |||
133 | container_of(cqe, struct rpcrdma_sendctx, sc_cqe); | 136 | container_of(cqe, struct rpcrdma_sendctx, sc_cqe); |
134 | 137 | ||
135 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 138 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
139 | trace_xprtrdma_wc_send(sc, wc); | ||
136 | if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) | 140 | if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) |
137 | pr_err("rpcrdma: Send: %s (%u/0x%x)\n", | 141 | pr_err("rpcrdma: Send: %s (%u/0x%x)\n", |
138 | ib_wc_status_msg(wc->status), | 142 | ib_wc_status_msg(wc->status), |
@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) | |||
155 | rr_cqe); | 159 | rr_cqe); |
156 | 160 | ||
157 | /* WARNING: Only wr_id and status are reliable at this point */ | 161 | /* WARNING: Only wr_id and status are reliable at this point */ |
162 | trace_xprtrdma_wc_receive(rep, wc); | ||
158 | if (wc->status != IB_WC_SUCCESS) | 163 | if (wc->status != IB_WC_SUCCESS) |
159 | goto out_fail; | 164 | goto out_fail; |
160 | 165 | ||
161 | /* status == SUCCESS means all fields in wc are trustworthy */ | 166 | /* status == SUCCESS means all fields in wc are trustworthy */ |
162 | dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", | ||
163 | __func__, rep, wc->byte_len); | ||
164 | |||
165 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); | 167 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); |
166 | rep->rr_wc_flags = wc->wc_flags; | 168 | rep->rr_wc_flags = wc->wc_flags; |
167 | rep->rr_inv_rkey = wc->ex.invalidate_rkey; | 169 | rep->rr_inv_rkey = wc->ex.invalidate_rkey; |
@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, | |||
192 | unsigned int rsize, wsize; | 194 | unsigned int rsize, wsize; |
193 | 195 | ||
194 | /* Default settings for RPC-over-RDMA Version One */ | 196 | /* Default settings for RPC-over-RDMA Version One */ |
195 | r_xprt->rx_ia.ri_reminv_expected = false; | ||
196 | r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; | 197 | r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; |
197 | rsize = RPCRDMA_V1_DEF_INLINE_SIZE; | 198 | rsize = RPCRDMA_V1_DEF_INLINE_SIZE; |
198 | wsize = RPCRDMA_V1_DEF_INLINE_SIZE; | 199 | wsize = RPCRDMA_V1_DEF_INLINE_SIZE; |
@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, | |||
200 | if (pmsg && | 201 | if (pmsg && |
201 | pmsg->cp_magic == rpcrdma_cmp_magic && | 202 | pmsg->cp_magic == rpcrdma_cmp_magic && |
202 | pmsg->cp_version == RPCRDMA_CMP_VERSION) { | 203 | pmsg->cp_version == RPCRDMA_CMP_VERSION) { |
203 | r_xprt->rx_ia.ri_reminv_expected = true; | ||
204 | r_xprt->rx_ia.ri_implicit_roundup = true; | 204 | r_xprt->rx_ia.ri_implicit_roundup = true; |
205 | rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); | 205 | rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); |
206 | wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); | 206 | wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); |
@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
221 | struct rpcrdma_xprt *xprt = id->context; | 221 | struct rpcrdma_xprt *xprt = id->context; |
222 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 222 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
223 | struct rpcrdma_ep *ep = &xprt->rx_ep; | 223 | struct rpcrdma_ep *ep = &xprt->rx_ep; |
224 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
225 | struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; | ||
226 | #endif | ||
227 | int connstate = 0; | 224 | int connstate = 0; |
228 | 225 | ||
226 | trace_xprtrdma_conn_upcall(xprt, event); | ||
229 | switch (event->event) { | 227 | switch (event->event) { |
230 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 228 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
231 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | 229 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
234 | break; | 232 | break; |
235 | case RDMA_CM_EVENT_ADDR_ERROR: | 233 | case RDMA_CM_EVENT_ADDR_ERROR: |
236 | ia->ri_async_rc = -EHOSTUNREACH; | 234 | ia->ri_async_rc = -EHOSTUNREACH; |
237 | dprintk("RPC: %s: CM address resolution error, ep 0x%p\n", | ||
238 | __func__, ep); | ||
239 | complete(&ia->ri_done); | 235 | complete(&ia->ri_done); |
240 | break; | 236 | break; |
241 | case RDMA_CM_EVENT_ROUTE_ERROR: | 237 | case RDMA_CM_EVENT_ROUTE_ERROR: |
242 | ia->ri_async_rc = -ENETUNREACH; | 238 | ia->ri_async_rc = -ENETUNREACH; |
243 | dprintk("RPC: %s: CM route resolution error, ep 0x%p\n", | ||
244 | __func__, ep); | ||
245 | complete(&ia->ri_done); | 239 | complete(&ia->ri_done); |
246 | break; | 240 | break; |
247 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | 241 | case RDMA_CM_EVENT_DEVICE_REMOVAL: |
248 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 242 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
249 | pr_info("rpcrdma: removing device %s for %pIS:%u\n", | 243 | pr_info("rpcrdma: removing device %s for %s:%s\n", |
250 | ia->ri_device->name, | 244 | ia->ri_device->name, |
251 | sap, rpc_get_port(sap)); | 245 | rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt)); |
252 | #endif | 246 | #endif |
253 | set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); | 247 | set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); |
254 | ep->rep_connected = -ENODEV; | 248 | ep->rep_connected = -ENODEV; |
@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
271 | connstate = -ENETDOWN; | 265 | connstate = -ENETDOWN; |
272 | goto connected; | 266 | goto connected; |
273 | case RDMA_CM_EVENT_REJECTED: | 267 | case RDMA_CM_EVENT_REJECTED: |
274 | dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n", | 268 | dprintk("rpcrdma: connection to %s:%s rejected: %s\n", |
275 | sap, rpc_get_port(sap), | 269 | rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), |
276 | rdma_reject_msg(id, event->status)); | 270 | rdma_reject_msg(id, event->status)); |
277 | connstate = -ECONNREFUSED; | 271 | connstate = -ECONNREFUSED; |
278 | if (event->status == IB_CM_REJ_STALE_CONN) | 272 | if (event->status == IB_CM_REJ_STALE_CONN) |
@@ -287,8 +281,9 @@ connected: | |||
287 | wake_up_all(&ep->rep_connect_wait); | 281 | wake_up_all(&ep->rep_connect_wait); |
288 | /*FALLTHROUGH*/ | 282 | /*FALLTHROUGH*/ |
289 | default: | 283 | default: |
290 | dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n", | 284 | dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n", |
291 | __func__, sap, rpc_get_port(sap), | 285 | __func__, |
286 | rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), | ||
292 | ia->ri_device->name, ia->ri_ops->ro_displayname, | 287 | ia->ri_device->name, ia->ri_ops->ro_displayname, |
293 | ep, rdma_event_msg(event->event)); | 288 | ep, rdma_event_msg(event->event)); |
294 | break; | 289 | break; |
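The upcall rework replaces per-branch dprintk calls with one trace_xprtrdma_conn_upcall on entry, and drops the #ifdef'd sockaddr in favor of the transport's pre-formatted address strings. The general shape of an rdma_cm event handler is worth seeing in isolation; a minimal sketch under assumed names (struct example_ctx and example_cm_handler are illustrative, the RDMA CM calls are real):

#include <rdma/rdma_cm.h>
#include <linux/completion.h>

struct example_ctx {
	struct completion done;	/* signalled by the upcall */
	int async_rc;		/* result handed to the waiter */
};

static int example_cm_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event)
{
	struct example_ctx *ctx = id->context;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ctx->async_rc = 0;
		complete(&ctx->done);	/* wake the thread in create_id */
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ctx->async_rc = -EHOSTUNREACH;
		complete(&ctx->done);
		break;
	default:
		/* rdma_event_msg() yields a printable event name */
		pr_info("unhandled CM event: %s\n",
			rdma_event_msg(event->event));
		break;
	}
	return 0;
}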
@@ -298,13 +293,14 @@ connected: | |||
298 | } | 293 | } |
299 | 294 | ||
300 | static struct rdma_cm_id * | 295 | static struct rdma_cm_id * |
301 | rpcrdma_create_id(struct rpcrdma_xprt *xprt, | 296 | rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) |
302 | struct rpcrdma_ia *ia, struct sockaddr *addr) | ||
303 | { | 297 | { |
304 | unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; | 298 | unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; |
305 | struct rdma_cm_id *id; | 299 | struct rdma_cm_id *id; |
306 | int rc; | 300 | int rc; |
307 | 301 | ||
302 | trace_xprtrdma_conn_start(xprt); | ||
303 | |||
308 | init_completion(&ia->ri_done); | 304 | init_completion(&ia->ri_done); |
309 | init_completion(&ia->ri_remove_done); | 305 | init_completion(&ia->ri_remove_done); |
310 | 306 | ||
@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
318 | } | 314 | } |
319 | 315 | ||
320 | ia->ri_async_rc = -ETIMEDOUT; | 316 | ia->ri_async_rc = -ETIMEDOUT; |
321 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); | 317 | rc = rdma_resolve_addr(id, NULL, |
318 | (struct sockaddr *)&xprt->rx_xprt.addr, | ||
319 | RDMA_RESOLVE_TIMEOUT); | ||
322 | if (rc) { | 320 | if (rc) { |
323 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", | 321 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
324 | __func__, rc); | 322 | __func__, rc); |
@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
326 | } | 324 | } |
327 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); | 325 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
328 | if (rc < 0) { | 326 | if (rc < 0) { |
329 | dprintk("RPC: %s: wait() exited: %i\n", | 327 | trace_xprtrdma_conn_tout(xprt); |
330 | __func__, rc); | ||
331 | goto out; | 328 | goto out; |
332 | } | 329 | } |
333 | 330 | ||
@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
344 | } | 341 | } |
345 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); | 342 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
346 | if (rc < 0) { | 343 | if (rc < 0) { |
347 | dprintk("RPC: %s: wait() exited: %i\n", | 344 | trace_xprtrdma_conn_tout(xprt); |
348 | __func__, rc); | ||
349 | goto out; | 345 | goto out; |
350 | } | 346 | } |
351 | rc = ia->ri_async_rc; | 347 | rc = ia->ri_async_rc; |
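Both resolution phases follow the same handshake: arm ri_async_rc with -ETIMEDOUT, start the asynchronous CM operation, then wait on the completion with a timeout one jiffy longer than the CM's own. If the wait times out, the pessimistic value is still in place; if the upcall ran, it overwrote it. A sketch of one phase, reusing the hypothetical example_ctx from the handler sketch above:

static int example_resolve_addr(struct rdma_cm_id *id, struct sockaddr *sa,
				struct example_ctx *ctx)
{
	unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
	int rc;

	ctx->async_rc = -ETIMEDOUT;	/* assume the worst up front */
	rc = rdma_resolve_addr(id, NULL, sa, RDMA_RESOLVE_TIMEOUT);
	if (rc)
		return rc;		/* immediate failure */
	rc = wait_for_completion_interruptible_timeout(&ctx->done, wtimeout);
	if (rc < 0)
		return rc;		/* interrupted by a signal */
	return ctx->async_rc;		/* 0, an upcall errno, or -ETIMEDOUT */
}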
@@ -365,19 +361,18 @@ out: | |||
365 | 361 | ||
366 | /** | 362 | /** |
367 | * rpcrdma_ia_open - Open and initialize an Interface Adapter. | 363 | * rpcrdma_ia_open - Open and initialize an Interface Adapter. |
368 | * @xprt: controlling transport | 364 | * @xprt: transport with IA to (re)initialize |
369 | * @addr: IP address of remote peer | ||
370 | * | 365 | * |
371 | * Returns 0 on success, negative errno if an appropriate | 366 | * Returns 0 on success, negative errno if an appropriate |
372 | * Interface Adapter could not be found and opened. | 367 | * Interface Adapter could not be found and opened. |
373 | */ | 368 | */ |
374 | int | 369 | int |
375 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) | 370 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt) |
376 | { | 371 | { |
377 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 372 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
378 | int rc; | 373 | int rc; |
379 | 374 | ||
380 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); | 375 | ia->ri_id = rpcrdma_create_id(xprt, ia); |
381 | if (IS_ERR(ia->ri_id)) { | 376 | if (IS_ERR(ia->ri_id)) { |
382 | rc = PTR_ERR(ia->ri_id); | 377 | rc = PTR_ERR(ia->ri_id); |
383 | goto out_err; | 378 | goto out_err; |
@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) | |||
392 | } | 387 | } |
393 | 388 | ||
394 | switch (xprt_rdma_memreg_strategy) { | 389 | switch (xprt_rdma_memreg_strategy) { |
395 | case RPCRDMA_FRMR: | 390 | case RPCRDMA_FRWR: |
396 | if (frwr_is_supported(ia)) { | 391 | if (frwr_is_supported(ia)) { |
397 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | 392 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; |
398 | break; | 393 | break; |
@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) | |||
462 | rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); | 457 | rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); |
463 | rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); | 458 | rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); |
464 | } | 459 | } |
465 | rpcrdma_destroy_mrs(buf); | 460 | rpcrdma_mrs_destroy(buf); |
466 | 461 | ||
467 | /* Allow waiters to continue */ | 462 | /* Allow waiters to continue */ |
468 | complete(&ia->ri_remove_done); | 463 | complete(&ia->ri_remove_done); |
464 | |||
465 | trace_xprtrdma_remove(r_xprt); | ||
469 | } | 466 | } |
470 | 467 | ||
471 | /** | 468 | /** |
@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) | |||
476 | void | 473 | void |
477 | rpcrdma_ia_close(struct rpcrdma_ia *ia) | 474 | rpcrdma_ia_close(struct rpcrdma_ia *ia) |
478 | { | 475 | { |
479 | dprintk("RPC: %s: entering\n", __func__); | ||
480 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { | 476 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
481 | if (ia->ri_id->qp) | 477 | if (ia->ri_id->qp) |
482 | rdma_destroy_qp(ia->ri_id); | 478 | rdma_destroy_qp(ia->ri_id); |
@@ -630,9 +626,6 @@ out1: | |||
630 | void | 626 | void |
631 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 627 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
632 | { | 628 | { |
633 | dprintk("RPC: %s: entering, connected is %d\n", | ||
634 | __func__, ep->rep_connected); | ||
635 | |||
636 | cancel_delayed_work_sync(&ep->rep_connect_worker); | 629 | cancel_delayed_work_sync(&ep->rep_connect_worker); |
637 | 630 | ||
638 | if (ia->ri_id->qp) { | 631 | if (ia->ri_id->qp) { |
@@ -653,13 +646,12 @@ static int | |||
653 | rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, | 646 | rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, |
654 | struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 647 | struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
655 | { | 648 | { |
656 | struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr; | ||
657 | int rc, err; | 649 | int rc, err; |
658 | 650 | ||
659 | pr_info("%s: r_xprt = %p\n", __func__, r_xprt); | 651 | trace_xprtrdma_reinsert(r_xprt); |
660 | 652 | ||
661 | rc = -EHOSTUNREACH; | 653 | rc = -EHOSTUNREACH; |
662 | if (rpcrdma_ia_open(r_xprt, sap)) | 654 | if (rpcrdma_ia_open(r_xprt)) |
663 | goto out1; | 655 | goto out1; |
664 | 656 | ||
665 | rc = -ENOMEM; | 657 | rc = -ENOMEM; |
@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, | |||
676 | goto out3; | 668 | goto out3; |
677 | } | 669 | } |
678 | 670 | ||
679 | rpcrdma_create_mrs(r_xprt); | 671 | rpcrdma_mrs_create(r_xprt); |
680 | return 0; | 672 | return 0; |
681 | 673 | ||
682 | out3: | 674 | out3: |
@@ -691,16 +683,15 @@ static int | |||
691 | rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, | 683 | rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, |
692 | struct rpcrdma_ia *ia) | 684 | struct rpcrdma_ia *ia) |
693 | { | 685 | { |
694 | struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr; | ||
695 | struct rdma_cm_id *id, *old; | 686 | struct rdma_cm_id *id, *old; |
696 | int err, rc; | 687 | int err, rc; |
697 | 688 | ||
698 | dprintk("RPC: %s: reconnecting...\n", __func__); | 689 | trace_xprtrdma_reconnect(r_xprt); |
699 | 690 | ||
700 | rpcrdma_ep_disconnect(ep, ia); | 691 | rpcrdma_ep_disconnect(ep, ia); |
701 | 692 | ||
702 | rc = -EHOSTUNREACH; | 693 | rc = -EHOSTUNREACH; |
703 | id = rpcrdma_create_id(r_xprt, ia, sap); | 694 | id = rpcrdma_create_id(r_xprt, ia); |
704 | if (IS_ERR(id)) | 695 | if (IS_ERR(id)) |
705 | goto out; | 696 | goto out; |
706 | 697 | ||
@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
817 | int rc; | 808 | int rc; |
818 | 809 | ||
819 | rc = rdma_disconnect(ia->ri_id); | 810 | rc = rdma_disconnect(ia->ri_id); |
820 | if (!rc) { | 811 | if (!rc) |
821 | /* returns without wait if not connected */ | 812 | /* returns without wait if not connected */ |
822 | wait_event_interruptible(ep->rep_connect_wait, | 813 | wait_event_interruptible(ep->rep_connect_wait, |
823 | ep->rep_connected != 1); | 814 | ep->rep_connected != 1); |
824 | dprintk("RPC: %s: after wait, %sconnected\n", __func__, | 815 | else |
825 | (ep->rep_connected == 1) ? "still " : "dis"); | ||
826 | } else { | ||
827 | dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); | ||
828 | ep->rep_connected = rc; | 816 | ep->rep_connected = rc; |
829 | } | 817 | trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, |
818 | rx_ep), rc); | ||
830 | 819 | ||
831 | ib_drain_qp(ia->ri_id->qp); | 820 | ib_drain_qp(ia->ri_id->qp); |
832 | } | 821 | } |
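The rewritten disconnect path illustrates RDMA teardown ordering: rdma_disconnect() only starts the exchange, the CM upcall later moves rep_connected off 1, and ib_drain_qp() then guarantees every posted Send and Receive has completed before anything behind them is freed. A condensed sketch, assuming the ep/ia fields used in this file:

static void example_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc = rdma_disconnect(ia->ri_id);

	if (!rc)
		/* returns without waiting if we were never connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
	else
		ep->rep_connected = rc;

	/* No WR may still be in flight once this returns */
	ib_drain_qp(ia->ri_id->qp);
}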
@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) | |||
998 | { | 987 | { |
999 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, | 988 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, |
1000 | rb_recovery_worker.work); | 989 | rb_recovery_worker.work); |
1001 | struct rpcrdma_mw *mw; | 990 | struct rpcrdma_mr *mr; |
1002 | 991 | ||
1003 | spin_lock(&buf->rb_recovery_lock); | 992 | spin_lock(&buf->rb_recovery_lock); |
1004 | while (!list_empty(&buf->rb_stale_mrs)) { | 993 | while (!list_empty(&buf->rb_stale_mrs)) { |
1005 | mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); | 994 | mr = rpcrdma_mr_pop(&buf->rb_stale_mrs); |
1006 | spin_unlock(&buf->rb_recovery_lock); | 995 | spin_unlock(&buf->rb_recovery_lock); |
1007 | 996 | ||
1008 | dprintk("RPC: %s: recovering MR %p\n", __func__, mw); | 997 | trace_xprtrdma_recover_mr(mr); |
1009 | mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw); | 998 | mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr); |
1010 | 999 | ||
1011 | spin_lock(&buf->rb_recovery_lock); | 1000 | spin_lock(&buf->rb_recovery_lock); |
1012 | } | 1001 | } |
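The recovery worker uses a standard drain idiom: take one item off the list under the spinlock, drop the lock for the recovery call (ro_recover_mr may sleep), then retake the lock before re-testing emptiness. A generic sketch of the idiom with a hypothetical process callback:

static void example_drain_stale(spinlock_t *lock, struct list_head *head,
				void (*process)(struct rpcrdma_mr *mr))
{
	struct rpcrdma_mr *mr;

	spin_lock(lock);
	while (!list_empty(head)) {
		mr = list_first_entry(head, struct rpcrdma_mr, mr_list);
		list_del(&mr->mr_list);
		spin_unlock(lock);	/* process() may sleep */

		process(mr);

		spin_lock(lock);	/* re-check under the lock */
	}
	spin_unlock(lock);
}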
@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) | |||
1014 | } | 1003 | } |
1015 | 1004 | ||
1016 | void | 1005 | void |
1017 | rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) | 1006 | rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr) |
1018 | { | 1007 | { |
1019 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | 1008 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
1020 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1009 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1021 | 1010 | ||
1022 | spin_lock(&buf->rb_recovery_lock); | 1011 | spin_lock(&buf->rb_recovery_lock); |
1023 | rpcrdma_push_mw(mw, &buf->rb_stale_mrs); | 1012 | rpcrdma_mr_push(mr, &buf->rb_stale_mrs); |
1024 | spin_unlock(&buf->rb_recovery_lock); | 1013 | spin_unlock(&buf->rb_recovery_lock); |
1025 | 1014 | ||
1026 | schedule_delayed_work(&buf->rb_recovery_worker, 0); | 1015 | schedule_delayed_work(&buf->rb_recovery_worker, 0); |
1027 | } | 1016 | } |
1028 | 1017 | ||
1029 | static void | 1018 | static void |
1030 | rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) | 1019 | rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) |
1031 | { | 1020 | { |
1032 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1021 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1033 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1022 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) | |||
1036 | LIST_HEAD(all); | 1025 | LIST_HEAD(all); |
1037 | 1026 | ||
1038 | for (count = 0; count < 32; count++) { | 1027 | for (count = 0; count < 32; count++) { |
1039 | struct rpcrdma_mw *mw; | 1028 | struct rpcrdma_mr *mr; |
1040 | int rc; | 1029 | int rc; |
1041 | 1030 | ||
1042 | mw = kzalloc(sizeof(*mw), GFP_KERNEL); | 1031 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); |
1043 | if (!mw) | 1032 | if (!mr) |
1044 | break; | 1033 | break; |
1045 | 1034 | ||
1046 | rc = ia->ri_ops->ro_init_mr(ia, mw); | 1035 | rc = ia->ri_ops->ro_init_mr(ia, mr); |
1047 | if (rc) { | 1036 | if (rc) { |
1048 | kfree(mw); | 1037 | kfree(mr); |
1049 | break; | 1038 | break; |
1050 | } | 1039 | } |
1051 | 1040 | ||
1052 | mw->mw_xprt = r_xprt; | 1041 | mr->mr_xprt = r_xprt; |
1053 | 1042 | ||
1054 | list_add(&mw->mw_list, &free); | 1043 | list_add(&mr->mr_list, &free); |
1055 | list_add(&mw->mw_all, &all); | 1044 | list_add(&mr->mr_all, &all); |
1056 | } | 1045 | } |
1057 | 1046 | ||
1058 | spin_lock(&buf->rb_mwlock); | 1047 | spin_lock(&buf->rb_mrlock); |
1059 | list_splice(&free, &buf->rb_mws); | 1048 | list_splice(&free, &buf->rb_mrs); |
1060 | list_splice(&all, &buf->rb_all); | 1049 | list_splice(&all, &buf->rb_all); |
1061 | r_xprt->rx_stats.mrs_allocated += count; | 1050 | r_xprt->rx_stats.mrs_allocated += count; |
1062 | spin_unlock(&buf->rb_mwlock); | 1051 | spin_unlock(&buf->rb_mrlock); |
1063 | 1052 | ||
1064 | dprintk("RPC: %s: created %u MRs\n", __func__, count); | 1053 | trace_xprtrdma_createmrs(r_xprt, count); |
1065 | } | 1054 | } |
1066 | 1055 | ||
1067 | static void | 1056 | static void |
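rpcrdma_mrs_create() batches its allocations: new MRs are built on two stack-local lists and published with a single rb_mrlock round trip, so the hot path never observes a half-initialized MR. A trimmed sketch (the ro_init_mr step, stats, and tracepoint are elided):

static void example_mrs_create(struct rpcrdma_buffer *buf, unsigned int max)
{
	LIST_HEAD(free);
	LIST_HEAD(all);
	unsigned int count;

	for (count = 0; count < max; count++) {
		struct rpcrdma_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);

		if (!mr)
			break;	/* a partial batch is fine; worker refills */
		list_add(&mr->mr_list, &free);	/* free pool */
		list_add(&mr->mr_all, &all);	/* teardown bookkeeping */
	}

	/* One lock hold publishes the entire batch */
	spin_lock(&buf->rb_mrlock);
	list_splice(&free, &buf->rb_mrs);
	list_splice(&all, &buf->rb_all);
	spin_unlock(&buf->rb_mrlock);
}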
@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work) | |||
1072 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, | 1061 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
1073 | rx_buf); | 1062 | rx_buf); |
1074 | 1063 | ||
1075 | rpcrdma_create_mrs(r_xprt); | 1064 | rpcrdma_mrs_create(r_xprt); |
1076 | } | 1065 | } |
1077 | 1066 | ||
1078 | struct rpcrdma_req * | 1067 | struct rpcrdma_req * |
@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) | |||
1093 | return req; | 1082 | return req; |
1094 | } | 1083 | } |
1095 | 1084 | ||
1096 | struct rpcrdma_rep * | 1085 | /** |
1086 | * rpcrdma_create_rep - Allocate an rpcrdma_rep object | ||
1087 | * @r_xprt: controlling transport | ||
1088 | * | ||
1089 | * Returns 0 on success or a negative errno on failure. | ||
1090 | */ | ||
1091 | int | ||
1097 | rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) | 1092 | rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) |
1098 | { | 1093 | { |
1099 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | 1094 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
1095 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1100 | struct rpcrdma_rep *rep; | 1096 | struct rpcrdma_rep *rep; |
1101 | int rc; | 1097 | int rc; |
1102 | 1098 | ||
@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) | |||
1121 | rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; | 1117 | rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; |
1122 | rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; | 1118 | rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; |
1123 | rep->rr_recv_wr.num_sge = 1; | 1119 | rep->rr_recv_wr.num_sge = 1; |
1124 | return rep; | 1120 | |
1121 | spin_lock(&buf->rb_lock); | ||
1122 | list_add(&rep->rr_list, &buf->rb_recv_bufs); | ||
1123 | spin_unlock(&buf->rb_lock); | ||
1124 | return 0; | ||
1125 | 1125 | ||
1126 | out_free: | 1126 | out_free: |
1127 | kfree(rep); | 1127 | kfree(rep); |
1128 | out: | 1128 | out: |
1129 | return ERR_PTR(rc); | 1129 | dprintk("RPC: %s: reply buffer %d alloc failed\n", |
1130 | __func__, rc); | ||
1131 | return rc; | ||
1130 | } | 1132 | } |
1131 | 1133 | ||
1132 | int | 1134 | int |
@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1137 | 1139 | ||
1138 | buf->rb_max_requests = r_xprt->rx_data.max_requests; | 1140 | buf->rb_max_requests = r_xprt->rx_data.max_requests; |
1139 | buf->rb_bc_srv_max_requests = 0; | 1141 | buf->rb_bc_srv_max_requests = 0; |
1140 | spin_lock_init(&buf->rb_mwlock); | 1142 | spin_lock_init(&buf->rb_mrlock); |
1141 | spin_lock_init(&buf->rb_lock); | 1143 | spin_lock_init(&buf->rb_lock); |
1142 | spin_lock_init(&buf->rb_recovery_lock); | 1144 | spin_lock_init(&buf->rb_recovery_lock); |
1143 | INIT_LIST_HEAD(&buf->rb_mws); | 1145 | INIT_LIST_HEAD(&buf->rb_mrs); |
1144 | INIT_LIST_HEAD(&buf->rb_all); | 1146 | INIT_LIST_HEAD(&buf->rb_all); |
1145 | INIT_LIST_HEAD(&buf->rb_stale_mrs); | 1147 | INIT_LIST_HEAD(&buf->rb_stale_mrs); |
1146 | INIT_DELAYED_WORK(&buf->rb_refresh_worker, | 1148 | INIT_DELAYED_WORK(&buf->rb_refresh_worker, |
@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1148 | INIT_DELAYED_WORK(&buf->rb_recovery_worker, | 1150 | INIT_DELAYED_WORK(&buf->rb_recovery_worker, |
1149 | rpcrdma_mr_recovery_worker); | 1151 | rpcrdma_mr_recovery_worker); |
1150 | 1152 | ||
1151 | rpcrdma_create_mrs(r_xprt); | 1153 | rpcrdma_mrs_create(r_xprt); |
1152 | 1154 | ||
1153 | INIT_LIST_HEAD(&buf->rb_send_bufs); | 1155 | INIT_LIST_HEAD(&buf->rb_send_bufs); |
1154 | INIT_LIST_HEAD(&buf->rb_allreqs); | 1156 | INIT_LIST_HEAD(&buf->rb_allreqs); |
@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1167 | } | 1169 | } |
1168 | 1170 | ||
1169 | INIT_LIST_HEAD(&buf->rb_recv_bufs); | 1171 | INIT_LIST_HEAD(&buf->rb_recv_bufs); |
1170 | for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { | 1172 | for (i = 0; i <= buf->rb_max_requests; i++) { |
1171 | struct rpcrdma_rep *rep; | 1173 | rc = rpcrdma_create_rep(r_xprt); |
1172 | 1174 | if (rc) | |
1173 | rep = rpcrdma_create_rep(r_xprt); | ||
1174 | if (IS_ERR(rep)) { | ||
1175 | dprintk("RPC: %s: reply buffer %d alloc failed\n", | ||
1176 | __func__, i); | ||
1177 | rc = PTR_ERR(rep); | ||
1178 | goto out; | 1175 | goto out; |
1179 | } | ||
1180 | list_add(&rep->rr_list, &buf->rb_recv_bufs); | ||
1181 | } | 1176 | } |
1182 | 1177 | ||
1183 | rc = rpcrdma_sendctxs_create(r_xprt); | 1178 | rc = rpcrdma_sendctxs_create(r_xprt); |
@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req) | |||
1229 | } | 1224 | } |
1230 | 1225 | ||
1231 | static void | 1226 | static void |
1232 | rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) | 1227 | rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) |
1233 | { | 1228 | { |
1234 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, | 1229 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
1235 | rx_buf); | 1230 | rx_buf); |
1236 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1231 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
1237 | struct rpcrdma_mw *mw; | 1232 | struct rpcrdma_mr *mr; |
1238 | unsigned int count; | 1233 | unsigned int count; |
1239 | 1234 | ||
1240 | count = 0; | 1235 | count = 0; |
1241 | spin_lock(&buf->rb_mwlock); | 1236 | spin_lock(&buf->rb_mrlock); |
1242 | while (!list_empty(&buf->rb_all)) { | 1237 | while (!list_empty(&buf->rb_all)) { |
1243 | mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | 1238 | mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all); |
1244 | list_del(&mw->mw_all); | 1239 | list_del(&mr->mr_all); |
1245 | 1240 | ||
1246 | spin_unlock(&buf->rb_mwlock); | 1241 | spin_unlock(&buf->rb_mrlock); |
1247 | ia->ri_ops->ro_release_mr(mw); | 1242 | ia->ri_ops->ro_release_mr(mr); |
1248 | count++; | 1243 | count++; |
1249 | spin_lock(&buf->rb_mwlock); | 1244 | spin_lock(&buf->rb_mrlock); |
1250 | } | 1245 | } |
1251 | spin_unlock(&buf->rb_mwlock); | 1246 | spin_unlock(&buf->rb_mrlock); |
1252 | r_xprt->rx_stats.mrs_allocated = 0; | 1247 | r_xprt->rx_stats.mrs_allocated = 0; |
1253 | 1248 | ||
1254 | dprintk("RPC: %s: released %u MRs\n", __func__, count); | 1249 | dprintk("RPC: %s: released %u MRs\n", __func__, count); |
@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1285 | spin_unlock(&buf->rb_reqslock); | 1280 | spin_unlock(&buf->rb_reqslock); |
1286 | buf->rb_recv_count = 0; | 1281 | buf->rb_recv_count = 0; |
1287 | 1282 | ||
1288 | rpcrdma_destroy_mrs(buf); | 1283 | rpcrdma_mrs_destroy(buf); |
1289 | } | 1284 | } |
1290 | 1285 | ||
1291 | struct rpcrdma_mw * | 1286 | /** |
1292 | rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) | 1287 | * rpcrdma_mr_get - Allocate an rpcrdma_mr object |
1288 | * @r_xprt: controlling transport | ||
1289 | * | ||
1290 | * Returns an initialized rpcrdma_mr or NULL if no free | ||
1291 | * rpcrdma_mr objects are available. | ||
1292 | */ | ||
1293 | struct rpcrdma_mr * | ||
1294 | rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) | ||
1293 | { | 1295 | { |
1294 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1296 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1295 | struct rpcrdma_mw *mw = NULL; | 1297 | struct rpcrdma_mr *mr = NULL; |
1296 | 1298 | ||
1297 | spin_lock(&buf->rb_mwlock); | 1299 | spin_lock(&buf->rb_mrlock); |
1298 | if (!list_empty(&buf->rb_mws)) | 1300 | if (!list_empty(&buf->rb_mrs)) |
1299 | mw = rpcrdma_pop_mw(&buf->rb_mws); | 1301 | mr = rpcrdma_mr_pop(&buf->rb_mrs); |
1300 | spin_unlock(&buf->rb_mwlock); | 1302 | spin_unlock(&buf->rb_mrlock); |
1301 | 1303 | ||
1302 | if (!mw) | 1304 | if (!mr) |
1303 | goto out_nomws; | 1305 | goto out_nomrs; |
1304 | mw->mw_flags = 0; | 1306 | return mr; |
1305 | return mw; | ||
1306 | 1307 | ||
1307 | out_nomws: | 1308 | out_nomrs: |
1308 | dprintk("RPC: %s: no MWs available\n", __func__); | 1309 | trace_xprtrdma_nomrs(r_xprt); |
1309 | if (r_xprt->rx_ep.rep_connected != -ENODEV) | 1310 | if (r_xprt->rx_ep.rep_connected != -ENODEV) |
1310 | schedule_delayed_work(&buf->rb_refresh_worker, 0); | 1311 | schedule_delayed_work(&buf->rb_refresh_worker, 0); |
1311 | 1312 | ||
@@ -1315,14 +1316,39 @@ out_nomws: | |||
1315 | return NULL; | 1316 | return NULL; |
1316 | } | 1317 | } |
1317 | 1318 | ||
1319 | static void | ||
1320 | __rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr) | ||
1321 | { | ||
1322 | spin_lock(&buf->rb_mrlock); | ||
1323 | rpcrdma_mr_push(mr, &buf->rb_mrs); | ||
1324 | spin_unlock(&buf->rb_mrlock); | ||
1325 | } | ||
1326 | |||
1327 | /** | ||
1328 | * rpcrdma_mr_put - Release an rpcrdma_mr object | ||
1329 | * @mr: object to release | ||
1330 | * | ||
1331 | */ | ||
1318 | void | 1332 | void |
1319 | rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) | 1333 | rpcrdma_mr_put(struct rpcrdma_mr *mr) |
1320 | { | 1334 | { |
1321 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1335 | __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr); |
1336 | } | ||
1337 | |||
1338 | /** | ||
1339 | * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it | ||
1340 | * @mr: object to release | ||
1341 | * | ||
1342 | */ | ||
1343 | void | ||
1344 | rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) | ||
1345 | { | ||
1346 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | ||
1322 | 1347 | ||
1323 | spin_lock(&buf->rb_mwlock); | 1348 | trace_xprtrdma_dma_unmap(mr); |
1324 | rpcrdma_push_mw(mw, &buf->rb_mws); | 1349 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
1325 | spin_unlock(&buf->rb_mwlock); | 1350 | mr->mr_sg, mr->mr_nents, mr->mr_dir); |
1351 | __rpcrdma_mr_put(&r_xprt->rx_buf, mr); | ||
1326 | } | 1352 | } |
1327 | 1353 | ||
1328 | static struct rpcrdma_rep * | 1354 | static struct rpcrdma_rep * |
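rpcrdma_mr_get(), rpcrdma_mr_put() and rpcrdma_mr_unmap_and_put() now form the whole public MR-pool API: get may come back empty (and schedules the refresh worker itself), plain put recycles an MR that is already unmapped, and unmap_and_put does the DMA unmap first. A hedged caller sketch:

static struct rpcrdma_mr *example_take_mr(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr *mr = rpcrdma_mr_get(r_xprt);

	/* NULL means the pool is empty; rpcrdma_mr_get() has already
	 * scheduled rb_refresh_worker unless the device was removed. */
	if (!mr)
		return NULL;

	/* The caller maps mr->mr_sg and posts its registration WR.
	 * Later, rpcrdma_mr_unmap_and_put(mr) both unmaps and recycles;
	 * rpcrdma_mr_put(mr) recycles an MR that was never mapped. */
	return mr;
}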
@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1359 | req = rpcrdma_buffer_get_req_locked(buffers); | 1385 | req = rpcrdma_buffer_get_req_locked(buffers); |
1360 | req->rl_reply = rpcrdma_buffer_get_rep(buffers); | 1386 | req->rl_reply = rpcrdma_buffer_get_rep(buffers); |
1361 | spin_unlock(&buffers->rb_lock); | 1387 | spin_unlock(&buffers->rb_lock); |
1388 | |||
1362 | return req; | 1389 | return req; |
1363 | 1390 | ||
1364 | out_reqbuf: | 1391 | out_reqbuf: |
1365 | spin_unlock(&buffers->rb_lock); | 1392 | spin_unlock(&buffers->rb_lock); |
1366 | pr_warn("RPC: %s: out of request buffers\n", __func__); | ||
1367 | return NULL; | 1393 | return NULL; |
1368 | } | 1394 | } |
1369 | 1395 | ||
@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
1519 | req->rl_reply = NULL; | 1545 | req->rl_reply = NULL; |
1520 | } | 1546 | } |
1521 | 1547 | ||
1522 | dprintk("RPC: %s: posting %d s/g entries\n", | ||
1523 | __func__, send_wr->num_sge); | ||
1524 | |||
1525 | if (!ep->rep_send_count || | 1548 | if (!ep->rep_send_count || |
1526 | test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { | 1549 | test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { |
1527 | send_wr->send_flags |= IB_SEND_SIGNALED; | 1550 | send_wr->send_flags |= IB_SEND_SIGNALED; |
@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
1530 | send_wr->send_flags &= ~IB_SEND_SIGNALED; | 1553 | send_wr->send_flags &= ~IB_SEND_SIGNALED; |
1531 | --ep->rep_send_count; | 1554 | --ep->rep_send_count; |
1532 | } | 1555 | } |
1556 | |||
1533 | rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); | 1557 | rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); |
1558 | trace_xprtrdma_post_send(req, rc); | ||
1534 | if (rc) | 1559 | if (rc) |
1535 | goto out_postsend_err; | 1560 | return -ENOTCONN; |
1536 | return 0; | 1561 | return 0; |
1537 | |||
1538 | out_postsend_err: | ||
1539 | pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc); | ||
1540 | return -ENOTCONN; | ||
1541 | } | 1562 | } |
1542 | 1563 | ||
1543 | int | 1564 | int |
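The send path batches completions: most Sends go out unsignaled, and only when rep_send_count is exhausted (or the request pins TX resources) is IB_SEND_SIGNALED set, so a completion comes back and lets the send queue be reaped. A sketch of the decision; the rep_send_batch refill value is an assumption based on the sendctx design in this file:

static void example_set_signal_bit(struct rpcrdma_ep *ep,
				   struct rpcrdma_req *req,
				   struct ib_send_wr *send_wr)
{
	if (!ep->rep_send_count ||
	    test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
		send_wr->send_flags |= IB_SEND_SIGNALED;  /* reap the SQ */
		ep->rep_send_count = ep->rep_send_batch;  /* assumed refill */
	} else {
		send_wr->send_flags &= ~IB_SEND_SIGNALED;
		--ep->rep_send_count;
	}
}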
@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
1550 | if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) | 1571 | if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) |
1551 | goto out_map; | 1572 | goto out_map; |
1552 | rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); | 1573 | rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); |
1574 | trace_xprtrdma_post_recv(rep, rc); | ||
1553 | if (rc) | 1575 | if (rc) |
1554 | goto out_postrecv; | 1576 | return -ENOTCONN; |
1555 | return 0; | 1577 | return 0; |
1556 | 1578 | ||
1557 | out_map: | 1579 | out_map: |
1558 | pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); | 1580 | pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); |
1559 | return -EIO; | 1581 | return -EIO; |
1560 | |||
1561 | out_postrecv: | ||
1562 | pr_err("rpcrdma: ib_post_recv returned %i\n", rc); | ||
1563 | return -ENOTCONN; | ||
1564 | } | 1582 | } |
1565 | 1583 | ||
1566 | /** | 1584 | /** |
1567 | * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests | 1585 | * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests |
1568 | * @r_xprt: transport associated with these backchannel resources | 1586 | * @r_xprt: transport associated with these backchannel resources |
1569 | * @min_reqs: minimum number of incoming requests expected | 1587 | * @count: minimum number of incoming requests expected |
1570 | * | 1588 | * |
1571 | * Returns zero if all requested buffers were posted, or a negative errno. | 1589 | * Returns zero if all requested buffers were posted, or a negative errno. |
1572 | */ | 1590 | */ |
@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) | |||
1594 | 1612 | ||
1595 | out_reqbuf: | 1613 | out_reqbuf: |
1596 | spin_unlock(&buffers->rb_lock); | 1614 | spin_unlock(&buffers->rb_lock); |
1597 | pr_warn("%s: no extra receive buffers\n", __func__); | 1615 | trace_xprtrdma_noreps(r_xprt); |
1598 | return -ENOMEM; | 1616 | return -ENOMEM; |
1599 | 1617 | ||
1600 | out_rc: | 1618 | out_rc: |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 51686d9eac5f..69883a960a3f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -73,11 +73,10 @@ struct rpcrdma_ia { | |||
73 | struct completion ri_remove_done; | 73 | struct completion ri_remove_done; |
74 | int ri_async_rc; | 74 | int ri_async_rc; |
75 | unsigned int ri_max_segs; | 75 | unsigned int ri_max_segs; |
76 | unsigned int ri_max_frmr_depth; | 76 | unsigned int ri_max_frwr_depth; |
77 | unsigned int ri_max_inline_write; | 77 | unsigned int ri_max_inline_write; |
78 | unsigned int ri_max_inline_read; | 78 | unsigned int ri_max_inline_read; |
79 | unsigned int ri_max_send_sges; | 79 | unsigned int ri_max_send_sges; |
80 | bool ri_reminv_expected; | ||
81 | bool ri_implicit_roundup; | 80 | bool ri_implicit_roundup; |
82 | enum ib_mr_type ri_mrtype; | 81 | enum ib_mr_type ri_mrtype; |
83 | unsigned long ri_flags; | 82 | unsigned long ri_flags; |
@@ -101,7 +100,6 @@ struct rpcrdma_ep { | |||
101 | wait_queue_head_t rep_connect_wait; | 100 | wait_queue_head_t rep_connect_wait; |
102 | struct rpcrdma_connect_private rep_cm_private; | 101 | struct rpcrdma_connect_private rep_cm_private; |
103 | struct rdma_conn_param rep_remote_cma; | 102 | struct rdma_conn_param rep_remote_cma; |
104 | struct sockaddr_storage rep_remote_addr; | ||
105 | struct delayed_work rep_connect_worker; | 103 | struct delayed_work rep_connect_worker; |
106 | }; | 104 | }; |
107 | 105 | ||
@@ -232,29 +230,29 @@ enum { | |||
232 | }; | 230 | }; |
233 | 231 | ||
234 | /* | 232 | /* |
235 | * struct rpcrdma_mw - external memory region metadata | 233 | * struct rpcrdma_mr - external memory region metadata |
236 | * | 234 | * |
237 | * An external memory region is any buffer or page that is registered | 235 | * An external memory region is any buffer or page that is registered |
238 | * on the fly (ie, not pre-registered). | 236 | * on the fly (ie, not pre-registered). |
239 | * | 237 | * |
240 | * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During | 238 | * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During |
241 | * call_allocate, rpcrdma_buffer_get() assigns one to each segment in | 239 | * call_allocate, rpcrdma_buffer_get() assigns one to each segment in |
242 | * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep | 240 | * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep |
243 | * track of registration metadata while each RPC is pending. | 241 | * track of registration metadata while each RPC is pending. |
244 | * rpcrdma_deregister_external() uses this metadata to unmap and | 242 | * rpcrdma_deregister_external() uses this metadata to unmap and |
245 | * release these resources when an RPC is complete. | 243 | * release these resources when an RPC is complete. |
246 | */ | 244 | */ |
247 | enum rpcrdma_frmr_state { | 245 | enum rpcrdma_frwr_state { |
248 | FRMR_IS_INVALID, /* ready to be used */ | 246 | FRWR_IS_INVALID, /* ready to be used */ |
249 | FRMR_IS_VALID, /* in use */ | 247 | FRWR_IS_VALID, /* in use */ |
250 | FRMR_FLUSHED_FR, /* flushed FASTREG WR */ | 248 | FRWR_FLUSHED_FR, /* flushed FASTREG WR */ |
251 | FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ | 249 | FRWR_FLUSHED_LI, /* flushed LOCALINV WR */ |
252 | }; | 250 | }; |
253 | 251 | ||
254 | struct rpcrdma_frmr { | 252 | struct rpcrdma_frwr { |
255 | struct ib_mr *fr_mr; | 253 | struct ib_mr *fr_mr; |
256 | struct ib_cqe fr_cqe; | 254 | struct ib_cqe fr_cqe; |
257 | enum rpcrdma_frmr_state fr_state; | 255 | enum rpcrdma_frwr_state fr_state; |
258 | struct completion fr_linv_done; | 256 | struct completion fr_linv_done; |
259 | union { | 257 | union { |
260 | struct ib_reg_wr fr_regwr; | 258 | struct ib_reg_wr fr_regwr; |
@@ -267,26 +265,20 @@ struct rpcrdma_fmr { | |||
267 | u64 *fm_physaddrs; | 265 | u64 *fm_physaddrs; |
268 | }; | 266 | }; |
269 | 267 | ||
270 | struct rpcrdma_mw { | 268 | struct rpcrdma_mr { |
271 | struct list_head mw_list; | 269 | struct list_head mr_list; |
272 | struct scatterlist *mw_sg; | 270 | struct scatterlist *mr_sg; |
273 | int mw_nents; | 271 | int mr_nents; |
274 | enum dma_data_direction mw_dir; | 272 | enum dma_data_direction mr_dir; |
275 | unsigned long mw_flags; | ||
276 | union { | 273 | union { |
277 | struct rpcrdma_fmr fmr; | 274 | struct rpcrdma_fmr fmr; |
278 | struct rpcrdma_frmr frmr; | 275 | struct rpcrdma_frwr frwr; |
279 | }; | 276 | }; |
280 | struct rpcrdma_xprt *mw_xprt; | 277 | struct rpcrdma_xprt *mr_xprt; |
281 | u32 mw_handle; | 278 | u32 mr_handle; |
282 | u32 mw_length; | 279 | u32 mr_length; |
283 | u64 mw_offset; | 280 | u64 mr_offset; |
284 | struct list_head mw_all; | 281 | struct list_head mr_all; |
285 | }; | ||
286 | |||
287 | /* mw_flags */ | ||
288 | enum { | ||
289 | RPCRDMA_MW_F_RI = 1, | ||
290 | }; | 282 | }; |
291 | 283 | ||
292 | /* | 284 | /* |
@@ -342,6 +334,7 @@ enum { | |||
342 | struct rpcrdma_buffer; | 334 | struct rpcrdma_buffer; |
343 | struct rpcrdma_req { | 335 | struct rpcrdma_req { |
344 | struct list_head rl_list; | 336 | struct list_head rl_list; |
337 | int rl_cpu; | ||
345 | unsigned int rl_connect_cookie; | 338 | unsigned int rl_connect_cookie; |
346 | struct rpcrdma_buffer *rl_buffer; | 339 | struct rpcrdma_buffer *rl_buffer; |
347 | struct rpcrdma_rep *rl_reply; | 340 | struct rpcrdma_rep *rl_reply; |
@@ -361,8 +354,7 @@ struct rpcrdma_req { | |||
361 | 354 | ||
362 | /* rl_flags */ | 355 | /* rl_flags */ |
363 | enum { | 356 | enum { |
364 | RPCRDMA_REQ_F_BACKCHANNEL = 0, | 357 | RPCRDMA_REQ_F_PENDING = 0, |
365 | RPCRDMA_REQ_F_PENDING, | ||
366 | RPCRDMA_REQ_F_TX_RESOURCES, | 358 | RPCRDMA_REQ_F_TX_RESOURCES, |
367 | }; | 359 | }; |
368 | 360 | ||
@@ -373,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) | |||
373 | } | 365 | } |
374 | 366 | ||
375 | static inline struct rpcrdma_req * | 367 | static inline struct rpcrdma_req * |
376 | rpcr_to_rdmar(struct rpc_rqst *rqst) | 368 | rpcr_to_rdmar(const struct rpc_rqst *rqst) |
377 | { | 369 | { |
378 | return rqst->rq_xprtdata; | 370 | return rqst->rq_xprtdata; |
379 | } | 371 | } |
380 | 372 | ||
381 | static inline void | 373 | static inline void |
382 | rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) | 374 | rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list) |
383 | { | 375 | { |
384 | list_add_tail(&mw->mw_list, list); | 376 | list_add_tail(&mr->mr_list, list); |
385 | } | 377 | } |
386 | 378 | ||
387 | static inline struct rpcrdma_mw * | 379 | static inline struct rpcrdma_mr * |
388 | rpcrdma_pop_mw(struct list_head *list) | 380 | rpcrdma_mr_pop(struct list_head *list) |
389 | { | 381 | { |
390 | struct rpcrdma_mw *mw; | 382 | struct rpcrdma_mr *mr; |
391 | 383 | ||
392 | mw = list_first_entry(list, struct rpcrdma_mw, mw_list); | 384 | mr = list_first_entry(list, struct rpcrdma_mr, mr_list); |
393 | list_del(&mw->mw_list); | 385 | list_del(&mr->mr_list); |
394 | return mw; | 386 | return mr; |
395 | } | 387 | } |
396 | 388 | ||
397 | /* | 389 | /* |
@@ -401,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list) | |||
401 | * One of these is associated with a transport instance | 393 | * One of these is associated with a transport instance |
402 | */ | 394 | */ |
403 | struct rpcrdma_buffer { | 395 | struct rpcrdma_buffer { |
404 | spinlock_t rb_mwlock; /* protect rb_mws list */ | 396 | spinlock_t rb_mrlock; /* protect rb_mrs list */ |
405 | struct list_head rb_mws; | 397 | struct list_head rb_mrs; |
406 | struct list_head rb_all; | 398 | struct list_head rb_all; |
407 | 399 | ||
408 | unsigned long rb_sc_head; | 400 | unsigned long rb_sc_head; |
@@ -437,13 +429,11 @@ struct rpcrdma_buffer { | |||
437 | * This data should be set with mount options | 429 | * This data should be set with mount options |
438 | */ | 430 | */ |
439 | struct rpcrdma_create_data_internal { | 431 | struct rpcrdma_create_data_internal { |
440 | struct sockaddr_storage addr; /* RDMA server address */ | ||
441 | unsigned int max_requests; /* max requests (slots) in flight */ | 432 | unsigned int max_requests; /* max requests (slots) in flight */ |
442 | unsigned int rsize; /* mount rsize - max read hdr+data */ | 433 | unsigned int rsize; /* mount rsize - max read hdr+data */ |
443 | unsigned int wsize; /* mount wsize - max write hdr+data */ | 434 | unsigned int wsize; /* mount wsize - max write hdr+data */ |
444 | unsigned int inline_rsize; /* max non-rdma read data payload */ | 435 | unsigned int inline_rsize; /* max non-rdma read data payload */ |
445 | unsigned int inline_wsize; /* max non-rdma write data payload */ | 436 | unsigned int inline_wsize; /* max non-rdma write data payload */ |
446 | unsigned int padding; /* non-rdma write header padding */ | ||
447 | }; | 437 | }; |
448 | 438 | ||
449 | /* | 439 | /* |
@@ -483,17 +473,19 @@ struct rpcrdma_memreg_ops { | |||
483 | struct rpcrdma_mr_seg * | 473 | struct rpcrdma_mr_seg * |
484 | (*ro_map)(struct rpcrdma_xprt *, | 474 | (*ro_map)(struct rpcrdma_xprt *, |
485 | struct rpcrdma_mr_seg *, int, bool, | 475 | struct rpcrdma_mr_seg *, int, bool, |
486 | struct rpcrdma_mw **); | 476 | struct rpcrdma_mr **); |
477 | void (*ro_reminv)(struct rpcrdma_rep *rep, | ||
478 | struct list_head *mrs); | ||
487 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, | 479 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, |
488 | struct list_head *); | 480 | struct list_head *); |
489 | void (*ro_recover_mr)(struct rpcrdma_mw *); | 481 | void (*ro_recover_mr)(struct rpcrdma_mr *mr); |
490 | int (*ro_open)(struct rpcrdma_ia *, | 482 | int (*ro_open)(struct rpcrdma_ia *, |
491 | struct rpcrdma_ep *, | 483 | struct rpcrdma_ep *, |
492 | struct rpcrdma_create_data_internal *); | 484 | struct rpcrdma_create_data_internal *); |
493 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | 485 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); |
494 | int (*ro_init_mr)(struct rpcrdma_ia *, | 486 | int (*ro_init_mr)(struct rpcrdma_ia *, |
495 | struct rpcrdma_mw *); | 487 | struct rpcrdma_mr *); |
496 | void (*ro_release_mr)(struct rpcrdma_mw *); | 488 | void (*ro_release_mr)(struct rpcrdma_mr *mr); |
497 | const char *ro_displayname; | 489 | const char *ro_displayname; |
498 | const int ro_send_w_inv_ok; | 490 | const int ro_send_w_inv_ok; |
499 | }; | 491 | }; |
@@ -524,6 +516,18 @@ struct rpcrdma_xprt { | |||
524 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) | 516 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) |
525 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) | 517 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
526 | 518 | ||
519 | static inline const char * | ||
520 | rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt) | ||
521 | { | ||
522 | return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]; | ||
523 | } | ||
524 | |||
525 | static inline const char * | ||
526 | rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt) | ||
527 | { | ||
528 | return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT]; | ||
529 | } | ||
530 | |||
527 | /* Setting this to 0 ensures interoperability with early servers. | 531 | /* Setting this to 0 ensures interoperability with early servers. |
528 | * Setting this to 1 enhances certain unaligned read/write performance. | 532 | * Setting this to 1 enhances certain unaligned read/write performance. |
529 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | 533 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ |
@@ -537,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy; | |||
537 | /* | 541 | /* |
538 | * Interface Adapter calls - xprtrdma/verbs.c | 542 | * Interface Adapter calls - xprtrdma/verbs.c |
539 | */ | 543 | */ |
540 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); | 544 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); |
541 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); | 545 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); |
542 | void rpcrdma_ia_close(struct rpcrdma_ia *); | 546 | void rpcrdma_ia_close(struct rpcrdma_ia *); |
543 | bool frwr_is_supported(struct rpcrdma_ia *); | 547 | bool frwr_is_supported(struct rpcrdma_ia *); |
@@ -563,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); | |||
563 | * Buffer calls - xprtrdma/verbs.c | 567 | * Buffer calls - xprtrdma/verbs.c |
564 | */ | 568 | */ |
565 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); | 569 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); |
566 | struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); | ||
567 | void rpcrdma_destroy_req(struct rpcrdma_req *); | 570 | void rpcrdma_destroy_req(struct rpcrdma_req *); |
571 | int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt); | ||
568 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); | 572 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); |
569 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); | 573 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); |
570 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); | 574 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); |
571 | void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); | 575 | void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); |
572 | 576 | ||
573 | struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); | 577 | struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); |
574 | void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); | 578 | void rpcrdma_mr_put(struct rpcrdma_mr *mr); |
579 | void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr); | ||
580 | void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr); | ||
581 | |||
575 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); | 582 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); |
576 | void rpcrdma_buffer_put(struct rpcrdma_req *); | 583 | void rpcrdma_buffer_put(struct rpcrdma_req *); |
577 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 584 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
578 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | 585 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
579 | 586 | ||
580 | void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); | ||
581 | |||
582 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, | 587 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, |
583 | gfp_t); | 588 | gfp_t); |
584 | bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); | 589 | bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); |
@@ -662,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *); | |||
662 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); | 667 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); |
663 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); | 668 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); |
664 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); | 669 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); |
665 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *); | 670 | int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); |
666 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *); | 671 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *); |
667 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); | 672 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); |
668 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | 673 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
@@ -670,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); | |||
670 | extern struct xprt_class xprt_rdma_bc; | 675 | extern struct xprt_class xprt_rdma_bc; |
671 | 676 | ||
672 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ | 677 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ |
678 | |||
679 | #include <trace/events/rpcrdma.h> | ||
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9cc850c2719e..18803021f242 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -52,6 +52,8 @@ | |||
52 | 52 | ||
53 | #include "sunrpc.h" | 53 | #include "sunrpc.h" |
54 | 54 | ||
55 | #define RPC_TCP_READ_CHUNK_SZ (3*512*1024) | ||
56 | |||
55 | static void xs_close(struct rpc_xprt *xprt); | 57 | static void xs_close(struct rpc_xprt *xprt); |
56 | static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, | 58 | static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, |
57 | struct socket *sock); | 59 | struct socket *sock); |
@@ -1003,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport) | |||
1003 | struct sock *sk; | 1005 | struct sock *sk; |
1004 | int err; | 1006 | int err; |
1005 | 1007 | ||
1008 | restart: | ||
1006 | mutex_lock(&transport->recv_mutex); | 1009 | mutex_lock(&transport->recv_mutex); |
1007 | sk = transport->inet; | 1010 | sk = transport->inet; |
1008 | if (sk == NULL) | 1011 | if (sk == NULL) |
@@ -1016,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport) | |||
1016 | } | 1019 | } |
1017 | if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) | 1020 | if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) |
1018 | break; | 1021 | break; |
1022 | if (need_resched()) { | ||
1023 | mutex_unlock(&transport->recv_mutex); | ||
1024 | cond_resched(); | ||
1025 | goto restart; | ||
1026 | } | ||
1019 | } | 1027 | } |
1020 | out: | 1028 | out: |
1021 | mutex_unlock(&transport->recv_mutex); | 1029 | mutex_unlock(&transport->recv_mutex); |
@@ -1094,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) | |||
1094 | struct sock *sk; | 1102 | struct sock *sk; |
1095 | int err; | 1103 | int err; |
1096 | 1104 | ||
1105 | restart: | ||
1097 | mutex_lock(&transport->recv_mutex); | 1106 | mutex_lock(&transport->recv_mutex); |
1098 | sk = transport->inet; | 1107 | sk = transport->inet; |
1099 | if (sk == NULL) | 1108 | if (sk == NULL) |
@@ -1107,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport) | |||
1107 | } | 1116 | } |
1108 | if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) | 1117 | if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) |
1109 | break; | 1118 | break; |
1119 | if (need_resched()) { | ||
1120 | mutex_unlock(&transport->recv_mutex); | ||
1121 | cond_resched(); | ||
1122 | goto restart; | ||
1123 | } | ||
1110 | } | 1124 | } |
1111 | out: | 1125 | out: |
1112 | mutex_unlock(&transport->recv_mutex); | 1126 | mutex_unlock(&transport->recv_mutex); |
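The local, UDP, and TCP (below) receive paths all gain the same cooperative-scheduling shape: when need_resched() fires, drop recv_mutex, cond_resched(), and restart from the top rather than yielding with the mutex held. A generic sketch with hypothetical names:

static void example_receive(struct mutex *recv_mutex,
			    bool (*pull_one)(void *arg), void *arg)
{
restart:
	mutex_lock(recv_mutex);
	for (;;) {
		if (!pull_one(arg))	/* no more queued data */
			break;
		if (need_resched()) {
			/* Don't hold the mutex across the yield:
			 * teardown paths take it too. */
			mutex_unlock(recv_mutex);
			cond_resched();
			goto restart;
		}
	}
	mutex_unlock(recv_mutex);
}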
@@ -1479,6 +1493,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns | |||
1479 | .offset = offset, | 1493 | .offset = offset, |
1480 | .count = len, | 1494 | .count = len, |
1481 | }; | 1495 | }; |
1496 | size_t ret; | ||
1482 | 1497 | ||
1483 | dprintk("RPC: xs_tcp_data_recv started\n"); | 1498 | dprintk("RPC: xs_tcp_data_recv started\n"); |
1484 | do { | 1499 | do { |
@@ -1507,9 +1522,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns | |||
1507 | /* Skip over any trailing bytes on short reads */ | 1522 | /* Skip over any trailing bytes on short reads */ |
1508 | xs_tcp_read_discard(transport, &desc); | 1523 | xs_tcp_read_discard(transport, &desc); |
1509 | } while (desc.count); | 1524 | } while (desc.count); |
1525 | ret = len - desc.count; | ||
1526 | if (ret < rd_desc->count) | ||
1527 | rd_desc->count -= ret; | ||
1528 | else | ||
1529 | rd_desc->count = 0; | ||
1510 | trace_xs_tcp_data_recv(transport); | 1530 | trace_xs_tcp_data_recv(transport); |
1511 | dprintk("RPC: xs_tcp_data_recv done\n"); | 1531 | dprintk("RPC: xs_tcp_data_recv done\n"); |
1512 | return len - desc.count; | 1532 | return ret; |
1513 | } | 1533 | } |
1514 | 1534 | ||
1515 | static void xs_tcp_data_receive(struct sock_xprt *transport) | 1535 | static void xs_tcp_data_receive(struct sock_xprt *transport) |
@@ -1517,30 +1537,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) | |||
1517 | struct rpc_xprt *xprt = &transport->xprt; | 1537 | struct rpc_xprt *xprt = &transport->xprt; |
1518 | struct sock *sk; | 1538 | struct sock *sk; |
1519 | read_descriptor_t rd_desc = { | 1539 | read_descriptor_t rd_desc = { |
1520 | .count = 2*1024*1024, | ||
1521 | .arg.data = xprt, | 1540 | .arg.data = xprt, |
1522 | }; | 1541 | }; |
1523 | unsigned long total = 0; | 1542 | unsigned long total = 0; |
1524 | int loop; | ||
1525 | int read = 0; | 1543 | int read = 0; |
1526 | 1544 | ||
1545 | restart: | ||
1527 | mutex_lock(&transport->recv_mutex); | 1546 | mutex_lock(&transport->recv_mutex); |
1528 | sk = transport->inet; | 1547 | sk = transport->inet; |
1529 | if (sk == NULL) | 1548 | if (sk == NULL) |
1530 | goto out; | 1549 | goto out; |
1531 | 1550 | ||
1532 | /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ | 1551 | /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ |
1533 | for (loop = 0; loop < 64; loop++) { | 1552 | for (;;) { |
1553 | rd_desc.count = RPC_TCP_READ_CHUNK_SZ; | ||
1534 | lock_sock(sk); | 1554 | lock_sock(sk); |
1535 | read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); | 1555 | read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); |
1536 | if (read <= 0) { | 1556 | if (rd_desc.count != 0 || read < 0) { |
1537 | clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); | 1557 | clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); |
1538 | release_sock(sk); | 1558 | release_sock(sk); |
1539 | break; | 1559 | break; |
1540 | } | 1560 | } |
1541 | release_sock(sk); | 1561 | release_sock(sk); |
1542 | total += read; | 1562 | total += read; |
1543 | rd_desc.count = 65536; | 1563 | if (need_resched()) { |
1564 | mutex_unlock(&transport->recv_mutex); | ||
1565 | cond_resched(); | ||
1566 | goto restart; | ||
1567 | } | ||
1544 | } | 1568 | } |
1545 | if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) | 1569 | if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) |
1546 | queue_work(xprtiod_workqueue, &transport->recv_worker); | 1570 | queue_work(xprtiod_workqueue, &transport->recv_worker); |
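The TCP loop drops the arbitrary 64-pass cap in favor of a per-pass byte budget: RPC_TCP_READ_CHUNK_SZ is 3*512*1024 = 1,572,864 bytes (1.5 MiB), xs_tcp_data_recv() now debits rd_desc->count by what it consumed, and a non-zero residue after tcp_read_sock() proves the socket ran dry, making it safe to clear XPRT_SOCK_DATA_READY. The exit predicate, restated as a helper (hypothetical name):

/* tcp_read_sock() stops when the actor's budget reaches zero or the
 * receive queue empties; only the latter leaves budget unspent. */
static bool example_socket_drained(const read_descriptor_t *rd_desc, int read)
{
	return rd_desc->count != 0 || read < 0;
}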
@@ -2440,7 +2464,9 @@ static void xs_tcp_setup_socket(struct work_struct *work) | |||
2440 | */ | 2464 | */ |
2441 | case -ECONNREFUSED: | 2465 | case -ECONNREFUSED: |
2442 | case -ECONNRESET: | 2466 | case -ECONNRESET: |
2467 | case -ENETDOWN: | ||
2443 | case -ENETUNREACH: | 2468 | case -ENETUNREACH: |
2469 | case -EHOSTUNREACH: | ||
2444 | case -EADDRINUSE: | 2470 | case -EADDRINUSE: |
2445 | case -ENOBUFS: | 2471 | case -ENOBUFS: |
2446 | /* | 2472 | /* |
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 47ec121574ce..c8001471da6c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c | |||
@@ -324,6 +324,7 @@ restart: | |||
324 | if (res) { | 324 | if (res) { |
325 | pr_warn("Bearer <%s> rejected, enable failure (%d)\n", | 325 | pr_warn("Bearer <%s> rejected, enable failure (%d)\n", |
326 | name, -res); | 326 | name, -res); |
327 | kfree(b); | ||
327 | return -EINVAL; | 328 | return -EINVAL; |
328 | } | 329 | } |
329 | 330 | ||
@@ -347,8 +348,10 @@ restart: | |||
347 | if (skb) | 348 | if (skb) |
348 | tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); | 349 | tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); |
349 | 350 | ||
350 | if (tipc_mon_create(net, bearer_id)) | 351 | if (tipc_mon_create(net, bearer_id)) { |
352 | bearer_disable(net, b); | ||
351 | return -ENOMEM; | 353 | return -ENOMEM; |
354 | } | ||
352 | 355 | ||
353 | pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", | 356 | pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", |
354 | name, | 357 | name, |
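Both bearer.c hunks are error-path unwinding fixes: the first frees the bearer when the media enable fails (previously leaked), the second rolls the whole enable back when monitor creation fails (previously the bearer stayed half-registered). A sketch of the resulting shape, with a hypothetical enable helper standing in for the media callback:

static int example_enable_bearer(struct net *net, struct tipc_bearer *b,
				 int bearer_id)
{
	int res = example_media_enable(net, b);	/* hypothetical */

	if (res) {
		kfree(b);		/* fix 1: no leak on enable failure */
		return -EINVAL;
	}
	if (tipc_mon_create(net, bearer_id)) {
		bearer_disable(net, b);	/* fix 2: undo the partial enable */
		return -ENOMEM;
	}
	return 0;
}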
diff --git a/net/tipc/group.c b/net/tipc/group.c index 12777cac638a..5f4ffae807ee 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c | |||
@@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, | |||
109 | static void tipc_group_decr_active(struct tipc_group *grp, | 109 | static void tipc_group_decr_active(struct tipc_group *grp, |
110 | struct tipc_member *m) | 110 | struct tipc_member *m) |
111 | { | 111 | { |
112 | if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING) | 112 | if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING || |
113 | m->state == MBR_REMITTED) | ||
113 | grp->active_cnt--; | 114 | grp->active_cnt--; |
114 | } | 115 | } |
115 | 116 | ||
@@ -351,8 +352,7 @@ void tipc_group_update_member(struct tipc_member *m, int len) | |||
351 | if (m->window >= ADV_IDLE) | 352 | if (m->window >= ADV_IDLE) |
352 | return; | 353 | return; |
353 | 354 | ||
354 | if (!list_empty(&m->congested)) | 355 | list_del_init(&m->congested); |
355 | return; | ||
356 | 356 | ||
357 | /* Sort member into congested members' list */ | 357 | /* Sort member into congested members' list */ |
358 | list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { | 358 | list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { |
@@ -369,18 +369,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) | |||
369 | u16 prev = grp->bc_snd_nxt - 1; | 369 | u16 prev = grp->bc_snd_nxt - 1; |
370 | struct tipc_member *m; | 370 | struct tipc_member *m; |
371 | struct rb_node *n; | 371 | struct rb_node *n; |
372 | u16 ackers = 0; | ||
372 | 373 | ||
373 | for (n = rb_first(&grp->members); n; n = rb_next(n)) { | 374 | for (n = rb_first(&grp->members); n; n = rb_next(n)) { |
374 | m = container_of(n, struct tipc_member, tree_node); | 375 | m = container_of(n, struct tipc_member, tree_node); |
375 | if (tipc_group_is_enabled(m)) { | 376 | if (tipc_group_is_enabled(m)) { |
376 | tipc_group_update_member(m, len); | 377 | tipc_group_update_member(m, len); |
377 | m->bc_acked = prev; | 378 | m->bc_acked = prev; |
379 | ackers++; | ||
378 | } | 380 | } |
379 | } | 381 | } |
380 | 382 | ||
381 | /* Mark number of acknowledges to expect, if any */ | 383 | /* Mark number of acknowledges to expect, if any */ |
382 | if (ack) | 384 | if (ack) |
383 | grp->bc_ackers = grp->member_cnt; | 385 | grp->bc_ackers = ackers; |
384 | grp->bc_snd_nxt++; | 386 | grp->bc_snd_nxt++; |
385 | } | 387 | } |
386 | 388 | ||
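This hunk changes what bc_ackers means: instead of the raw member count, it counts only members that were enabled during this very walk, i.e. exactly the set whose bc_acked was just primed, so members in other states can no longer stall the acknowledge window. Condensed into a helper for clarity (hypothetical name):

static u16 example_count_ackers(struct tipc_group *grp, int len, u16 prev)
{
	struct tipc_member *m;
	struct rb_node *n;
	u16 ackers = 0;

	for (n = rb_first(&grp->members); n; n = rb_next(n)) {
		m = container_of(n, struct tipc_member, tree_node);
		if (tipc_group_is_enabled(m)) {
			tipc_group_update_member(m, len);
			m->bc_acked = prev;	/* primed for this bcast */
			ackers++;		/* ...and counted for it */
		}
	}
	return ackers;	/* was grp->member_cnt, which over-counted */
}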
@@ -497,6 +499,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, | |||
497 | while ((skb = skb_peek(defq))) { | 499 | while ((skb = skb_peek(defq))) { |
498 | hdr = buf_msg(skb); | 500 | hdr = buf_msg(skb); |
499 | mtyp = msg_type(hdr); | 501 | mtyp = msg_type(hdr); |
502 | blks = msg_blocks(hdr); | ||
500 | deliver = true; | 503 | deliver = true; |
501 | ack = false; | 504 | ack = false; |
502 | update = false; | 505 | update = false; |
@@ -546,7 +549,6 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, | |||
546 | if (!update) | 549 | if (!update) |
547 | continue; | 550 | continue; |
548 | 551 | ||
549 | blks = msg_blocks(hdr); | ||
550 | tipc_group_update_rcv_win(grp, blks, node, port, xmitq); | 552 | tipc_group_update_rcv_win(grp, blks, node, port, xmitq); |
551 | } | 553 | } |
552 | return; | 554 | return; |
@@ -561,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, | |||
561 | int max_active = grp->max_active; | 563 | int max_active = grp->max_active; |
562 | int reclaim_limit = max_active * 3 / 4; | 564 | int reclaim_limit = max_active * 3 / 4; |
563 | int active_cnt = grp->active_cnt; | 565 | int active_cnt = grp->active_cnt; |
564 | struct tipc_member *m, *rm; | 566 | struct tipc_member *m, *rm, *pm; |
565 | 567 | ||
566 | m = tipc_group_find_member(grp, node, port); | 568 | m = tipc_group_find_member(grp, node, port); |
567 | if (!m) | 569 | if (!m) |
@@ -604,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, | |||
604 | pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); | 606 | pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); |
605 | tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); | 607 | tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); |
606 | } | 608 | } |
609 | grp->active_cnt--; | ||
610 | list_del_init(&m->list); | ||
611 | if (list_empty(&grp->pending)) | ||
612 | return; | ||
613 | |||
614 | /* Set oldest pending member to active and advertise */ | ||
615 | pm = list_first_entry(&grp->pending, struct tipc_member, list); | ||
616 | pm->state = MBR_ACTIVE; | ||
617 | list_move_tail(&pm->list, &grp->active); | ||
618 | grp->active_cnt++; | ||
619 | tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); | ||
607 | break; | 620 | break; |
608 | case MBR_RECLAIMING: | 621 | case MBR_RECLAIMING: |
609 | case MBR_DISCOVERED: | 622 | case MBR_DISCOVERED: |
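The REMIT-handling hunk above pairs the demotion of a member out of the active set with the promotion of the oldest pending member, so a freed slot is refilled immediately and the advertisement (GRP_ADV_MSG) goes out in the same step. A hedged sketch of that move using a plain FIFO instead of the kernel's intrusive lists:

    #include <stdio.h>

    #define MAXQ 8

    /* Toy FIFO of pending member ids; head is always the oldest. */
    struct fifo { int q[MAXQ]; int head, tail; };

    static int  fifo_empty(struct fifo *f)        { return f->head == f->tail; }
    static void fifo_push(struct fifo *f, int id) { f->q[f->tail++] = id; }
    static int  fifo_pop(struct fifo *f)          { return f->q[f->head++]; }

    int main(void)
    {
        struct fifo pending = { { 0 }, 0, 0 };
        int active_cnt = 4;

        fifo_push(&pending, 7);
        fifo_push(&pending, 9);

        /* A reclaimed member just left the active set... */
        active_cnt--;
        if (!fifo_empty(&pending)) {
            int pm = fifo_pop(&pending);  /* oldest pending member */

            active_cnt++;                 /* ...takes the freed slot */
            printf("promoted %d, active_cnt=%d\n", pm, active_cnt);
            /* the kernel additionally sends GRP_ADV_MSG here */
        }
        return 0;
    }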
@@ -648,6 +661,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, | |||
648 | } else if (mtyp == GRP_REMIT_MSG) { | 661 | } else if (mtyp == GRP_REMIT_MSG) { |
649 | msg_set_grp_remitted(hdr, m->window); | 662 | msg_set_grp_remitted(hdr, m->window); |
650 | } | 663 | } |
664 | msg_set_dest_droppable(hdr, true); | ||
651 | __skb_queue_tail(xmitq, skb); | 665 | __skb_queue_tail(xmitq, skb); |
652 | } | 666 | } |
653 | 667 | ||
@@ -689,15 +703,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, | |||
689 | msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); | 703 | msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); |
690 | __skb_queue_tail(inputq, m->event_msg); | 704 | __skb_queue_tail(inputq, m->event_msg); |
691 | } | 705 | } |
692 | if (m->window < ADV_IDLE) | 706 | list_del_init(&m->congested); |
693 | tipc_group_update_member(m, 0); | 707 | tipc_group_update_member(m, 0); |
694 | else | ||
695 | list_del_init(&m->congested); | ||
696 | return; | 708 | return; |
697 | case GRP_LEAVE_MSG: | 709 | case GRP_LEAVE_MSG: |
698 | if (!m) | 710 | if (!m) |
699 | return; | 711 | return; |
700 | m->bc_syncpt = msg_grp_bc_syncpt(hdr); | 712 | m->bc_syncpt = msg_grp_bc_syncpt(hdr); |
713 | list_del_init(&m->list); | ||
714 | list_del_init(&m->congested); | ||
715 | *usr_wakeup = true; | ||
701 | 716 | ||
702 | /* Wait until WITHDRAW event is received */ | 717 | /* Wait until WITHDRAW event is received */ |
703 | if (m->state != MBR_LEAVING) { | 718 | if (m->state != MBR_LEAVING) { |
@@ -709,8 +724,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, | |||
709 | ehdr = buf_msg(m->event_msg); | 724 | ehdr = buf_msg(m->event_msg); |
710 | msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); | 725 | msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); |
711 | __skb_queue_tail(inputq, m->event_msg); | 726 | __skb_queue_tail(inputq, m->event_msg); |
712 | *usr_wakeup = true; | ||
713 | list_del_init(&m->congested); | ||
714 | return; | 727 | return; |
715 | case GRP_ADV_MSG: | 728 | case GRP_ADV_MSG: |
716 | if (!m) | 729 | if (!m) |
@@ -741,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, | |||
741 | if (!m || m->state != MBR_RECLAIMING) | 754 | if (!m || m->state != MBR_RECLAIMING) |
742 | return; | 755 | return; |
743 | 756 | ||
744 | list_del_init(&m->list); | ||
745 | grp->active_cnt--; | ||
746 | remitted = msg_grp_remitted(hdr); | 757 | remitted = msg_grp_remitted(hdr); |
747 | 758 | ||
748 | /* Messages preceding the REMIT still in receive queue */ | 759 | /* Messages preceding the REMIT still in receive queue */ |
749 | if (m->advertised > remitted) { | 760 | if (m->advertised > remitted) { |
750 | m->state = MBR_REMITTED; | 761 | m->state = MBR_REMITTED; |
751 | in_flight = m->advertised - remitted; | 762 | in_flight = m->advertised - remitted; |
763 | m->advertised = ADV_IDLE + in_flight; | ||
764 | return; | ||
752 | } | 765 | } |
753 | /* All messages preceding the REMIT have been read */ | 766 | /* All messages preceding the REMIT have been read */ |
754 | if (m->advertised <= remitted) { | 767 | if (m->advertised <= remitted) { |
@@ -760,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, | |||
760 | tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); | 773 | tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); |
761 | 774 | ||
762 | m->advertised = ADV_IDLE + in_flight; | 775 | m->advertised = ADV_IDLE + in_flight; |
776 | grp->active_cnt--; | ||
777 | list_del_init(&m->list); | ||
763 | 778 | ||
764 | /* Set oldest pending member to active and advertise */ | 779 | /* Set oldest pending member to active and advertise */ |
765 | if (list_empty(&grp->pending)) | 780 | if (list_empty(&grp->pending)) |
@@ -849,19 +864,29 @@ void tipc_group_member_evt(struct tipc_group *grp, | |||
849 | *usr_wakeup = true; | 864 | *usr_wakeup = true; |
850 | m->usr_pending = false; | 865 | m->usr_pending = false; |
851 | node_up = tipc_node_is_up(net, node); | 866 | node_up = tipc_node_is_up(net, node); |
852 | 867 | m->event_msg = NULL; | |
853 | /* Hold back event if more messages might be expected */ | 868 | |
854 | if (m->state != MBR_LEAVING && node_up) { | 869 | if (node_up) { |
855 | m->event_msg = skb; | 870 | /* Hold back event if a LEAVE msg should be expected */ |
856 | tipc_group_decr_active(grp, m); | 871 | if (m->state != MBR_LEAVING) { |
857 | m->state = MBR_LEAVING; | 872 | m->event_msg = skb; |
858 | } else { | 873 | tipc_group_decr_active(grp, m); |
859 | if (node_up) | 874 | m->state = MBR_LEAVING; |
875 | } else { | ||
860 | msg_set_grp_bc_seqno(hdr, m->bc_syncpt); | 876 | msg_set_grp_bc_seqno(hdr, m->bc_syncpt); |
861 | else | 877 | __skb_queue_tail(inputq, skb); |
878 | } | ||
879 | } else { | ||
880 | if (m->state != MBR_LEAVING) { | ||
881 | tipc_group_decr_active(grp, m); | ||
882 | m->state = MBR_LEAVING; | ||
862 | msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); | 883 | msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); |
884 | } else { | ||
885 | msg_set_grp_bc_seqno(hdr, m->bc_syncpt); | ||
886 | } | ||
863 | __skb_queue_tail(inputq, skb); | 887 | __skb_queue_tail(inputq, skb); |
864 | } | 888 | } |
889 | list_del_init(&m->list); | ||
865 | list_del_init(&m->congested); | 890 | list_del_init(&m->congested); |
866 | } | 891 | } |
867 | *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); | 892 | *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); |
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 8e884ed06d4b..32dc33a94bc7 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c | |||
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id) | |||
642 | { | 642 | { |
643 | struct tipc_net *tn = tipc_net(net); | 643 | struct tipc_net *tn = tipc_net(net); |
644 | struct tipc_monitor *mon = tipc_monitor(net, bearer_id); | 644 | struct tipc_monitor *mon = tipc_monitor(net, bearer_id); |
645 | struct tipc_peer *self = get_self(net, bearer_id); | 645 | struct tipc_peer *self; |
646 | struct tipc_peer *peer, *tmp; | 646 | struct tipc_peer *peer, *tmp; |
647 | 647 | ||
648 | if (!mon) | ||
649 | return; | ||
650 | |||
651 | self = get_self(net, bearer_id); | ||
648 | write_lock_bh(&mon->lock); | 652 | write_lock_bh(&mon->lock); |
649 | tn->monitors[bearer_id] = NULL; | 653 | tn->monitors[bearer_id] = NULL; |
650 | list_for_each_entry_safe(peer, tmp, &self->list, list) { | 654 | list_for_each_entry_safe(peer, tmp, &self->list, list) { |
diff --git a/net/tipc/node.c b/net/tipc/node.c index 507017fe0f1b..9036d8756e73 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c | |||
@@ -1880,36 +1880,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) | |||
1880 | 1880 | ||
1881 | if (strcmp(name, tipc_bclink_name) == 0) { | 1881 | if (strcmp(name, tipc_bclink_name) == 0) { |
1882 | err = tipc_nl_add_bc_link(net, &msg); | 1882 | err = tipc_nl_add_bc_link(net, &msg); |
1883 | if (err) { | 1883 | if (err) |
1884 | nlmsg_free(msg.skb); | 1884 | goto err_free; |
1885 | return err; | ||
1886 | } | ||
1887 | } else { | 1885 | } else { |
1888 | int bearer_id; | 1886 | int bearer_id; |
1889 | struct tipc_node *node; | 1887 | struct tipc_node *node; |
1890 | struct tipc_link *link; | 1888 | struct tipc_link *link; |
1891 | 1889 | ||
1892 | node = tipc_node_find_by_name(net, name, &bearer_id); | 1890 | node = tipc_node_find_by_name(net, name, &bearer_id); |
1893 | if (!node) | 1891 | if (!node) { |
1894 | return -EINVAL; | 1892 | err = -EINVAL; |
1893 | goto err_free; | ||
1894 | } | ||
1895 | 1895 | ||
1896 | tipc_node_read_lock(node); | 1896 | tipc_node_read_lock(node); |
1897 | link = node->links[bearer_id].link; | 1897 | link = node->links[bearer_id].link; |
1898 | if (!link) { | 1898 | if (!link) { |
1899 | tipc_node_read_unlock(node); | 1899 | tipc_node_read_unlock(node); |
1900 | nlmsg_free(msg.skb); | 1900 | err = -EINVAL; |
1901 | return -EINVAL; | 1901 | goto err_free; |
1902 | } | 1902 | } |
1903 | 1903 | ||
1904 | err = __tipc_nl_add_link(net, &msg, link, 0); | 1904 | err = __tipc_nl_add_link(net, &msg, link, 0); |
1905 | tipc_node_read_unlock(node); | 1905 | tipc_node_read_unlock(node); |
1906 | if (err) { | 1906 | if (err) |
1907 | nlmsg_free(msg.skb); | 1907 | goto err_free; |
1908 | return err; | ||
1909 | } | ||
1910 | } | 1908 | } |
1911 | 1909 | ||
1912 | return genlmsg_reply(msg.skb, info); | 1910 | return genlmsg_reply(msg.skb, info); |
1911 | |||
1912 | err_free: | ||
1913 | nlmsg_free(msg.skb); | ||
1914 | return err; | ||
1913 | } | 1915 | } |
1914 | 1916 | ||
1915 | int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) | 1917 | int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) |
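The tipc_nl_node_get_link() rework is a standard consolidation: three copies of nlmsg_free()-then-return collapse into a single err_free label, so no failure path can forget the free. The shape of the transformation as a self-contained C sketch (all names are placeholders):

    #include <errno.h>
    #include <stdlib.h>

    struct msg { char *mem; };

    static struct msg *msg_alloc(void) { return calloc(1, sizeof(struct msg)); }
    static void msg_free(struct msg *m) { free(m); }
    static int step(struct msg *m, int fail) { (void)m; return fail ? -EINVAL : 0; }

    /* One exit path owns the cleanup: every failure jumps to err_free
     * with err already set, so no copy of msg_free() can be missed. */
    static int get_link(int fail_at)
    {
        struct msg *m = msg_alloc();
        int err;

        if (!m)
            return -ENOMEM;

        err = step(m, fail_at == 1);        /* tipc_nl_add_bc_link()... */
        if (err)
            goto err_free;
        err = step(m, fail_at == 2);        /* ...__tipc_nl_add_link() */
        if (err)
            goto err_free;

        msg_free(m);  /* in the patch, genlmsg_reply() consumes the skb */
        return 0;

    err_free:
        msg_free(m);
        return err;
    }

    int main(void) { return get_link(0); }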
diff --git a/net/tipc/server.c b/net/tipc/server.c index acaef80fb88c..78a292a84afc 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c | |||
@@ -264,8 +264,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con) | |||
264 | iov.iov_base = buf; | 264 | iov.iov_base = buf; |
265 | iov.iov_len = s->max_rcvbuf_size; | 265 | iov.iov_len = s->max_rcvbuf_size; |
266 | msg.msg_name = &addr; | 266 | msg.msg_name = &addr; |
267 | ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, | 267 | iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); |
268 | MSG_DONTWAIT); | 268 | ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); |
269 | if (ret <= 0) { | 269 | if (ret <= 0) { |
270 | kmem_cache_free(s->rcvbuf_cache, buf); | 270 | kmem_cache_free(s->rcvbuf_cache, buf); |
271 | goto out_close; | 271 | goto out_close; |
@@ -314,6 +314,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) | |||
314 | newcon->usr_data = s->tipc_conn_new(newcon->conid); | 314 | newcon->usr_data = s->tipc_conn_new(newcon->conid); |
315 | if (!newcon->usr_data) { | 315 | if (!newcon->usr_data) { |
316 | sock_release(newsock); | 316 | sock_release(newsock); |
317 | conn_put(newcon); | ||
317 | return -ENOMEM; | 318 | return -ENOMEM; |
318 | } | 319 | } |
319 | 320 | ||
@@ -511,7 +512,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, | |||
511 | s = con->server; | 512 | s = con->server; |
512 | scbr = s->tipc_conn_new(*conid); | 513 | scbr = s->tipc_conn_new(*conid); |
513 | if (!scbr) { | 514 | if (!scbr) { |
514 | tipc_close_conn(con); | 515 | conn_put(con); |
515 | return false; | 516 | return false; |
516 | } | 517 | } |
517 | 518 | ||
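Both server.c changes are reference-count balance fixes: when tipc_conn_new() fails, the error path must drop exactly the reference taken earlier (conn_put), neither leaking it nor tearing the whole connection down. The invariant in miniature, with C11 atomics loosely standing in for the kernel's kref-style counting:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct conn { atomic_int ref; };

    static struct conn *conn_alloc(void)
    {
        struct conn *c = malloc(sizeof(*c));

        if (c)
            atomic_init(&c->ref, 1);   /* caller owns one reference */
        return c;
    }

    static void conn_put(struct conn *c)
    {
        if (atomic_fetch_sub(&c->ref, 1) == 1)
            free(c);                   /* last reference dropped */
    }

    static int attach_usr_data(struct conn *c) { (void)c; return -1; }

    int main(void)
    {
        struct conn *c = conn_alloc();

        if (!c)
            return 1;
        if (attach_usr_data(c) < 0) {
            conn_put(c);  /* the fix: drop the alloc-time reference */
            return 0;     /* error handled, nothing leaked */
        }
        conn_put(c);
        return 0;
    }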
diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5d18c0caa92b..2aa46e8cd8fe 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c | |||
@@ -710,13 +710,13 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, | |||
710 | * imply that the operation will succeed, merely that it should be performed | 710 | * imply that the operation will succeed, merely that it should be performed |
711 | * and will not block. | 711 | * and will not block. |
712 | */ | 712 | */ |
713 | static unsigned int tipc_poll(struct file *file, struct socket *sock, | 713 | static __poll_t tipc_poll(struct file *file, struct socket *sock, |
714 | poll_table *wait) | 714 | poll_table *wait) |
715 | { | 715 | { |
716 | struct sock *sk = sock->sk; | 716 | struct sock *sk = sock->sk; |
717 | struct tipc_sock *tsk = tipc_sk(sk); | 717 | struct tipc_sock *tsk = tipc_sk(sk); |
718 | struct tipc_group *grp = tsk->group; | 718 | struct tipc_group *grp = tsk->group; |
719 | u32 revents = 0; | 719 | __poll_t revents = 0; |
720 | 720 | ||
721 | sock_poll_wait(file, sk_sleep(sk), wait); | 721 | sock_poll_wait(file, sk_sleep(sk), wait); |
722 | 722 | ||
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, | |||
727 | 727 | ||
728 | switch (sk->sk_state) { | 728 | switch (sk->sk_state) { |
729 | case TIPC_ESTABLISHED: | 729 | case TIPC_ESTABLISHED: |
730 | case TIPC_CONNECTING: | ||
730 | if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) | 731 | if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) |
731 | revents |= POLLOUT; | 732 | revents |= POLLOUT; |
732 | /* fall thru' */ | 733 | /* fall thru' */ |
733 | case TIPC_LISTEN: | 734 | case TIPC_LISTEN: |
734 | case TIPC_CONNECTING: | ||
735 | if (!skb_queue_empty(&sk->sk_receive_queue)) | 735 | if (!skb_queue_empty(&sk->sk_receive_queue)) |
736 | revents |= POLLIN | POLLRDNORM; | 736 | revents |= POLLIN | POLLRDNORM; |
737 | break; | 737 | break; |
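Moving TIPC_CONNECTING above the POLLOUT check means a connecting socket now reports writability under the same congestion conditions as an established one, while still sharing the readability check with listeners. The resulting switch, reduced to a hedged userspace sketch:

    #define _GNU_SOURCE
    #include <poll.h>
    #include <stdbool.h>

    enum sk_state { ESTABLISHED, CONNECTING, LISTEN, OTHER };

    /* Reduction of the patched tipc_poll() switch: CONNECTING shares
     * the writability check with ESTABLISHED, then both fall through
     * to the readability check shared with LISTEN. */
    static short sock_mask(enum sk_state st, bool congested, bool rx_queued)
    {
        short revents = 0;

        switch (st) {
        case ESTABLISHED:
        case CONNECTING:
            if (!congested)
                revents |= POLLOUT;
            /* fall through, as in the original */
        case LISTEN:
            if (rx_queued)
                revents |= POLLIN | POLLRDNORM;
            break;
        default:
            break;
        }
        return revents;
    }

    int main(void)
    {
        return sock_mask(CONNECTING, false, true)
               != (POLLOUT | POLLIN | POLLRDNORM);
    }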
@@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, | |||
1140 | __skb_dequeue(arrvq); | 1140 | __skb_dequeue(arrvq); |
1141 | __skb_queue_tail(inputq, skb); | 1141 | __skb_queue_tail(inputq, skb); |
1142 | } | 1142 | } |
1143 | refcount_dec(&skb->users); | 1143 | kfree_skb(skb); |
1144 | spin_unlock_bh(&inputq->lock); | 1144 | spin_unlock_bh(&inputq->lock); |
1145 | continue; | 1145 | continue; |
1146 | } | 1146 | } |
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ecca64fc6a6f..3deabcab4882 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c | |||
@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) | |||
371 | goto rcu_out; | 371 | goto rcu_out; |
372 | } | 372 | } |
373 | 373 | ||
374 | tipc_rcv(sock_net(sk), skb, b); | ||
375 | rcu_read_unlock(); | ||
376 | return 0; | ||
377 | |||
378 | rcu_out: | 374 | rcu_out: |
379 | rcu_read_unlock(); | 375 | rcu_read_unlock(); |
380 | out: | 376 | out: |
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index e07ee3ae0023..736719c8314e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c | |||
@@ -367,8 +367,10 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, | |||
367 | 367 | ||
368 | crypto_info = &ctx->crypto_send; | 368 | crypto_info = &ctx->crypto_send; |
369 | /* Currently we don't support set crypto info more than one time */ | 369 | /* Currently we don't support set crypto info more than one time */ |
370 | if (TLS_CRYPTO_INFO_READY(crypto_info)) | 370 | if (TLS_CRYPTO_INFO_READY(crypto_info)) { |
371 | rc = -EBUSY; | ||
371 | goto out; | 372 | goto out; |
373 | } | ||
372 | 374 | ||
373 | rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); | 375 | rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); |
374 | if (rc) { | 376 | if (rc) { |
@@ -386,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, | |||
386 | case TLS_CIPHER_AES_GCM_128: { | 388 | case TLS_CIPHER_AES_GCM_128: { |
387 | if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { | 389 | if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { |
388 | rc = -EINVAL; | 390 | rc = -EINVAL; |
389 | goto out; | 391 | goto err_crypto_info; |
390 | } | 392 | } |
391 | rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), | 393 | rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), |
392 | optlen - sizeof(*crypto_info)); | 394 | optlen - sizeof(*crypto_info)); |
@@ -398,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, | |||
398 | } | 400 | } |
399 | default: | 401 | default: |
400 | rc = -EINVAL; | 402 | rc = -EINVAL; |
401 | goto out; | 403 | goto err_crypto_info; |
402 | } | 404 | } |
403 | 405 | ||
404 | /* currently SW is default, we will have ethtool in future */ | 406 | /* currently SW is default, we will have ethtool in future */ |
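The first tls_main.c hunk fixes a silent-success bug: the old code jumped to out with rc still 0, so a second attempt to set crypto info on the same socket appeared to succeed while doing nothing; it now reports -EBUSY. The later hunks retarget the -EINVAL exits at err_crypto_info so partially copied key material is wiped. The pattern in miniature:

    #include <errno.h>

    /* Illustrative only: every early exit must decide what rc is.
     * "goto out" with rc still 0 silently reports success. */
    static int set_crypto_info(int already_set, int bad_len)
    {
        int rc = 0;

        if (already_set) {
            rc = -EBUSY;       /* previously fell through as 0 */
            goto out;
        }
        if (bad_len) {
            rc = -EINVAL;
            goto err_wipe;     /* also discard partial key material */
        }
        goto out;

    err_wipe:
        /* the real code clears the half-written crypto_info here */
    out:
        return rc;
    }

    int main(void) { return set_crypto_info(1, 0) == -EBUSY ? 0 : 1; }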
@@ -454,6 +456,15 @@ static int tls_init(struct sock *sk) | |||
454 | struct tls_context *ctx; | 456 | struct tls_context *ctx; |
455 | int rc = 0; | 457 | int rc = 0; |
456 | 458 | ||
459 | /* The TLS ulp is currently supported only for TCP sockets | ||
460 | * in ESTABLISHED state. | ||
461 | * Supporting sockets in LISTEN state will require us | ||
462 | * to modify the accept implementation to clone rather then | ||
463 | * share the ulp context. | ||
464 | */ | ||
465 | if (sk->sk_state != TCP_ESTABLISHED) | ||
466 | return -ENOTSUPP; | ||
467 | |||
457 | /* allocate tls context */ | 468 | /* allocate tls context */ |
458 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); | 469 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); |
459 | if (!ctx) { | 470 | if (!ctx) { |
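tls_init() now refuses to attach the ULP unless the TCP socket is already ESTABLISHED, since accept() on a listener would share rather than clone the ulp context. From userspace that means the TCP_ULP setsockopt belongs after connect(); a sketch (requires a kernel built with TLS support; the TCP_ULP constant comes from the Linux uapi headers):

    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef SOL_TCP
    #define SOL_TCP 6
    #endif
    #ifndef TCP_ULP
    #define TCP_ULP 31             /* from the Linux uapi headers */
    #endif

    /* Attach the kernel TLS ULP. With this patch the kernel refuses
     * unless the TCP socket is already in ESTABLISHED state. */
    static int attach_tls(int fd)
    {
        return setsockopt(fd, SOL_TCP, TCP_ULP, "tls", sizeof("tls"));
    }

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (fd < 0)
            return 1;
        /* Unconnected socket: on a kernel with this check the call
         * fails; connect() first, then attach_tls(fd). */
        return attach_tls(fd) == 0;
    }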
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 73d19210dd49..0a9b72fbd761 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c | |||
@@ -391,7 +391,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) | |||
391 | 391 | ||
392 | while (msg_data_left(msg)) { | 392 | while (msg_data_left(msg)) { |
393 | if (sk->sk_err) { | 393 | if (sk->sk_err) { |
394 | ret = sk->sk_err; | 394 | ret = -sk->sk_err; |
395 | goto send_end; | 395 | goto send_end; |
396 | } | 396 | } |
397 | 397 | ||
@@ -544,7 +544,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, | |||
544 | size_t copy, required_size; | 544 | size_t copy, required_size; |
545 | 545 | ||
546 | if (sk->sk_err) { | 546 | if (sk->sk_err) { |
547 | ret = sk->sk_err; | 547 | ret = -sk->sk_err; |
548 | goto sendpage_end; | 548 | goto sendpage_end; |
549 | } | 549 | } |
550 | 550 | ||
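Both sendmsg/sendpage hunks fix a sign convention: sk->sk_err holds a positive errno while these paths must return a negative errno, so the unnegated value could be mistaken for a byte count by callers. In miniature (EPIPE is 32, so the buggy form is literally indistinguishable from "32 bytes sent"):

    #include <errno.h>
    #include <stdio.h>

    static int sk_err = EPIPE;  /* sk->sk_err style: positive errno, 0 = ok */

    static long send_sketch(long len)
    {
        if (sk_err)
            return -sk_err;     /* negative errno: unambiguously an error */
        return len;             /* success: bytes queued */
    }

    int main(void)
    {
        long ret = send_sketch(32);

        if (ret < 0)
            printf("send failed: %ld\n", ret); /* -32, not "32 bytes" */
        return 0;
    }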
@@ -577,6 +577,8 @@ alloc_payload: | |||
577 | get_page(page); | 577 | get_page(page); |
578 | sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; | 578 | sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; |
579 | sg_set_page(sg, page, copy, offset); | 579 | sg_set_page(sg, page, copy, offset); |
580 | sg_unmark_end(sg); | ||
581 | |||
580 | ctx->sg_plaintext_num_elem++; | 582 | ctx->sg_plaintext_num_elem++; |
581 | 583 | ||
582 | sk_mem_charge(sk, copy); | 584 | sk_mem_charge(sk, copy); |
@@ -681,18 +683,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) | |||
681 | } | 683 | } |
682 | default: | 684 | default: |
683 | rc = -EINVAL; | 685 | rc = -EINVAL; |
684 | goto out; | 686 | goto free_priv; |
685 | } | 687 | } |
686 | 688 | ||
687 | ctx->prepend_size = TLS_HEADER_SIZE + nonce_size; | 689 | ctx->prepend_size = TLS_HEADER_SIZE + nonce_size; |
688 | ctx->tag_size = tag_size; | 690 | ctx->tag_size = tag_size; |
689 | ctx->overhead_size = ctx->prepend_size + ctx->tag_size; | 691 | ctx->overhead_size = ctx->prepend_size + ctx->tag_size; |
690 | ctx->iv_size = iv_size; | 692 | ctx->iv_size = iv_size; |
691 | ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, | 693 | ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); |
692 | GFP_KERNEL); | ||
693 | if (!ctx->iv) { | 694 | if (!ctx->iv) { |
694 | rc = -ENOMEM; | 695 | rc = -ENOMEM; |
695 | goto out; | 696 | goto free_priv; |
696 | } | 697 | } |
697 | memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); | 698 | memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); |
698 | memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); | 699 | memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); |
@@ -740,7 +741,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) | |||
740 | 741 | ||
741 | rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size); | 742 | rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size); |
742 | if (!rc) | 743 | if (!rc) |
743 | goto out; | 744 | return 0; |
744 | 745 | ||
745 | free_aead: | 746 | free_aead: |
746 | crypto_free_aead(sw_ctx->aead_send); | 747 | crypto_free_aead(sw_ctx->aead_send); |
@@ -751,6 +752,9 @@ free_rec_seq: | |||
751 | free_iv: | 752 | free_iv: |
752 | kfree(ctx->iv); | 753 | kfree(ctx->iv); |
753 | ctx->iv = NULL; | 754 | ctx->iv = NULL; |
755 | free_priv: | ||
756 | kfree(ctx->priv_ctx); | ||
757 | ctx->priv_ctx = NULL; | ||
754 | out: | 758 | out: |
755 | return rc; | 759 | return rc; |
756 | } | 760 | } |
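tls_set_sw_offload() now unwinds strictly in reverse order of acquisition, with free_priv added as the outermost rung; NULLing the freed pointers keeps a later context teardown from double-freeing them. The ladder shape as a sketch (hypothetical three-resource init, not the TLS code itself):

    #include <errno.h>
    #include <stdlib.h>

    struct ctx { void *priv, *iv, *rec_seq; };

    /* A failure at step N frees resources N-1..1 and nothing else;
     * each rung frees one resource and falls into the next. */
    static int ctx_init(struct ctx *c, int fail_at)
    {
        c->priv = malloc(16);
        if (!c->priv)
            return -ENOMEM;
        c->iv = malloc(16);
        if (!c->iv)
            goto free_priv;
        c->rec_seq = fail_at ? NULL : malloc(8);
        if (!c->rec_seq)
            goto free_iv;
        return 0;

    free_iv:
        free(c->iv);
        c->iv = NULL;        /* keep a later release from double-freeing */
    free_priv:
        free(c->priv);
        c->priv = NULL;
        return -ENOMEM;
    }

    int main(void)
    {
        struct ctx c = { 0 };

        return ctx_init(&c, 1) == -ENOMEM ? 0 : 1;
    }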
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a9ee634f3c42..6b7678df41e5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -367,7 +367,7 @@ static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int | |||
367 | /* relaying can only happen while the wq still exists */ | 367 | /* relaying can only happen while the wq still exists */ |
368 | u_sleep = sk_sleep(&u->sk); | 368 | u_sleep = sk_sleep(&u->sk); |
369 | if (u_sleep) | 369 | if (u_sleep) |
370 | wake_up_interruptible_poll(u_sleep, key); | 370 | wake_up_interruptible_poll(u_sleep, key_to_poll(key)); |
371 | 371 | ||
372 | return 0; | 372 | return 0; |
373 | } | 373 | } |
@@ -638,8 +638,8 @@ static int unix_stream_connect(struct socket *, struct sockaddr *, | |||
638 | static int unix_socketpair(struct socket *, struct socket *); | 638 | static int unix_socketpair(struct socket *, struct socket *); |
639 | static int unix_accept(struct socket *, struct socket *, int, bool); | 639 | static int unix_accept(struct socket *, struct socket *, int, bool); |
640 | static int unix_getname(struct socket *, struct sockaddr *, int *, int); | 640 | static int unix_getname(struct socket *, struct sockaddr *, int *, int); |
641 | static unsigned int unix_poll(struct file *, struct socket *, poll_table *); | 641 | static __poll_t unix_poll(struct file *, struct socket *, poll_table *); |
642 | static unsigned int unix_dgram_poll(struct file *, struct socket *, | 642 | static __poll_t unix_dgram_poll(struct file *, struct socket *, |
643 | poll_table *); | 643 | poll_table *); |
644 | static int unix_ioctl(struct socket *, unsigned int, unsigned long); | 644 | static int unix_ioctl(struct socket *, unsigned int, unsigned long); |
645 | static int unix_shutdown(struct socket *, int); | 645 | static int unix_shutdown(struct socket *, int); |
@@ -2640,10 +2640,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
2640 | return err; | 2640 | return err; |
2641 | } | 2641 | } |
2642 | 2642 | ||
2643 | static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) | 2643 | static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) |
2644 | { | 2644 | { |
2645 | struct sock *sk = sock->sk; | 2645 | struct sock *sk = sock->sk; |
2646 | unsigned int mask; | 2646 | __poll_t mask; |
2647 | 2647 | ||
2648 | sock_poll_wait(file, sk_sleep(sk), wait); | 2648 | sock_poll_wait(file, sk_sleep(sk), wait); |
2649 | mask = 0; | 2649 | mask = 0; |
@@ -2675,11 +2675,12 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table | |||
2675 | return mask; | 2675 | return mask; |
2676 | } | 2676 | } |
2677 | 2677 | ||
2678 | static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, | 2678 | static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, |
2679 | poll_table *wait) | 2679 | poll_table *wait) |
2680 | { | 2680 | { |
2681 | struct sock *sk = sock->sk, *other; | 2681 | struct sock *sk = sock->sk, *other; |
2682 | unsigned int mask, writable; | 2682 | unsigned int writable; |
2683 | __poll_t mask; | ||
2683 | 2684 | ||
2684 | sock_poll_wait(file, sk_sleep(sk), wait); | 2685 | sock_poll_wait(file, sk_sleep(sk), wait); |
2685 | mask = 0; | 2686 | mask = 0; |
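The af_unix conversions are part of the tree-wide __poll_t work: poll masks get a distinct bitwise type so sparse can catch a mask being mixed with plain ints or negative errnos (the very confusion the 9p p9_fd_poll() change at the top of this diff untangles). A rough userspace analogue using a dedicated typedef:

    #define _GNU_SOURCE
    #include <poll.h>
    #include <stdio.h>

    /* Userspace stand-in for __poll_t: a distinct typedef documents
     * that a value is a POLL* bitmask, never a byte count or errno.
     * (In the kernel, sparse's __bitwise annotation enforces it.) */
    typedef unsigned int poll_mask_t;

    static poll_mask_t dgram_mask(int rx_queued, int peer_full)
    {
        poll_mask_t mask = 0;

        if (rx_queued)
            mask |= POLLIN | POLLRDNORM;
        if (!peer_full)
            mask |= POLLOUT | POLLWRNORM;
        return mask;
    }

    int main(void)
    {
        printf("mask=%#x\n", dgram_mask(1, 0));
        return 0;
    }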
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5d28abf87fbf..9d95e773f4c8 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c | |||
@@ -850,11 +850,11 @@ static int vsock_shutdown(struct socket *sock, int mode) | |||
850 | return err; | 850 | return err; |
851 | } | 851 | } |
852 | 852 | ||
853 | static unsigned int vsock_poll(struct file *file, struct socket *sock, | 853 | static __poll_t vsock_poll(struct file *file, struct socket *sock, |
854 | poll_table *wait) | 854 | poll_table *wait) |
855 | { | 855 | { |
856 | struct sock *sk; | 856 | struct sock *sk; |
857 | unsigned int mask; | 857 | __poll_t mask; |
858 | struct vsock_sock *vsk; | 858 | struct vsock_sock *vsk; |
859 | 859 | ||
860 | sk = sock->sk; | 860 | sk = sock->sk; |
@@ -951,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, | |||
951 | * POLLOUT|POLLWRNORM when peer is closed and nothing to read, | 951 | * POLLOUT|POLLWRNORM when peer is closed and nothing to read, |
952 | * but local send is not shutdown. | 952 | * but local send is not shutdown. |
953 | */ | 953 | */ |
954 | if (sk->sk_state == TCP_CLOSE) { | 954 | if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) { |
955 | if (!(sk->sk_shutdown & SEND_SHUTDOWN)) | 955 | if (!(sk->sk_shutdown & SEND_SHUTDOWN)) |
956 | mask |= POLLOUT | POLLWRNORM; | 956 | mask |= POLLOUT | POLLWRNORM; |
957 | 957 | ||
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 5583df708b8c..a827547aa102 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c | |||
@@ -487,7 +487,7 @@ static void hvs_release(struct vsock_sock *vsk) | |||
487 | 487 | ||
488 | lock_sock(sk); | 488 | lock_sock(sk); |
489 | 489 | ||
490 | sk->sk_state = SS_DISCONNECTING; | 490 | sk->sk_state = TCP_CLOSING; |
491 | vsock_remove_sock(vsk); | 491 | vsock_remove_sock(vsk); |
492 | 492 | ||
493 | release_sock(sk); | 493 | release_sock(sk); |
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 391775e3575c..a7a73ffe675b 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c | |||
@@ -797,11 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk) | |||
797 | 797 | ||
798 | /* We should not be sending anymore since the peer won't be | 798 | /* We should not be sending anymore since the peer won't be |
799 | * there to receive, but we can still receive if there is data | 799 | * there to receive, but we can still receive if there is data |
800 | * left in our consume queue. | 800 | * left in our consume queue. If the local endpoint is a host, |
801 | * we can't call vsock_stream_has_data, since that may block, | ||
802 | * but a host endpoint can't read data once the VM has | ||
803 | * detached, so there is no available data in that case. | ||
801 | */ | 804 | */ |
802 | if (vsock_stream_has_data(vsk) <= 0) { | 805 | if (vsk->local_addr.svm_cid == VMADDR_CID_HOST || |
803 | sk->sk_state = TCP_CLOSE; | 806 | vsock_stream_has_data(vsk) <= 0) { |
804 | |||
805 | if (sk->sk_state == TCP_SYN_SENT) { | 807 | if (sk->sk_state == TCP_SYN_SENT) { |
806 | /* The peer may detach from a queue pair while | 808 | /* The peer may detach from a queue pair while |
807 | * we are still in the connecting state, i.e., | 809 | * we are still in the connecting state, i.e., |
@@ -811,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk) | |||
811 | * event like a reset. | 813 | * event like a reset. |
812 | */ | 814 | */ |
813 | 815 | ||
816 | sk->sk_state = TCP_CLOSE; | ||
814 | sk->sk_err = ECONNRESET; | 817 | sk->sk_err = ECONNRESET; |
815 | sk->sk_error_report(sk); | 818 | sk->sk_error_report(sk); |
816 | return; | 819 | return; |
817 | } | 820 | } |
821 | sk->sk_state = TCP_CLOSE; | ||
818 | } | 822 | } |
819 | sk->sk_state_change(sk); | 823 | sk->sk_state_change(sk); |
820 | } | 824 | } |
@@ -2144,7 +2148,7 @@ module_exit(vmci_transport_exit); | |||
2144 | 2148 | ||
2145 | MODULE_AUTHOR("VMware, Inc."); | 2149 | MODULE_AUTHOR("VMware, Inc."); |
2146 | MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); | 2150 | MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); |
2147 | MODULE_VERSION("1.0.4.0-k"); | 2151 | MODULE_VERSION("1.0.5.0-k"); |
2148 | MODULE_LICENSE("GPL v2"); | 2152 | MODULE_LICENSE("GPL v2"); |
2149 | MODULE_ALIAS("vmware_vsock"); | 2153 | MODULE_ALIAS("vmware_vsock"); |
2150 | MODULE_ALIAS_NETPROTO(PF_VSOCK); | 2154 | MODULE_ALIAS_NETPROTO(PF_VSOCK); |
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index da91bb547db3..1abcc4fc4df1 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig | |||
@@ -20,6 +20,10 @@ config CFG80211 | |||
20 | tristate "cfg80211 - wireless configuration API" | 20 | tristate "cfg80211 - wireless configuration API" |
21 | depends on RFKILL || !RFKILL | 21 | depends on RFKILL || !RFKILL |
22 | select FW_LOADER | 22 | select FW_LOADER |
23 | # may need to update this when certificates are changed and are | ||
24 | # using a different algorithm, though right now they shouldn't | ||
25 | # (this is here rather than below to allow it to be a module) | ||
26 | select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS | ||
23 | ---help--- | 27 | ---help--- |
24 | cfg80211 is the Linux wireless LAN (802.11) configuration API. | 28 | cfg80211 is the Linux wireless LAN (802.11) configuration API. |
25 | Enable this if you have a wireless device. | 29 | Enable this if you have a wireless device. |
@@ -113,6 +117,9 @@ config CFG80211_EXTRA_REGDB_KEYDIR | |||
113 | certificates like in the kernel sources (net/wireless/certs/) | 117 | certificates like in the kernel sources (net/wireless/certs/) |
114 | that shall be accepted for a signed regulatory database. | 118 | that shall be accepted for a signed regulatory database. |
115 | 119 | ||
120 | Note that you need to also select the correct CRYPTO_<hash> modules | ||
121 | for your certificates, and if cfg80211 is built-in they also must be. | ||
122 | |||
116 | config CFG80211_REG_CELLULAR_HINTS | 123 | config CFG80211_REG_CELLULAR_HINTS |
117 | bool "cfg80211 regulatory support for cellular base station hints" | 124 | bool "cfg80211 regulatory support for cellular base station hints" |
118 | depends on CFG80211_CERTIFICATION_ONUS | 125 | depends on CFG80211_CERTIFICATION_ONUS |
diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 278d979c211a..1d84f91bbfb0 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile | |||
@@ -23,19 +23,36 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),) | |||
23 | cfg80211-y += extra-certs.o | 23 | cfg80211-y += extra-certs.o |
24 | endif | 24 | endif |
25 | 25 | ||
26 | $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) | 26 | $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) |
27 | @$(kecho) " GEN $@" | 27 | @$(kecho) " GEN $@" |
28 | @echo '#include "reg.h"' > $@ | 28 | @(echo '#include "reg.h"'; \ |
29 | @echo 'const u8 shipped_regdb_certs[] = {' >> $@ | 29 | echo 'const u8 shipped_regdb_certs[] = {'; \ |
30 | @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done | 30 | cat $^ ; \ |
31 | @echo '};' >> $@ | 31 | echo '};'; \ |
32 | @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ | 32 | echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ |
33 | ) > $@ | ||
33 | 34 | ||
34 | $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ | 35 | $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ |
35 | $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) | 36 | $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) |
36 | @$(kecho) " GEN $@" | 37 | @$(kecho) " GEN $@" |
37 | @echo '#include "reg.h"' > $@ | 38 | @(set -e; \ |
38 | @echo 'const u8 extra_regdb_certs[] = {' >> $@ | 39 | allf=""; \ |
39 | @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done | 40 | for f in $^ ; do \ |
40 | @echo '};' >> $@ | 41 | # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ |
41 | @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ | 42 | thisf=$$(od -An -v -tx1 < $$f | \ |
43 | sed -e 's/ /\n/g' | \ | ||
44 | sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \ | ||
45 | sed -e 's/^/0x/;s/$$/,/'); \ | ||
46 | # file should not be empty - maybe command substitution failed? \ | ||
47 | test ! -z "$$thisf";\ | ||
48 | allf=$$allf$$thisf;\ | ||
49 | done; \ | ||
50 | ( \ | ||
51 | echo '#include "reg.h"'; \ | ||
52 | echo 'const u8 extra_regdb_certs[] = {'; \ | ||
53 | echo "$$allf"; \ | ||
54 | echo '};'; \ | ||
55 | echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \ | ||
56 | ) > $@) | ||
57 | |||
58 | clean-files += shipped-certs.c extra-certs.c | ||
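The Makefile change ships certificates as pre-generated .hex fragments and builds extra keys with od/sed so that hexdump is no longer a build-time dependency. The pipeline is plain binary-to-C-array conversion; an equivalent tiny generator matching the one-0xNN,-per-line layout could look like this (file names in the usage line are illustrative):

    #include <stdio.h>

    /* Emit stdin as C array initializer lines, matching the
     * "0x%.2x,\n" layout of the removed hexdump recipe. */
    int main(void)
    {
        int c;

        while ((c = getchar()) != EOF)
            printf("0x%02x,\n", c);
        return 0;
    }

Something like cc hexgen.c -o hexgen && ./hexgen < sforshee.x509 > sforshee.hex would reproduce the shipped format, minus the leading comment line.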
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex new file mode 100644 index 000000000000..14ea66643ffa --- /dev/null +++ b/net/wireless/certs/sforshee.hex | |||
@@ -0,0 +1,86 @@ | |||
1 | /* Seth Forshee's regdb certificate */ | ||
2 | 0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c, | ||
3 | 0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae, | ||
4 | 0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a, | ||
5 | 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b, | ||
6 | 0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, | ||
7 | 0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, | ||
8 | 0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, | ||
9 | 0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30, | ||
10 | 0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a, | ||
11 | 0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39, | ||
12 | 0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, | ||
13 | 0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06, | ||
14 | 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66, | ||
15 | 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82, | ||
16 | 0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, | ||
17 | 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, | ||
18 | 0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82, | ||
19 | 0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5, | ||
20 | 0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2, | ||
21 | 0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac, | ||
22 | 0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c, | ||
23 | 0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38, | ||
24 | 0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d, | ||
25 | 0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20, | ||
26 | 0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b, | ||
27 | 0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57, | ||
28 | 0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b, | ||
29 | 0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51, | ||
30 | 0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a, | ||
31 | 0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18, | ||
32 | 0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98, | ||
33 | 0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1, | ||
34 | 0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28, | ||
35 | 0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71, | ||
36 | 0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a, | ||
37 | 0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85, | ||
38 | 0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30, | ||
39 | 0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7, | ||
40 | 0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65, | ||
41 | 0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3, | ||
42 | 0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18, | ||
43 | 0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36, | ||
44 | 0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1, | ||
45 | 0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96, | ||
46 | 0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c, | ||
47 | 0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11, | ||
48 | 0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7, | ||
49 | 0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6, | ||
50 | 0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0, | ||
51 | 0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02, | ||
52 | 0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09, | ||
53 | 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, | ||
54 | 0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00, | ||
55 | 0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf, | ||
56 | 0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93, | ||
57 | 0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7, | ||
58 | 0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9, | ||
59 | 0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3, | ||
60 | 0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec, | ||
61 | 0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0, | ||
62 | 0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3, | ||
63 | 0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4, | ||
64 | 0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32, | ||
65 | 0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74, | ||
66 | 0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22, | ||
67 | 0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86, | ||
68 | 0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c, | ||
69 | 0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06, | ||
70 | 0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1, | ||
71 | 0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58, | ||
72 | 0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4, | ||
73 | 0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72, | ||
74 | 0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79, | ||
75 | 0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a, | ||
76 | 0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f, | ||
77 | 0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47, | ||
78 | 0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a, | ||
79 | 0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28, | ||
80 | 0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2, | ||
81 | 0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87, | ||
82 | 0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d, | ||
83 | 0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc, | ||
84 | 0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16, | ||
85 | 0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f, | ||
86 | 0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14, | ||
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509 deleted file mode 100644 index c6f8f9d6b988..000000000000 --- a/net/wireless/certs/sforshee.x509 +++ /dev/null | |||
Binary files differ | |||
diff --git a/net/wireless/core.c b/net/wireless/core.c index fdde0d98fde1..a6f3cac8c640 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c | |||
@@ -439,6 +439,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, | |||
439 | if (rv) | 439 | if (rv) |
440 | goto use_default_name; | 440 | goto use_default_name; |
441 | } else { | 441 | } else { |
442 | int rv; | ||
443 | |||
442 | use_default_name: | 444 | use_default_name: |
443 | /* NOTE: This is *probably* safe w/out holding rtnl because of | 445 | /* NOTE: This is *probably* safe w/out holding rtnl because of |
444 | * the restrictions on phy names. Probably this call could | 446 | * the restrictions on phy names. Probably this call could |
@@ -446,7 +448,11 @@ use_default_name: | |||
446 | * phyX. But, might should add some locking and check return | 448 | * phyX. But, might should add some locking and check return |
447 | * value, and use a different name if this one exists? | 449 | * value, and use a different name if this one exists? |
448 | */ | 450 | */ |
449 | dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); | 451 | rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); |
452 | if (rv < 0) { | ||
453 | kfree(rdev); | ||
454 | return NULL; | ||
455 | } | ||
450 | } | 456 | } |
451 | 457 | ||
452 | INIT_LIST_HEAD(&rdev->wiphy.wdev_list); | 458 | INIT_LIST_HEAD(&rdev->wiphy.wdev_list); |
diff --git a/net/wireless/core.h b/net/wireless/core.h index d2f7e8b8a097..eaff636169c2 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h | |||
@@ -507,8 +507,6 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, | |||
507 | void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, | 507 | void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, |
508 | struct wireless_dev *wdev); | 508 | struct wireless_dev *wdev); |
509 | 509 | ||
510 | #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 | ||
511 | |||
512 | #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS | 510 | #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS |
513 | #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) | 511 | #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) |
514 | #else | 512 | #else |
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b1ac23ca20c8..542a4fc0a8d7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c | |||
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag | |||
2610 | case NL80211_IFTYPE_AP: | 2610 | case NL80211_IFTYPE_AP: |
2611 | if (wdev->ssid_len && | 2611 | if (wdev->ssid_len && |
2612 | nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) | 2612 | nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) |
2613 | goto nla_put_failure; | 2613 | goto nla_put_failure_locked; |
2614 | break; | 2614 | break; |
2615 | case NL80211_IFTYPE_STATION: | 2615 | case NL80211_IFTYPE_STATION: |
2616 | case NL80211_IFTYPE_P2P_CLIENT: | 2616 | case NL80211_IFTYPE_P2P_CLIENT: |
@@ -2618,12 +2618,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag | |||
2618 | const u8 *ssid_ie; | 2618 | const u8 *ssid_ie; |
2619 | if (!wdev->current_bss) | 2619 | if (!wdev->current_bss) |
2620 | break; | 2620 | break; |
2621 | rcu_read_lock(); | ||
2621 | ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, | 2622 | ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, |
2622 | WLAN_EID_SSID); | 2623 | WLAN_EID_SSID); |
2623 | if (!ssid_ie) | 2624 | if (ssid_ie && |
2624 | break; | 2625 | nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) |
2625 | if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) | 2626 | goto nla_put_failure_rcu_locked; |
2626 | goto nla_put_failure; | 2627 | rcu_read_unlock(); |
2627 | break; | 2628 | break; |
2628 | } | 2629 | } |
2629 | default: | 2630 | default: |
@@ -2635,6 +2636,10 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag | |||
2635 | genlmsg_end(msg, hdr); | 2636 | genlmsg_end(msg, hdr); |
2636 | return 0; | 2637 | return 0; |
2637 | 2638 | ||
2639 | nla_put_failure_rcu_locked: | ||
2640 | rcu_read_unlock(); | ||
2641 | nla_put_failure_locked: | ||
2642 | wdev_unlock(wdev); | ||
2638 | nla_put_failure: | 2643 | nla_put_failure: |
2639 | genlmsg_cancel(msg, hdr); | 2644 | genlmsg_cancel(msg, hdr); |
2640 | return -EMSGSIZE; | 2645 | return -EMSGSIZE; |
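nl80211_send_iface() can now fail while the wdev lock is held and, in the station/P2P-client branch, inside an RCU read-side section as well; the fix adds one failure label per lock state so every exit releases exactly what it holds. The general shape, with pthread locks only loosely standing in for wdev_lock()/RCU (RCU is not a rwlock, this is just the label discipline):

    #include <pthread.h>

    static pthread_mutex_t wdev_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_rwlock_t rcu = PTHREAD_RWLOCK_INITIALIZER;

    static int put_attr(int fail) { return fail ? -1 : 0; }

    /* One failure label per lock state: jump to the one matching what
     * is currently held, then fall through the unlocks in reverse order. */
    static int send_iface(int fail_locked, int fail_rcu)
    {
        pthread_mutex_lock(&wdev_lock);
        if (put_attr(fail_locked))
            goto fail_locked_out;

        pthread_rwlock_rdlock(&rcu);
        if (put_attr(fail_rcu))
            goto fail_rcu_out;
        pthread_rwlock_unlock(&rcu);

        pthread_mutex_unlock(&wdev_lock);
        return 0;

    fail_rcu_out:
        pthread_rwlock_unlock(&rcu);
    fail_locked_out:
        pthread_mutex_unlock(&wdev_lock);
        return -1;
    }

    int main(void) { return send_iface(0, 0); }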
@@ -9804,7 +9809,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, | |||
9804 | */ | 9809 | */ |
9805 | if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && | 9810 | if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && |
9806 | rdev->ops->get_station) { | 9811 | rdev->ops->get_station) { |
9807 | struct station_info sinfo; | 9812 | struct station_info sinfo = {}; |
9808 | u8 *mac_addr; | 9813 | u8 *mac_addr; |
9809 | 9814 | ||
9810 | mac_addr = wdev->current_bss->pub.bssid; | 9815 | mac_addr = wdev->current_bss->pub.bssid; |
@@ -11359,7 +11364,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb, | |||
11359 | break; | 11364 | break; |
11360 | case NL80211_NAN_FUNC_FOLLOW_UP: | 11365 | case NL80211_NAN_FUNC_FOLLOW_UP: |
11361 | if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || | 11366 | if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || |
11362 | !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { | 11367 | !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] || |
11368 | !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) { | ||
11363 | err = -EINVAL; | 11369 | err = -EINVAL; |
11364 | goto out; | 11370 | goto out; |
11365 | } | 11371 | } |
diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 78e71b0390be..7b42f0bacfd8 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c | |||
@@ -1769,8 +1769,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, | |||
1769 | if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS) | 1769 | if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS) |
1770 | return; | 1770 | return; |
1771 | 1771 | ||
1772 | chan_before.center_freq = chan->center_freq; | 1772 | chan_before = *chan; |
1773 | chan_before.flags = chan->flags; | ||
1774 | 1773 | ||
1775 | if (chan->flags & IEEE80211_CHAN_NO_IR) { | 1774 | if (chan->flags & IEEE80211_CHAN_NO_IR) { |
1776 | chan->flags &= ~IEEE80211_CHAN_NO_IR; | 1775 | chan->flags &= ~IEEE80211_CHAN_NO_IR; |
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7ca04a7de85a..05186a47878f 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c | |||
@@ -1254,8 +1254,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, | |||
1254 | { | 1254 | { |
1255 | struct wireless_dev *wdev = dev->ieee80211_ptr; | 1255 | struct wireless_dev *wdev = dev->ieee80211_ptr; |
1256 | struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); | 1256 | struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); |
1257 | /* we are under RTNL - globally locked - so can use a static struct */ | 1257 | struct station_info sinfo = {}; |
1258 | static struct station_info sinfo; | ||
1259 | u8 addr[ETH_ALEN]; | 1258 | u8 addr[ETH_ALEN]; |
1260 | int err; | 1259 | int err; |
1261 | 1260 | ||
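Both wireless hunks zero the on-stack station_info before handing it to get_station(): drivers fill only the fields they know about, so anything left uninitialized would leak stack contents (and the old static struct in wext-compat.c additionally carried state between calls). In miniature, with a cut-down struct:

    #include <stdio.h>

    struct station_info { unsigned filled; long rx_bytes, tx_bytes; };

    /* A driver callback that fills only the fields it knows about. */
    static void get_station(struct station_info *s)
    {
        s->filled = 1;
        s->rx_bytes = 1024;
        /* tx_bytes deliberately untouched */
    }

    int main(void)
    {
        struct station_info sinfo = { 0 };  /* every field starts zeroed */

        get_station(&sinfo);
        printf("tx=%ld\n", sinfo.tx_bytes); /* 0, not stack garbage */
        return 0;
    }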
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 30e5746085b8..ac9477189d1c 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c | |||
@@ -102,6 +102,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, | |||
102 | 102 | ||
103 | err = dev->xfrmdev_ops->xdo_dev_state_add(x); | 103 | err = dev->xfrmdev_ops->xdo_dev_state_add(x); |
104 | if (err) { | 104 | if (err) { |
105 | xso->dev = NULL; | ||
105 | dev_put(dev); | 106 | dev_put(dev); |
106 | return err; | 107 | return err; |
107 | } | 108 | } |
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 347ab31574d5..5b2409746ae0 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c | |||
@@ -8,15 +8,29 @@ | |||
8 | * | 8 | * |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/bottom_half.h> | ||
12 | #include <linux/interrupt.h> | ||
11 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
12 | #include <linux/module.h> | 14 | #include <linux/module.h> |
13 | #include <linux/netdevice.h> | 15 | #include <linux/netdevice.h> |
16 | #include <linux/percpu.h> | ||
14 | #include <net/dst.h> | 17 | #include <net/dst.h> |
15 | #include <net/ip.h> | 18 | #include <net/ip.h> |
16 | #include <net/xfrm.h> | 19 | #include <net/xfrm.h> |
17 | #include <net/ip_tunnels.h> | 20 | #include <net/ip_tunnels.h> |
18 | #include <net/ip6_tunnel.h> | 21 | #include <net/ip6_tunnel.h> |
19 | 22 | ||
23 | struct xfrm_trans_tasklet { | ||
24 | struct tasklet_struct tasklet; | ||
25 | struct sk_buff_head queue; | ||
26 | }; | ||
27 | |||
28 | struct xfrm_trans_cb { | ||
29 | int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb); | ||
30 | }; | ||
31 | |||
32 | #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) | ||
33 | |||
20 | static struct kmem_cache *secpath_cachep __read_mostly; | 34 | static struct kmem_cache *secpath_cachep __read_mostly; |
21 | 35 | ||
22 | static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); | 36 | static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); |
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; | |||
25 | static struct gro_cells gro_cells; | 39 | static struct gro_cells gro_cells; |
26 | static struct net_device xfrm_napi_dev; | 40 | static struct net_device xfrm_napi_dev; |
27 | 41 | ||
42 | static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet); | ||
43 | |||
28 | int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) | 44 | int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) |
29 | { | 45 | { |
30 | int err = 0; | 46 | int err = 0; |
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) | |||
207 | xfrm_address_t *daddr; | 223 | xfrm_address_t *daddr; |
208 | struct xfrm_mode *inner_mode; | 224 | struct xfrm_mode *inner_mode; |
209 | u32 mark = skb->mark; | 225 | u32 mark = skb->mark; |
210 | unsigned int family; | 226 | unsigned int family = AF_UNSPEC; |
211 | int decaps = 0; | 227 | int decaps = 0; |
212 | int async = 0; | 228 | int async = 0; |
213 | bool xfrm_gro = false; | 229 | bool xfrm_gro = false; |
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) | |||
216 | 232 | ||
217 | if (encap_type < 0) { | 233 | if (encap_type < 0) { |
218 | x = xfrm_input_state(skb); | 234 | x = xfrm_input_state(skb); |
235 | |||
236 | if (unlikely(x->km.state != XFRM_STATE_VALID)) { | ||
237 | if (x->km.state == XFRM_STATE_ACQ) | ||
238 | XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); | ||
239 | else | ||
240 | XFRM_INC_STATS(net, | ||
241 | LINUX_MIB_XFRMINSTATEINVALID); | ||
242 | goto drop; | ||
243 | } | ||
244 | |||
219 | family = x->outer_mode->afinfo->family; | 245 | family = x->outer_mode->afinfo->family; |
220 | 246 | ||
221 | /* An encap_type of -1 indicates async resumption. */ | 247 | /* An encap_type of -1 indicates async resumption. */ |
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr) | |||
467 | } | 493 | } |
468 | EXPORT_SYMBOL(xfrm_input_resume); | 494 | EXPORT_SYMBOL(xfrm_input_resume); |
469 | 495 | ||
496 | static void xfrm_trans_reinject(unsigned long data) | ||
497 | { | ||
498 | struct xfrm_trans_tasklet *trans = (void *)data; | ||
499 | struct sk_buff_head queue; | ||
500 | struct sk_buff *skb; | ||
501 | |||
502 | __skb_queue_head_init(&queue); | ||
503 | skb_queue_splice_init(&trans->queue, &queue); | ||
504 | |||
505 | while ((skb = __skb_dequeue(&queue))) | ||
506 | XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb); | ||
507 | } | ||
508 | |||
509 | int xfrm_trans_queue(struct sk_buff *skb, | ||
510 | int (*finish)(struct net *, struct sock *, | ||
511 | struct sk_buff *)) | ||
512 | { | ||
513 | struct xfrm_trans_tasklet *trans; | ||
514 | |||
515 | trans = this_cpu_ptr(&xfrm_trans_tasklet); | ||
516 | |||
517 | if (skb_queue_len(&trans->queue) >= netdev_max_backlog) | ||
518 | return -ENOBUFS; | ||
519 | |||
520 | XFRM_TRANS_SKB_CB(skb)->finish = finish; | ||
521 | __skb_queue_tail(&trans->queue, skb); | ||
522 | tasklet_schedule(&trans->tasklet); | ||
523 | return 0; | ||
524 | } | ||
525 | EXPORT_SYMBOL(xfrm_trans_queue); | ||
526 | |||
470 | void __init xfrm_input_init(void) | 527 | void __init xfrm_input_init(void) |
471 | { | 528 | { |
472 | int err; | 529 | int err; |
530 | int i; | ||
473 | 531 | ||
474 | init_dummy_netdev(&xfrm_napi_dev); | 532 | init_dummy_netdev(&xfrm_napi_dev); |
475 | err = gro_cells_init(&gro_cells, &xfrm_napi_dev); | 533 | err = gro_cells_init(&gro_cells, &xfrm_napi_dev); |
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void) | |||
480 | sizeof(struct sec_path), | 538 | sizeof(struct sec_path), |
481 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, | 539 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
482 | NULL); | 540 | NULL); |
541 | |||
542 | for_each_possible_cpu(i) { | ||
543 | struct xfrm_trans_tasklet *trans; | ||
544 | |||
545 | trans = &per_cpu(xfrm_trans_tasklet, i); | ||
546 | __skb_queue_head_init(&trans->queue); | ||
547 | tasklet_init(&trans->tasklet, xfrm_trans_reinject, | ||
548 | (unsigned long)trans); | ||
549 | } | ||
483 | } | 550 | } |
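The new xfrm_trans_queue()/xfrm_trans_reinject() pair lets the input path defer packets to a per-CPU tasklet instead of recursing, bounded by netdev_max_backlog, with the completion callback parked in skb->cb. A single-threaded sketch of the same queue/drain discipline (fixed backlog constant, plain function pointers instead of skbs):

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>

    #define BACKLOG 4                    /* stand-in for netdev_max_backlog */

    struct pkt {
        int id;
        int (*finish)(struct pkt *);     /* parked like skb->cb */
        struct pkt *next;
    };

    static struct pkt *head, **tail = &head;
    static int qlen;

    /* Enqueue for deferred processing; refuse when backlogged. */
    static int trans_queue(struct pkt *p, int (*finish)(struct pkt *))
    {
        if (qlen >= BACKLOG)
            return -ENOBUFS;
        p->finish = finish;
        p->next = NULL;
        *tail = p;
        tail = &p->next;
        qlen++;
        return 0;
    }

    /* Drain everything queued so far (the tasklet body). */
    static void trans_reinject(void)
    {
        struct pkt *p = head;

        head = NULL;
        tail = &head;
        qlen = 0;
        while (p) {
            struct pkt *next = p->next;

            p->finish(p);
            p = next;
        }
    }

    static int deliver(struct pkt *p) { printf("pkt %d\n", p->id); return 0; }

    int main(void)
    {
        struct pkt a = { 1, NULL, NULL }, b = { 2, NULL, NULL };

        trans_queue(&a, deliver);
        trans_queue(&b, deliver);
        trans_reinject();
        return 0;
    }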
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9542975eb2f9..bd6b0e7a0ee4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c | |||
@@ -609,7 +609,8 @@ static void xfrm_hash_rebuild(struct work_struct *work) | |||
609 | 609 | ||
610 | /* re-insert all policies by order of creation */ | 610 | /* re-insert all policies by order of creation */ |
611 | list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { | 611 | list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { |
612 | if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { | 612 | if (policy->walk.dead || |
613 | xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { | ||
613 | /* skip socket policies */ | 614 | /* skip socket policies */ |
614 | continue; | 615 | continue; |
615 | } | 616 | } |
@@ -974,8 +975,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) | |||
974 | } | 975 | } |
975 | if (!cnt) | 976 | if (!cnt) |
976 | err = -ESRCH; | 977 | err = -ESRCH; |
977 | else | ||
978 | xfrm_policy_cache_flush(); | ||
979 | out: | 978 | out: |
980 | spin_unlock_bh(&net->xfrm.xfrm_policy_lock); | 979 | spin_unlock_bh(&net->xfrm.xfrm_policy_lock); |
981 | return err; | 980 | return err; |
@@ -1168,9 +1167,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, | |||
1168 | again: | 1167 | again: |
1169 | pol = rcu_dereference(sk->sk_policy[dir]); | 1168 | pol = rcu_dereference(sk->sk_policy[dir]); |
1170 | if (pol != NULL) { | 1169 | if (pol != NULL) { |
1171 | bool match = xfrm_selector_match(&pol->selector, fl, family); | 1170 | bool match; |
1172 | int err = 0; | 1171 | int err = 0; |
1173 | 1172 | ||
1173 | if (pol->family != family) { | ||
1174 | pol = NULL; | ||
1175 | goto out; | ||
1176 | } | ||
1177 | |||
1178 | match = xfrm_selector_match(&pol->selector, fl, family); | ||
1174 | if (match) { | 1179 | if (match) { |
1175 | if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { | 1180 | if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { |
1176 | pol = NULL; | 1181 | pol = NULL; |
@@ -1737,6 +1742,8 @@ void xfrm_policy_cache_flush(void) | |||
1737 | bool found = 0; | 1742 | bool found = 0; |
1738 | int cpu; | 1743 | int cpu; |
1739 | 1744 | ||
1745 | might_sleep(); | ||
1746 | |||
1740 | local_bh_disable(); | 1747 | local_bh_disable(); |
1741 | rcu_read_lock(); | 1748 | rcu_read_lock(); |
1742 | for_each_possible_cpu(cpu) { | 1749 | for_each_possible_cpu(cpu) { |
@@ -1833,6 +1840,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, | |||
1833 | sizeof(struct xfrm_policy *) * num_pols) == 0 && | 1840 | sizeof(struct xfrm_policy *) * num_pols) == 0 && |
1834 | xfrm_xdst_can_reuse(xdst, xfrm, err)) { | 1841 | xfrm_xdst_can_reuse(xdst, xfrm, err)) { |
1835 | dst_hold(&xdst->u.dst); | 1842 | dst_hold(&xdst->u.dst); |
1843 | xfrm_pols_put(pols, num_pols); | ||
1836 | while (err > 0) | 1844 | while (err > 0) |
1837 | xfrm_state_put(xfrm[--err]); | 1845 | xfrm_state_put(xfrm[--err]); |
1838 | return xdst; | 1846 | return xdst; |
@@ -2055,8 +2063,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, | |||
2055 | if (num_xfrms <= 0) | 2063 | if (num_xfrms <= 0) |
2056 | goto make_dummy_bundle; | 2064 | goto make_dummy_bundle; |
2057 | 2065 | ||
2066 | local_bh_disable(); | ||
2058 | xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, | 2067 | xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, |
2059 | xflo->dst_orig); | 2068 | xflo->dst_orig); |
2069 | local_bh_enable(); | ||
2070 | |||
2060 | if (IS_ERR(xdst)) { | 2071 | if (IS_ERR(xdst)) { |
2061 | err = PTR_ERR(xdst); | 2072 | err = PTR_ERR(xdst); |
2062 | if (err != -EAGAIN) | 2073 | if (err != -EAGAIN) |
@@ -2143,9 +2154,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, | |||
2143 | goto no_transform; | 2154 | goto no_transform; |
2144 | } | 2155 | } |
2145 | 2156 | ||
2157 | local_bh_disable(); | ||
2146 | xdst = xfrm_resolve_and_create_bundle( | 2158 | xdst = xfrm_resolve_and_create_bundle( |
2147 | pols, num_pols, fl, | 2159 | pols, num_pols, fl, |
2148 | family, dst_orig); | 2160 | family, dst_orig); |
2161 | local_bh_enable(); | ||
2162 | |||
2149 | if (IS_ERR(xdst)) { | 2163 | if (IS_ERR(xdst)) { |
2150 | xfrm_pols_put(pols, num_pols); | 2164 | xfrm_pols_put(pols, num_pols); |
2151 | err = PTR_ERR(xdst); | 2165 | err = PTR_ERR(xdst); |
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 065d89606888..a3785f538018 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c | |||
@@ -313,13 +313,14 @@ retry: | |||
313 | if ((type && !try_module_get(type->owner))) | 313 | if ((type && !try_module_get(type->owner))) |
314 | type = NULL; | 314 | type = NULL; |
315 | 315 | ||
316 | rcu_read_unlock(); | ||
317 | |||
316 | if (!type && try_load) { | 318 | if (!type && try_load) { |
317 | request_module("xfrm-offload-%d-%d", family, proto); | 319 | request_module("xfrm-offload-%d-%d", family, proto); |
318 | try_load = 0; | 320 | try_load = false; |
319 | goto retry; | 321 | goto retry; |
320 | } | 322 | } |
321 | 323 | ||
322 | rcu_read_unlock(); | ||
323 | return type; | 324 | return type; |
324 | } | 325 | } |
325 | 326 | ||
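The hunk above moves rcu_read_unlock() ahead of request_module(): loading a module may sleep, and sleeping inside an RCU read-side critical section is forbidden, so the lookup now drops the lock, loads, and retries. The unlock-before-blocking shape, again with a pthread rwlock as a loose stand-in for RCU:

    #include <pthread.h>
    #include <unistd.h>

    static pthread_rwlock_t tbl_lock = PTHREAD_RWLOCK_INITIALIZER;
    static void *registered;             /* set once the "module" loads */

    static void load_module(void) { usleep(1000); registered = &registered; }

    static void *type_get(void)
    {
        int try_load = 1;
        void *t;

    retry:
        pthread_rwlock_rdlock(&tbl_lock);
        t = registered;
        pthread_rwlock_unlock(&tbl_lock); /* drop BEFORE anything that blocks */

        if (!t && try_load) {
            load_module();                /* may sleep; no lock held */
            try_load = 0;
            goto retry;
        }
        return t;
    }

    int main(void) { return type_get() ? 0 : 1; }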
@@ -1343,6 +1344,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, | |||
1343 | 1344 | ||
1344 | if (orig->aead) { | 1345 | if (orig->aead) { |
1345 | x->aead = xfrm_algo_aead_clone(orig->aead); | 1346 | x->aead = xfrm_algo_aead_clone(orig->aead); |
1347 | x->geniv = orig->geniv; | ||
1346 | if (!x->aead) | 1348 | if (!x->aead) |
1347 | goto error; | 1349 | goto error; |
1348 | } | 1350 | } |
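The one-line x->geniv copy fixes state cloning (used by MIGRATE): the AEAD algorithm was duplicated but its IV generator was not, so the clone could not reinitialize the cipher. A trivial userspace model with hypothetical stubs:

```c
/* Sketch (hypothetical stubs): a deep clone must carry every field the
 * original depends on; the missing piece here was the geniv string. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct aead_algo { char name[32]; };

struct state {
	struct aead_algo *aead;
	const char *geniv;           /* IV generator, e.g. "seqiv"; was left NULL */
};

static struct state *clone_state(const struct state *orig)
{
	struct state *x = calloc(1, sizeof(*x));

	if (!x)
		return NULL;
	if (orig->aead) {
		x->aead = malloc(sizeof(*x->aead));
		if (!x->aead) { free(x); return NULL; }
		memcpy(x->aead, orig->aead, sizeof(*x->aead));
		x->geniv = orig->geniv;  /* the one-line fix from the hunk */
	}
	return x;
}

int main(void)
{
	struct aead_algo a = { .name = "rfc4106(gcm(aes))" };
	struct state orig = { .aead = &a, .geniv = "seqiv" };
	struct state *c = clone_state(&orig);

	printf("clone geniv: %s\n", c && c->geniv ? c->geniv : "(null)");
	return 0;
}
```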
@@ -1533,8 +1535,12 @@ out: | |||
1533 | err = -EINVAL; | 1535 | err = -EINVAL; |
1534 | spin_lock_bh(&x1->lock); | 1536 | spin_lock_bh(&x1->lock); |
1535 | if (likely(x1->km.state == XFRM_STATE_VALID)) { | 1537 | if (likely(x1->km.state == XFRM_STATE_VALID)) { |
1536 | if (x->encap && x1->encap) | 1538 | if (x->encap && x1->encap && |
1539 | x->encap->encap_type == x1->encap->encap_type) | ||
1537 | memcpy(x1->encap, x->encap, sizeof(*x1->encap)); | 1540 | memcpy(x1->encap, x->encap, sizeof(*x1->encap)); |
1541 | else if (x->encap || x1->encap) | ||
1542 | goto fail; | ||
1543 | |||
1538 | if (x->coaddr && x1->coaddr) { | 1544 | if (x->coaddr && x1->coaddr) { |
1539 | memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); | 1545 | memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); |
1540 | } | 1546 | } |
@@ -1551,6 +1557,8 @@ out: | |||
1551 | x->km.state = XFRM_STATE_DEAD; | 1557 | x->km.state = XFRM_STATE_DEAD; |
1552 | __xfrm_state_put(x); | 1558 | __xfrm_state_put(x); |
1553 | } | 1559 | } |
1560 | |||
1561 | fail: | ||
1554 | spin_unlock_bh(&x1->lock); | 1562 | spin_unlock_bh(&x1->lock); |
1555 | 1563 | ||
1556 | xfrm_state_put(x1); | 1564 | xfrm_state_put(x1); |
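xfrm_state_update() previously memcpy()'d any new encapsulation template over the old one; the new check only allows that when both states use the same encap_type, and otherwise bails out through the new fail label (likewise when only one side carries an encap at all). A userspace model with hypothetical stubs:

```c
/* Sketch (hypothetical stubs): refuse to splice a UDP-encap template of
 * a different type over an existing one during an SA update. */
#include <stdio.h>
#include <string.h>

struct encap_tmpl { int encap_type; unsigned short sport, dport; };

static int update_encap(struct encap_tmpl *cur, const struct encap_tmpl *new)
{
	if (new && cur && new->encap_type == cur->encap_type)
		memcpy(cur, new, sizeof(*cur));  /* same type: safe to copy */
	else if (new || cur)
		return -1;                       /* mismatch: reject, like 'goto fail' */
	return 0;
}

int main(void)
{
	struct encap_tmpl cur = { .encap_type = 2 /* UDP_ENCAP_ESPINUDP */ };
	struct encap_tmpl new = { .encap_type = 1 /* UDP_ENCAP_ESPINUDP_NON_IKE */ };

	printf("mismatched update: %s\n",
	       update_encap(&cur, &new) ? "rejected" : "applied");
	return 0;
}
```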
@@ -2264,8 +2272,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) | |||
2264 | goto error; | 2272 | goto error; |
2265 | } | 2273 | } |
2266 | 2274 | ||
2267 | x->km.state = XFRM_STATE_VALID; | ||
2268 | |||
2269 | error: | 2275 | error: |
2270 | return err; | 2276 | return err; |
2271 | } | 2277 | } |
@@ -2274,7 +2280,13 @@ EXPORT_SYMBOL(__xfrm_init_state); | |||
2274 | 2280 | ||
2275 | int xfrm_init_state(struct xfrm_state *x) | 2281 | int xfrm_init_state(struct xfrm_state *x) |
2276 | { | 2282 | { |
2277 | return __xfrm_init_state(x, true, false); | 2283 | int err; |
2284 | |||
2285 | err = __xfrm_init_state(x, true, false); | ||
2286 | if (!err) | ||
2287 | x->km.state = XFRM_STATE_VALID; | ||
2288 | |||
2289 | return err; | ||
2278 | } | 2290 | } |
2279 | 2291 | ||
2280 | EXPORT_SYMBOL(xfrm_init_state); | 2292 | EXPORT_SYMBOL(xfrm_init_state); |
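__xfrm_init_state() no longer flips km.state to XFRM_STATE_VALID itself; the pf_key-facing wrapper xfrm_init_state() restores the old behaviour, while the netlink path (xfrm_user.c, below) defers validity until any hardware offload has been configured. A stubbed sketch of the split:

```c
/* Sketch (hypothetical stubs): common init only prepares the state;
 * each caller decides when it becomes VALID. */
#include <stdio.h>

enum state { STATE_VOID, STATE_VALID, STATE_DEAD };

struct xstate { enum state km_state; };

static int init_state_common(struct xstate *x)  /* models __xfrm_init_state() */
{
	/* ... algorithm and replay setup; no longer flips km.state ... */
	(void)x;
	return 0;
}

static int init_state_pfkey(struct xstate *x)   /* models xfrm_init_state() */
{
	int err = init_state_common(x);

	if (!err)
		x->km_state = STATE_VALID;          /* old behaviour, kept for pf_key */
	return err;
}

int main(void)
{
	struct xstate x = { .km_state = STATE_VOID };

	printf("pf_key init: %s\n",
	       init_state_pfkey(&x) == 0 && x.km_state == STATE_VALID ? "valid" : "void");
	return 0;
}
```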
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 983b0233767b..7f52b8eb177d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c | |||
@@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, | |||
598 | goto error; | 598 | goto error; |
599 | } | 599 | } |
600 | 600 | ||
601 | if (attrs[XFRMA_OFFLOAD_DEV]) { | ||
602 | err = xfrm_dev_state_add(net, x, | ||
603 | nla_data(attrs[XFRMA_OFFLOAD_DEV])); | ||
604 | if (err) | ||
605 | goto error; | ||
606 | } | ||
607 | |||
608 | if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, | 601 | if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, |
609 | attrs[XFRMA_REPLAY_ESN_VAL]))) | 602 | attrs[XFRMA_REPLAY_ESN_VAL]))) |
610 | goto error; | 603 | goto error; |
@@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, | |||
620 | /* override default values from above */ | 613 | /* override default values from above */ |
621 | xfrm_update_ae_params(x, attrs, 0); | 614 | xfrm_update_ae_params(x, attrs, 0); |
622 | 615 | ||
616 | /* configure the hardware if offload is requested */ | ||
617 | if (attrs[XFRMA_OFFLOAD_DEV]) { | ||
618 | err = xfrm_dev_state_add(net, x, | ||
619 | nla_data(attrs[XFRMA_OFFLOAD_DEV])); | ||
620 | if (err) | ||
621 | goto error; | ||
622 | } | ||
623 | |||
623 | return x; | 624 | return x; |
624 | 625 | ||
625 | error: | 626 | error: |
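Moving the XFRMA_OFFLOAD_DEV handling to the end of xfrm_state_construct() means the device is programmed only after every netlink attribute, including the xfrm_update_ae_params() overrides, has been folded into the state. A stubbed userspace model of the ordering:

```c
/* Sketch (hypothetical stubs): the offload hook must run last, once all
 * attributes are final; otherwise the device would be programmed with
 * pre-override replay/ESN parameters. */
#include <stdio.h>

struct state { unsigned int replay_window; int offloaded; };

static void update_ae_params(struct state *x) { x->replay_window = 128; }

static int dev_state_add(struct state *x)    /* models xfrm_dev_state_add() */
{
	x->offloaded = 1;
	return x->replay_window ? 0 : -1;        /* device needs the final params */
}

static int construct(struct state *x)        /* models xfrm_state_construct() */
{
	update_ae_params(x);                     /* fold in attribute overrides first */
	return dev_state_add(x);                 /* then push the state to hardware */
}

int main(void)
{
	struct state x = { 0 };

	printf("construct: %d, window seen by device: %u\n",
	       construct(&x), x.replay_window);
	return 0;
}
```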
@@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, | |||
662 | goto out; | 663 | goto out; |
663 | } | 664 | } |
664 | 665 | ||
666 | if (x->km.state == XFRM_STATE_VOID) | ||
667 | x->km.state = XFRM_STATE_VALID; | ||
668 | |||
665 | c.seq = nlh->nlmsg_seq; | 669 | c.seq = nlh->nlmsg_seq; |
666 | c.portid = nlh->nlmsg_pid; | 670 | c.portid = nlh->nlmsg_pid; |
667 | c.event = nlh->nlmsg_type; | 671 | c.event = nlh->nlmsg_type; |
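Complementing the xfrm_state.c change above, xfrm_add_sa() now promotes a still-VOID state to VALID only once the add, including any offload setup during construction, has succeeded. A minimal model with hypothetical stubs:

```c
/* Sketch (hypothetical stubs): the netlink add path flips VOID -> VALID
 * only after the state was accepted; a failed add leaves it VOID. */
#include <stdio.h>

enum state { STATE_VOID, STATE_VALID };

static enum state add_sa(int add_err, enum state km_state)
{
	if (add_err)
		return km_state;          /* failed add: state stays VOID */
	if (km_state == STATE_VOID)
		km_state = STATE_VALID;   /* the two added lines in the hunk */
	return km_state;
}

int main(void)
{
	printf("after failed add: %d, after good add: %d\n",
	       add_sa(-1, STATE_VOID), add_sa(0, STATE_VOID));
	return 0;
}
```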
@@ -1419,11 +1423,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, | |||
1419 | 1423 | ||
1420 | static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) | 1424 | static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) |
1421 | { | 1425 | { |
1426 | u16 prev_family; | ||
1422 | int i; | 1427 | int i; |
1423 | 1428 | ||
1424 | if (nr > XFRM_MAX_DEPTH) | 1429 | if (nr > XFRM_MAX_DEPTH) |
1425 | return -EINVAL; | 1430 | return -EINVAL; |
1426 | 1431 | ||
1432 | prev_family = family; | ||
1433 | |||
1427 | for (i = 0; i < nr; i++) { | 1434 | for (i = 0; i < nr; i++) { |
1428 | /* We never validated the ut->family value, so many | 1435 | /* We never validated the ut->family value, so many |
1429 | * applications simply leave it at zero. The check was | 1436 | * applications simply leave it at zero. The check was |
@@ -1435,6 +1442,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) | |||
1435 | if (!ut[i].family) | 1442 | if (!ut[i].family) |
1436 | ut[i].family = family; | 1443 | ut[i].family = family; |
1437 | 1444 | ||
1445 | if ((ut[i].mode == XFRM_MODE_TRANSPORT) && | ||
1446 | (ut[i].family != prev_family)) | ||
1447 | return -EINVAL; | ||
1448 | |||
1449 | prev_family = ut[i].family; | ||
1450 | |||
1438 | switch (ut[i].family) { | 1451 | switch (ut[i].family) { |
1439 | case AF_INET: | 1452 | case AF_INET: |
1440 | break; | 1453 | break; |
@@ -1445,6 +1458,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) | |||
1445 | default: | 1458 | default: |
1446 | return -EINVAL; | 1459 | return -EINVAL; |
1447 | } | 1460 | } |
1461 | |||
1462 | switch (ut[i].id.proto) { | ||
1463 | case IPPROTO_AH: | ||
1464 | case IPPROTO_ESP: | ||
1465 | case IPPROTO_COMP: | ||
1466 | #if IS_ENABLED(CONFIG_IPV6) | ||
1467 | case IPPROTO_ROUTING: | ||
1468 | case IPPROTO_DSTOPTS: | ||
1469 | #endif | ||
1470 | case IPSEC_PROTO_ANY: | ||
1471 | break; | ||
1472 | default: | ||
1473 | return -EINVAL; | ||
1474 | } | ||
1475 | |||
1448 | } | 1476 | } |
1449 | 1477 | ||
1450 | return 0; | 1478 | return 0; |
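validate_tmpl() gains two checks: transport-mode templates may not switch address family mid-chain (mismatched families can make later lookups misread address buffers), and the template protocol must be one of the known IPsec protocols, rejecting attacker-chosen proto numbers before they reach state resolution. A userspace model of both checks, with hypothetical constants:

```c
/* Sketch (userspace model of the two new validate_tmpl() checks). */
#include <stdio.h>

enum { MODE_TRANSPORT, MODE_TUNNEL };
enum { FAM_INET = 2, FAM_INET6 = 10 };
enum { PROTO_ESP = 50, PROTO_AH = 51, PROTO_COMP = 108, PROTO_ANY = 255 };

struct tmpl { int mode, family, proto; };

static int validate(const struct tmpl *ut, int nr, int family)
{
	int prev_family = family, i;

	for (i = 0; i < nr; i++) {
		/* transport mode cannot change family along the chain */
		if (ut[i].mode == MODE_TRANSPORT && ut[i].family != prev_family)
			return -1;
		prev_family = ut[i].family;

		switch (ut[i].proto) {
		case PROTO_AH:
		case PROTO_ESP:
		case PROTO_COMP:
		case PROTO_ANY:
			break;
		default:
			return -1;    /* unknown proto: reject up front */
		}
	}
	return 0;
}

int main(void)
{
	struct tmpl bad = { MODE_TRANSPORT, FAM_INET6, PROTO_ESP };

	printf("cross-family transport tmpl: %s\n",
	       validate(&bad, 1, FAM_INET) ? "rejected" : "accepted");
	return 0;
}
```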
@@ -2470,7 +2498,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { | |||
2470 | [XFRMA_PROTO] = { .type = NLA_U8 }, | 2498 | [XFRMA_PROTO] = { .type = NLA_U8 }, |
2471 | [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, | 2499 | [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, |
2472 | [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, | 2500 | [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, |
2473 | [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 }, | 2501 | [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 }, |
2474 | }; | 2502 | }; |
2475 | 2503 | ||
2476 | static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { | 2504 | static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { |
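The XFRMA_OUTPUT_MARK policy entry used `.len = NLA_U32`, but NLA_U32 is a type code (value 3), so the entry acted as a 3-byte minimum-length check on an untyped attribute rather than enforcing a 4-byte u32; `.type = NLA_U32` makes the validation strict. A userspace model of the difference:

```c
/* Sketch (userspace model): with .type unset (NLA_UNSPEC), the policy's
 * .len is only a minimum length, so '.len = NLA_U32' (== 3) let a
 * malformed 3-byte attribute through; a typed u32 entry rejects it. */
#include <stdio.h>

enum { NLA_UNSPEC_T = 0, NLA_U32_T = 3 };

struct policy { int type; int len; };

static int validate_attr(const struct policy *p, int attrlen)
{
	if (p->type == NLA_U32_T)
		return attrlen == 4 ? 0 : -1;   /* strict: exactly one u32 */
	/* NLA_UNSPEC: 'len' is only a minimum length */
	return attrlen >= p->len ? 0 : -1;
}

int main(void)
{
	struct policy buggy = { .type = NLA_UNSPEC_T, .len = NLA_U32_T };
	struct policy fixed = { .type = NLA_U32_T };

	/* a malformed 3-byte attribute slips past the old policy */
	printf("old policy, 3-byte attr: %s\n",
	       validate_attr(&buggy, 3) ? "rejected" : "accepted");
	printf("new policy, 3-byte attr: %s\n",
	       validate_attr(&fixed, 3) ? "rejected" : "accepted");
	return 0;
}
```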