Diffstat (limited to 'net'): 99 files changed, 2392 insertions, 1097 deletions
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index e9d0f0c1a048..16d287565987 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -275,7 +275,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case 'R':{
-				int16_t *nwqid = va_arg(ap, int16_t *);
+				uint16_t *nwqid = va_arg(ap, uint16_t *);
 				struct p9_qid **wqids =
 				    va_arg(ap, struct p9_qid **);
 
@@ -440,7 +440,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 						  stbuf->n_gid, stbuf->n_muid);
 			} break;
 		case 'V':{
-				int32_t count = va_arg(ap, int32_t);
+				uint32_t count = va_arg(ap, uint32_t);
 				struct iov_iter *from =
 				    va_arg(ap, struct iov_iter *);
 				errcode = p9pdu_writef(pdu, proto_version, "d",
@@ -471,7 +471,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case 'R':{
-				int16_t nwqid = va_arg(ap, int);
+				uint16_t nwqid = va_arg(ap, int);
 				struct p9_qid *wqids =
 				    va_arg(ap, struct p9_qid *);
 
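
Note: the 9p wire format carries these counts as unsigned fields (nwqid is
the 16-bit Rwalk qid count, count the 32-bit payload size), so reading them
through a signed type misinterprets any value with the top bit set. A
minimal user-space sketch of the hazard (illustrative only, not kernel
code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t wire = 0x8000;	/* a 16-bit count as seen on the wire */

	/* unsigned read: 32768 elements; signed read: -32768 */
	printf("uint16_t: %u  int16_t: %d\n",
	       (unsigned)wire, (int)(int16_t)wire);
	return 0;
}
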
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 3e3d82d8ff70..bced8c074c12 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -734,6 +734,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 	opts->port = P9_PORT;
 	opts->rfd = ~0;
 	opts->wfd = ~0;
+	opts->privport = 0;
 
 	if (!params)
 		return 0;
@@ -1013,7 +1014,6 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p;
 
 	parse_opts(args, &opts);
 
@@ -1026,7 +1026,6 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
 	if (err < 0)
 		return err;
 
-	p = (struct p9_trans_fd *) client->trans;
 	p9_conn_create(client);
 
 	return 0;
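
Note: the new opts->privport = 0 matters because parse_opts() fills a
caller-provided struct that is never zero-initialized; any field without an
explicit default keeps whatever was on the stack. A sketch of the bug class
(hypothetical code, not the kernel's):

#include <stdio.h>

struct fd_opts {
	int port;
	int privport;
};

static void parse_opts(struct fd_opts *opts)
{
	opts->port = 564;	/* default 9p port */
	opts->privport = 0;	/* without this line, the field is
				 * whatever happened to be on the stack */
}

int main(void)
{
	struct fd_opts opts;	/* uninitialized automatic storage */

	parse_opts(&opts);
	printf("port=%d privport=%d\n", opts.port, opts.privport);
	return 0;
}
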
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 14ad43b5cf89..3533d2a53ab6 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -139,6 +139,7 @@ struct p9_rdma_opts {
 	int sq_depth;
 	int rq_depth;
 	long timeout;
+	int privport;
 };
 
 /*
@@ -146,7 +147,10 @@ struct p9_rdma_opts {
  */
 enum {
 	/* Options that take integer arguments */
-	Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err,
+	Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout,
+	/* Options that take no argument */
+	Opt_privport,
+	Opt_err,
 };
 
 static match_table_t tokens = {
@@ -154,6 +158,7 @@ static match_table_t tokens = {
 	{Opt_sq_depth, "sq=%u"},
 	{Opt_rq_depth, "rq=%u"},
 	{Opt_timeout, "timeout=%u"},
+	{Opt_privport, "privport"},
 	{Opt_err, NULL},
 };
 
@@ -175,6 +180,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 	opts->sq_depth = P9_RDMA_SQ_DEPTH;
 	opts->rq_depth = P9_RDMA_RQ_DEPTH;
 	opts->timeout = P9_RDMA_TIMEOUT;
+	opts->privport = 0;
 
 	if (!params)
 		return 0;
@@ -193,13 +199,13 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token == Opt_err)
-			continue;
-		r = match_int(&args[0], &option);
-		if (r < 0) {
-			p9_debug(P9_DEBUG_ERROR,
-				 "integer field, but no integer?\n");
-			continue;
-		}
+		if ((token != Opt_err) && (token != Opt_privport)) {
+			r = match_int(&args[0], &option);
+			if (r < 0) {
+				p9_debug(P9_DEBUG_ERROR,
+					 "integer field, but no integer?\n");
+				continue;
+			}
+		}
 		switch (token) {
 		case Opt_port:
@@ -214,6 +220,9 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 		case Opt_timeout:
 			opts->timeout = option;
 			break;
+		case Opt_privport:
+			opts->privport = 1;
+			break;
 		default:
 			continue;
 		}
@@ -607,6 +616,23 @@ static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
 	return 0;
 }
 
+static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
+{
+	struct sockaddr_in cl = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_ANY),
+	};
+	int port, err = -EINVAL;
+
+	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
+		cl.sin_port = htons((ushort)port);
+		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
+		if (err != -EADDRINUSE)
+			break;
+	}
+	return err;
+}
+
 /**
  * trans_create_rdma - Transport method for creating atransport instance
  * @client: client instance
@@ -642,6 +668,16 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
 	/* Associate the client with the transport */
 	client->trans = rdma;
 
+	/* Bind to a privileged port if we need to */
+	if (opts.privport) {
+		err = p9_rdma_bind_privport(rdma);
+		if (err < 0) {
+			pr_err("%s (%d): problem binding to privport: %d\n",
+			       __func__, task_pid_nr(current), -err);
+			goto error;
+		}
+	}
+
 	/* Resolve the server's address */
 	rdma->addr.sin_family = AF_INET;
 	rdma->addr.sin_addr.s_addr = in_aton(addr);
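
Note: p9_rdma_bind_privport() above scans the reserved port range downward
and stops at the first bind result other than -EADDRINUSE. The same scan in
user space, as a sketch over ordinary BSD sockets rather than the RDMA CM
(binding below 1024 requires CAP_NET_BIND_SERVICE):

#include <arpa/inet.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Walk [min, max] downward and bind to the first free port.  Any error
 * other than "address in use" aborts the scan, mirroring the kernel
 * loop above.  Returns the bound port or -1. */
static int bind_privport(int fd, int max, int min)
{
	struct sockaddr_in cl;
	int port;

	memset(&cl, 0, sizeof(cl));
	cl.sin_family = AF_INET;
	cl.sin_addr.s_addr = htonl(INADDR_ANY);

	for (port = max; port >= min; port--) {
		cl.sin_port = htons((unsigned short)port);
		if (bind(fd, (struct sockaddr *)&cl, sizeof(cl)) == 0)
			return port;
		if (errno != EADDRINUSE)
			break;
	}
	return -1;
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int port;

	if (fd < 0)
		return 1;
	port = bind_privport(fd, 1023, 512);	/* assumed range */
	printf("bound to %d\n", port);
	close(fd);
	return 0;
}
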
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e62bcbbabb5e..9dd49ca67dbc 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -525,7 +525,10 @@ static ssize_t p9_mount_tag_show(struct device *dev,
 	vdev = dev_to_virtio(dev);
 	chan = vdev->priv;
 
-	return snprintf(buf, chan->tag_len + 1, "%s", chan->tag);
+	memcpy(buf, chan->tag, chan->tag_len);
+	buf[chan->tag_len] = 0;
+
+	return chan->tag_len + 1;
 }
 
 static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL);
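
Note: chan->tag is a length-delimited buffer with no guaranteed NUL
terminator, so the old snprintf(..., "%s", chan->tag) could read past its
end looking for one; the replacement copies exactly tag_len bytes,
terminates by hand, and reports a length that includes the terminator. The
pattern in user space (illustrative only, not kernel code):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char tag[4] = { 'p', '9', 'f', 's' };	/* no NUL inside */
	char buf[16];
	size_t tag_len = sizeof(tag);

	/* "%s" would scan past tag[3]; an explicit copy cannot */
	memcpy(buf, tag, tag_len);
	buf[tag_len] = '\0';
	printf("%s\n", buf);
	return 0;
}
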
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 476709bd068a..4663c3dad3f5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1557,7 +1557,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 {
 	BT_DBG("%s %p", hdev->name, hdev);
 
-	if (!hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+	if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+	    test_bit(HCI_UP, &hdev->flags)) {
 		/* Execute vendor specific shutdown routine */
 		if (hdev->shutdown)
 			hdev->shutdown(hdev);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index a05b9dbf14c9..9070dfd6b4ad 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1313,7 +1313,8 @@ int hidp_connection_add(struct hidp_connadd_req *req,
 			struct socket *ctrl_sock,
 			struct socket *intr_sock)
 {
-	u32 valid_flags = 0;
+	u32 valid_flags = BIT(HIDP_VIRTUAL_CABLE_UNPLUG) |
+			  BIT(HIDP_BOOT_PROTOCOL_MODE);
 	struct hidp_session *session;
 	struct l2cap_conn *conn;
 	struct l2cap_chan *chan;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 409608960899..e29ad70b3000 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -170,7 +170,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
 	struct br_port_msg *bpm;
 	struct nlattr *nest, *nest2;
 
-	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
 	if (!nlh)
 		return -EMSGSIZE;
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 0e4ddb81610d..4b5c236998ff 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -394,7 +394,7 @@ errout:
  * Dump information about all ports, in response to GETLINK
  */
 int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-	       struct net_device *dev, u32 filter_mask)
+	       struct net_device *dev, u32 filter_mask, int nlflags)
 {
 	struct net_bridge_port *port = br_port_get_rtnl(dev);
 
@@ -402,7 +402,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	    !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED))
 		return 0;
 
-	return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI,
+	return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags,
 			      filter_mask, dev);
 }
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 6ca0251cb478..3362c29400f1 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -828,7 +828,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port);
 int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
 int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
 int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
-	       u32 filter_mask);
+	       u32 filter_mask, int nlflags);
 
 #ifdef CONFIG_SYSFS
 /* br_sysfs_if.c */
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ec565508e904..79e8f71aef5b 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -490,6 +490,43 @@ out:
 }
 EXPORT_SYMBOL(ceph_parse_options);
 
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
+{
+	struct ceph_options *opt = client->options;
+	size_t pos = m->count;
+
+	if (opt->name)
+		seq_printf(m, "name=%s,", opt->name);
+	if (opt->key)
+		seq_puts(m, "secret=<hidden>,");
+
+	if (opt->flags & CEPH_OPT_FSID)
+		seq_printf(m, "fsid=%pU,", &opt->fsid);
+	if (opt->flags & CEPH_OPT_NOSHARE)
+		seq_puts(m, "noshare,");
+	if (opt->flags & CEPH_OPT_NOCRC)
+		seq_puts(m, "nocrc,");
+	if (opt->flags & CEPH_OPT_NOMSGAUTH)
+		seq_puts(m, "nocephx_require_signatures,");
+	if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
+		seq_puts(m, "notcp_nodelay,");
+
+	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+		seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+		seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+		seq_printf(m, "osdkeepalivetimeout=%d,",
+			   opt->osd_keepalive_timeout);
+
+	/* drop redundant comma */
+	if (m->count != pos)
+		m->count--;
+
+	return 0;
+}
+EXPORT_SYMBOL(ceph_print_client_options);
+
 u64 ceph_client_id(struct ceph_client *client)
 {
 	return client->monc.auth->global_id;
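
Note: ceph_print_client_options() prints every option with a trailing comma
and then rewinds the seq_file length counter by one byte if anything was
emitted at all. The same trick with a plain buffer (a sketch; in the kernel
seq_file does this bookkeeping):

#include <stdio.h>

int main(void)
{
	char buf[64];
	size_t count = 0, pos;

	pos = count;	/* remember where this group starts */
	count += (size_t)snprintf(buf + count, sizeof(buf) - count, "noshare,");
	count += (size_t)snprintf(buf + count, sizeof(buf) - count, "nocrc,");
	if (count != pos)	/* at least one option was printed */
		count--;	/* drop redundant comma */
	buf[count] = '\0';
	printf("%s\n", buf);	/* -> noshare,nocrc */
	return 0;
}
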
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 16bc199d9a62..9d84ce4ea0df 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg)
 	case CRUSH_BUCKET_LIST: return "list";
 	case CRUSH_BUCKET_TREE: return "tree";
 	case CRUSH_BUCKET_STRAW: return "straw";
+	case CRUSH_BUCKET_STRAW2: return "straw2";
 	default: return "unknown";
 	}
 }
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
 		return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
 	case CRUSH_BUCKET_STRAW:
 		return ((struct crush_bucket_straw *)b)->item_weights[p];
+	case CRUSH_BUCKET_STRAW2:
+		return ((struct crush_bucket_straw2 *)b)->item_weights[p];
 	}
 	return 0;
 }
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
 	kfree(b);
 }
 
+void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
+{
+	kfree(b->item_weights);
+	kfree(b->h.perm);
+	kfree(b->h.items);
+	kfree(b);
+}
+
 void crush_destroy_bucket(struct crush_bucket *b)
 {
 	switch (b->alg) {
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
 	case CRUSH_BUCKET_STRAW:
 		crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
 		break;
+	case CRUSH_BUCKET_STRAW2:
+		crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
+		break;
 	}
 }
 
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
new file mode 100644
index 000000000000..6192c7fc958c
--- /dev/null
+++ b/net/ceph/crush/crush_ln_table.h
@@ -0,0 +1,166 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Intel Corporation All Rights Reserved
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#if defined(__linux__)
+#include <linux/types.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#endif
+
+#ifndef CEPH_CRUSH_LN_H
+#define CEPH_CRUSH_LN_H
+
+
+// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
+// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
+
+static int64_t __RH_LH_tbl[128*2+2] = {
+  0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
+  0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
+  0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
+  0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll,
+  0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll,
+  0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll,
+  0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll,
+  0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell,
+  0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll,
+  0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll,
+  0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll,
+  0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll,
+  0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll,
+  0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll,
+  0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all,
+  0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll,
+  0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all,
+  0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell,
+  0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll,
+  0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll,
+  0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll,
+  0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll,
+  0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll,
+  0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll,
+  0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll,
+  0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll,
+  0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell,
+  0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll,
+  0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll,
+  0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll,
+  0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll,
+  0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll,
+  0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll,
+  0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll,
+  0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll,
+  0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll,
+  0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll,
+  0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll,
+  0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll,
+  0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll,
+  0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll,
+  0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll,
+  0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll,
+  0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll,
+  0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll,
+  0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll,
+  0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll,
+  0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll,
+  0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll,
+  0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll,
+  0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll,
+  0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll,
+  0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll,
+  0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell,
+  0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell,
+  0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll,
+  0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell,
+  0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll,
+  0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll,
+  0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll,
+  0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll,
+  0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll,
+  0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
+  0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
+  0x0000800000000000ll, 0x0000ffff00000000ll,
+};
+
+
+// LL_tbl[k] = 2^48*log2(1.0+k/2^15);
+static int64_t __LL_tbl[256] = {
+  0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
+  0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
+  0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
+  0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull,
+  0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull,
+  0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull,
+  0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull,
+  0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull,
+  0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull,
+  0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull,
+  0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull,
+  0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull,
+  0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull,
+  0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull,
+  0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull,
+  0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull,
+  0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull,
+  0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull,
+  0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull,
+  0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull,
+  0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull,
+  0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull,
+  0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull,
+  0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull,
+  0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull,
+  0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull,
+  0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull,
+  0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull,
+  0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull,
+  0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull,
+  0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull,
+  0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull,
+  0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull,
+  0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull,
+  0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull,
+  0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull,
+  0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull,
+  0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull,
+  0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull,
+  0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull,
+  0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull,
+  0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull,
+  0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull,
+  0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull,
+  0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull,
+  0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull,
+  0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull,
+  0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull,
+  0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull,
+  0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull,
+  0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull,
+  0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull,
+  0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull,
+  0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull,
+  0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull,
+  0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull,
+  0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull,
+  0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull,
+  0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull,
+  0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull,
+  0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull,
+  0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull,
+  0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull,
+  0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
+};
+
+
+
+
+#endif
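
Note: the tables encode fixed-point logarithms for crush_ln() below:
__RH_LH_tbl[2*k] is 2^48/(1+k/128), __RH_LH_tbl[2*k+1] is
2^48*log2(1+k/128), and __LL_tbl[k] is 2^48*log2(1+k/2^15). A host-side
spot check of one entry against that stated contract (a sketch; build with
-lm):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int k = 1;
	/* table says: __RH_LH_tbl[2*k+1] = 2^48 * log2(1.0 + k/128.0) */
	int64_t tbl = 0x000002dfca16dde1ll;
	double expect = ldexp(log2(1.0 + k / 128.0), 48);

	/* the two agree to within rounding of the last table digit */
	printf("table=%lld expect=%.0f\n", (long long)tbl, expect);
	return 0;
}
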
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a1ef53c04415..5b47736d27d9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -20,7 +20,7 @@
 
 #include <linux/crush/crush.h>
 #include <linux/crush/hash.h>
-#include <linux/crush/mapper.h>
+#include "crush_ln_table.h"
 
 /*
  * Implement the core CRUSH mapping algorithm.
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
 	return bucket->h.items[high];
 }
 
+// compute 2^44*log2(input+1)
+uint64_t crush_ln(unsigned xin)
+{
+	unsigned x=xin, x1;
+	int iexpon, index1, index2;
+	uint64_t RH, LH, LL, xl64, result;
+
+	x++;
+
+	// normalize input
+	iexpon = 15;
+	while(!(x&0x18000)) { x<<=1; iexpon--; }
+
+	index1 = (x>>8)<<1;
+	// RH ~ 2^56/index1
+	RH = __RH_LH_tbl[index1 - 256];
+	// LH ~ 2^48 * log2(index1/256)
+	LH = __RH_LH_tbl[index1 + 1 - 256];
+
+	// RH*x ~ 2^48 * (2^15 + xf), xf<2^8
+	xl64 = (int64_t)x * RH;
+	xl64 >>= 48;
+	x1 = xl64;
+
+	result = iexpon;
+	result <<= (12 + 32);
+
+	index2 = x1 & 0xff;
+	// LL ~ 2^48*log2(1.0+index2/2^15)
+	LL = __LL_tbl[index2];
+
+	LH = LH + LL;
+
+	LH >>= (48-12 - 32);
+	result += LH;
+
+	return result;
+}
+
+
+/*
+ * straw2
+ *
+ * for reference, see:
+ *
+ * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
+ *
+ */
+
+static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+				int x, int r)
+{
+	unsigned i, high = 0;
+	unsigned u;
+	unsigned w;
+	__s64 ln, draw, high_draw = 0;
+
+	for (i = 0; i < bucket->h.size; i++) {
+		w = bucket->item_weights[i];
+		if (w) {
+			u = crush_hash32_3(bucket->h.hash, x,
+					   bucket->h.items[i], r);
+			u &= 0xffff;
+
+			/*
+			 * for some reason slightly less than 0x10000 produces
+			 * a slightly more accurate distribution... probably a
+			 * rounding effect.
+			 *
+			 * the natural log lookup table maps [0,0xffff]
+			 * (corresponding to real numbers [1/0x10000, 1] to
+			 * [0, 0xffffffffffff] (corresponding to real numbers
+			 * [-11.090355,0]).
+			 */
+			ln = crush_ln(u) - 0x1000000000000ll;
+
+			/*
+			 * divide by 16.16 fixed-point weight.  note
+			 * that the ln value is negative, so a larger
+			 * weight means a larger (less negative) value
+			 * for draw.
+			 */
+			draw = div64_s64(ln, w);
+		} else {
+			draw = S64_MIN;
+		}
+
+		if (i == 0 || draw > high_draw) {
+			high = i;
+			high_draw = draw;
+		}
+	}
+	return bucket->h.items[high];
+}
+
+
 static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
 {
 	dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
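
Note: bucket_straw2_choose() implements the "minimum of exponentials"
property referenced in its comment: each item draws ln(u)/w with u uniform
in (0,1], and the largest (least negative) draw wins, which selects item i
with probability w[i]/sum(w). The kernel does this in 64-bit fixed point
via crush_ln(); the same idea in floating point (a user-space sketch, build
with -lm):

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

static int straw2_choose(const double *w, int n)
{
	double best = 0.0;
	int i, high = 0;

	for (i = 0; i < n; i++) {
		double draw;

		if (w[i] > 0.0) {
			/* u uniform in (0,1); ln(u) is negative */
			double u = (rand() + 1.0) / ((double)RAND_MAX + 2.0);
			draw = log(u) / w[i];
		} else {
			draw = -INFINITY;	/* zero weight never wins */
		}
		if (i == 0 || draw > best) {
			best = draw;
			high = i;
		}
	}
	return high;
}

int main(void)
{
	double w[3] = { 1.0, 2.0, 3.0 };
	int counts[3] = { 0, 0, 0 };

	/* expect roughly a 1:2:3 split of the selections */
	for (int i = 0; i < 60000; i++)
		counts[straw2_choose(w, 3)]++;
	printf("%d %d %d\n", counts[0], counts[1], counts[2]);
	return 0;
}
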
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
 	case CRUSH_BUCKET_STRAW:
 		return bucket_straw_choose((struct crush_bucket_straw *)in,
 					   x, r);
+	case CRUSH_BUCKET_STRAW2:
+		return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
+					    x, r);
 	default:
 		dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
 		return in->items[0];
 	}
 }
 
+
 /*
  * true if device is marked "out" (failed, fully offloaded)
  * of the cluster
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map,
  * @type: the type of item to choose
  * @out: pointer to output vector
  * @outpos: our position in that vector
+ * @out_size: size of the out vector
 * @tries: number of attempts to make
 * @recurse_tries: number of attempts to have recursive chooseleaf make
 * @local_retries: localized retries
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map,
 			       const __u32 *weight, int weight_max,
 			       int x, int numrep, int type,
 			       int *out, int outpos,
+			       int out_size,
 			       unsigned int tries,
 			       unsigned int recurse_tries,
 			       unsigned int local_retries,
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map,
 	int item = 0;
 	int itemtype;
 	int collide, reject;
+	int count = out_size;
 
 	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
 		recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map,
 		tries, recurse_tries, local_retries, local_fallback_retries,
 		parent_r);
 
-	for (rep = outpos; rep < numrep; rep++) {
+	for (rep = outpos; rep < numrep && count > 0 ; rep++) {
 		/* keep trying until we get a non-out, non-colliding item */
 		ftotal = 0;
 		skip_rep = 0;
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map,
 							 map->buckets[-1-item],
 							 weight, weight_max,
 							 x, outpos+1, 0,
-							 out2, outpos,
+							 out2, outpos, count,
 							 recurse_tries, 0,
 							 local_retries,
 							 local_fallback_retries,
@@ -463,6 +566,7 @@ reject:
 		dprintk("CHOOSE got %d\n", item);
 		out[outpos] = item;
 		outpos++;
+		count--;
 	}
 
 	dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map,
 	__u32 step;
 	int i, j;
 	int numrep;
+	int out_size;
 	/*
 	 * the original choose_total_tries value was off by one (it
 	 * counted "retries" and not "tries").  add one.
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map,
 					x, numrep,
 					curstep->arg2,
 					o+osize, j,
+					result_max-osize,
 					choose_tries,
 					recurse_tries,
 					choose_local_retries,
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map,
 					c+osize,
 					0);
 			} else {
+				out_size = ((numrep < (result_max-osize)) ?
+					    numrep : (result_max-osize));
 				crush_choose_indep(
 					map,
 					map->buckets[-1-w[i]],
 					weight, weight_max,
-					x, numrep, numrep,
+					x, out_size, numrep,
 					curstep->arg2,
 					o+osize, j,
 					choose_tries,
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map,
 					recurse_to_leaf,
 					c+osize,
 					0);
-				osize += numrep;
+				osize += out_size;
 			}
 		}
 
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 14d9995097cc..593dc2eabcc8 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -22,6 +22,7 @@
  *      .../monmap      - current monmap
  *      .../osdc        - active osd requests
  *      .../monc        - mon client state
+ *      .../client_options - libceph-only (i.e. not rbd or cephfs) options
  *      .../dentry_lru  - dump contents of dentry lru
  *      .../caps        - expose cap (reservation) stats
  *      .../bdi         - symlink to ../../bdi/something
@@ -177,10 +178,24 @@ static int osdc_show(struct seq_file *s, void *pp)
 	return 0;
 }
 
+static int client_options_show(struct seq_file *s, void *p)
+{
+	struct ceph_client *client = s->private;
+	int ret;
+
+	ret = ceph_print_client_options(s, client);
+	if (ret)
+		return ret;
+
+	seq_putc(s, '\n');
+	return 0;
+}
+
 CEPH_DEFINE_SHOW_FUNC(monmap_show)
 CEPH_DEFINE_SHOW_FUNC(osdmap_show)
 CEPH_DEFINE_SHOW_FUNC(monc_show)
 CEPH_DEFINE_SHOW_FUNC(osdc_show)
+CEPH_DEFINE_SHOW_FUNC(client_options_show)
 
 int ceph_debugfs_init(void)
 {
@@ -242,6 +257,14 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 	if (!client->debugfs_osdmap)
 		goto out;
 
+	client->debugfs_options = debugfs_create_file("client_options",
+						      0600,
+						      client->debugfs_dir,
+						      client,
+						      &client_options_show_fops);
+	if (!client->debugfs_options)
+		goto out;
+
 	return 0;
 
 out:
@@ -252,6 +275,7 @@ out:
 void ceph_debugfs_client_cleanup(struct ceph_client *client)
 {
 	dout("ceph_debugfs_client_cleanup %p\n", client);
+	debugfs_remove(client->debugfs_options);
 	debugfs_remove(client->debugfs_osdmap);
 	debugfs_remove(client->debugfs_monmap);
 	debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a9f4ae45b7fb..967080a9f043 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -505,8 +505,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
 		pr_err("connect %s error %d\n",
 		       ceph_pr_addr(&con->peer_addr.in_addr), ret);
 		sock_release(sock);
-		con->error_msg = "connect error";
-
 		return ret;
 	}
 
@@ -2145,12 +2143,10 @@ static int process_connect(struct ceph_connection *con)
 		 * to WAIT.  This shouldn't happen if we are the
 		 * client.
 		 */
-		pr_err("process_connect got WAIT as client\n");
 		con->error_msg = "protocol error, got WAIT as client";
 		return -1;
 
 	default:
-		pr_err("connect protocol error, will retry\n");
 		con->error_msg = "protocol error, garbage tag during connect";
 		return -1;
 	}
@@ -2282,8 +2278,7 @@ static int read_partial_message(struct ceph_connection *con)
 
 	crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
 	if (cpu_to_le32(crc) != con->in_hdr.crc) {
-		pr_err("read_partial_message bad hdr "
-		       " crc %u != expected %u\n",
+		pr_err("read_partial_message bad hdr crc %u != expected %u\n",
 		       crc, con->in_hdr.crc);
 		return -EBADMSG;
 	}
@@ -2313,7 +2308,7 @@ static int read_partial_message(struct ceph_connection *con)
 		pr_err("read_partial_message bad seq %lld expected %lld\n",
 		       seq, con->in_seq + 1);
 		con->error_msg = "bad message sequence # for incoming message";
-		return -EBADMSG;
+		return -EBADE;
 	}
 
 	/* allocate message? */
@@ -2660,6 +2655,8 @@ more:
 	switch (ret) {
 	case -EBADMSG:
 		con->error_msg = "bad crc";
+		/* fall through */
+	case -EBADE:
 		ret = -EIO;
 		break;
 	case -EIO:
@@ -2838,7 +2835,8 @@ static void con_work(struct work_struct *work)
 		if (ret < 0) {
 			if (ret == -EAGAIN)
 				continue;
-			con->error_msg = "socket error on read";
+			if (!con->error_msg)
+				con->error_msg = "socket error on read";
 			fault = true;
 			break;
 		}
@@ -2847,7 +2845,8 @@ static void con_work(struct work_struct *work)
 		if (ret < 0) {
 			if (ret == -EAGAIN)
 				continue;
-			con->error_msg = "socket error on write";
+			if (!con->error_msg)
+				con->error_msg = "socket error on write";
 			fault = true;
 		}
 
@@ -2869,11 +2868,13 @@ static void con_work(struct work_struct *work)
  */
 static void con_fault(struct ceph_connection *con)
 {
-	pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
-		ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
 	dout("fault %p state %lu to peer %s\n",
 	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
 
+	pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+		ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+	con->error_msg = NULL;
+
 	WARN_ON(con->state != CON_STATE_CONNECTING &&
 	       con->state != CON_STATE_NEGOTIATING &&
 	       con->state != CON_STATE_OPEN);
@@ -3295,8 +3296,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
 		 */
 		if (*skip)
 			return 0;
-		con->error_msg = "error allocating memory for incoming message";
 
+		con->error_msg = "error allocating memory for incoming message";
 		return -ENOMEM;
 	}
 	memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index b8c3fde5b04f..15796696d64e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -122,6 +122,22 @@ bad:
 	return -EINVAL;
 }
 
+static int crush_decode_straw2_bucket(void **p, void *end,
+				      struct crush_bucket_straw2 *b)
+{
+	int j;
+	dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
+	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
+	if (b->item_weights == NULL)
+		return -ENOMEM;
+	ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
+	for (j = 0; j < b->h.size; j++)
+		b->item_weights[j] = ceph_decode_32(p);
+	return 0;
+bad:
+	return -EINVAL;
+}
+
 static int skip_name_map(void **p, void *end)
 {
 	int len;
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		case CRUSH_BUCKET_STRAW:
 			size = sizeof(struct crush_bucket_straw);
 			break;
+		case CRUSH_BUCKET_STRAW2:
+			size = sizeof(struct crush_bucket_straw2);
+			break;
 		default:
 			err = -EINVAL;
 			goto bad;
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 			if (err < 0)
 				goto bad;
 			break;
+		case CRUSH_BUCKET_STRAW2:
+			err = crush_decode_straw2_bucket(p, end,
+				(struct crush_bucket_straw2 *)b);
+			if (err < 0)
+				goto bad;
+			break;
 		}
 	}
 
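
Note: crush_decode_straw2_bucket() follows the usual libceph decode
pattern: reserve the bytes with ceph_decode_need(), then pull fixed-width
little-endian values with ceph_decode_32(). A stand-alone sketch of that
bounds-checked pattern (illustrative; assumes a little-endian host, which
is what the Ceph wire format uses):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Check that n*4 bytes remain before decoding an array of u32s,
 * advancing the cursor as we go. */
static int decode_u32_array(const uint8_t **p, const uint8_t *end,
			    uint32_t *out, size_t n)
{
	size_t i;

	if ((size_t)(end - *p) < n * sizeof(uint32_t))
		return -1;			/* truncated input */
	for (i = 0; i < n; i++) {
		memcpy(&out[i], *p, sizeof(uint32_t));
		*p += sizeof(uint32_t);
	}
	return 0;
}

int main(void)
{
	uint8_t buf[8] = { 1, 0, 0, 0, 2, 0, 0, 0 };
	const uint8_t *p = buf;
	uint32_t w[2];

	if (decode_u32_array(&p, buf + sizeof(buf), w, 2) == 0)
		printf("%u %u\n", w[0], w[1]);	/* -> 1 2 */
	return 0;
}
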
diff --git a/net/core/dev.c b/net/core/dev.c
index af4a1b0adc10..2c1c67fad64d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2713,7 +2713,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	if (unlikely(!skb))
 		goto out_null;
 
-	if (netif_needs_gso(dev, skb, features)) {
+	if (netif_needs_gso(skb, features)) {
 		struct sk_buff *segs;
 
 		segs = skb_gso_segment(skb, features);
@@ -3079,7 +3079,7 @@ static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
 {
-	if (next_cpu != RPS_NO_CPU) {
+	if (next_cpu < nr_cpu_ids) {
 #ifdef CONFIG_RFS_ACCEL
 		struct netdev_rx_queue *rxqueue;
 		struct rps_dev_flow_table *flow_table;
@@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		 * If the desired CPU (where last recvmsg was done) is
 		 * different from current CPU (one in the rx-queue flow
 		 * table entry), switch if one of the following holds:
-		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is unset (>= nr_cpu_ids).
 		 *   - Current CPU is offline.
 		 *   - The current CPU's queue tail has advanced beyond the
 		 *     last packet that was enqueued using this table entry.
@@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		 *     have been dequeued, thus preserving in order delivery.
 		 */
 		if (unlikely(tcpu != next_cpu) &&
-		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		    (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
 		      rflow->last_qtail)) >= 0)) {
 			tcpu = next_cpu;
 			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
 		}
 
-		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+		if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
 			goto done;
@@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 	struct rps_dev_flow_table *flow_table;
 	struct rps_dev_flow *rflow;
 	bool expire = true;
-	int cpu;
+	unsigned int cpu;
 
 	rcu_read_lock();
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	if (flow_table && flow_id <= flow_table->mask) {
 		rflow = &flow_table->flows[flow_id];
 		cpu = ACCESS_ONCE(rflow->cpu);
-		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
 		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
 			   rflow->last_qtail) <
 		     (int)(10 * flow_table->mask)))
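
Note: the RPS hunks above retire the RPS_NO_CPU sentinel in favor of a
range check: any CPU id >= nr_cpu_ids now means "unset", and cpu becomes
unsigned so the comparison cannot be defeated by sign extension. The check
in isolation, with a stand-in constant (hypothetical value, for
illustration only):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPU_IDS 8u	/* stand-in for the kernel's nr_cpu_ids */

static bool rps_cpu_valid(unsigned int cpu)
{
	/* 0xffff (the old RPS_NO_CPU sentinel) fails this test too */
	return cpu < NR_CPU_IDS;
}

int main(void)
{
	printf("%d %d\n", rps_cpu_valid(3), rps_cpu_valid(0xffff));
	return 0;
}
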
@@ -5209,7 +5209,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, | |||
5209 | if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) | 5209 | if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) |
5210 | return -EBUSY; | 5210 | return -EBUSY; |
5211 | 5211 | ||
5212 | if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) | 5212 | if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) |
5213 | return -EEXIST; | 5213 | return -EEXIST; |
5214 | 5214 | ||
5215 | if (master && netdev_master_upper_dev_get(dev)) | 5215 | if (master && netdev_master_upper_dev_get(dev)) |
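
The one-line fix above changes which list the -EEXIST check consults: a loop must be detected on the transitive (all_adj_list) uppers, but "already linked" is only meaningful for the direct (adj_list) uppers, otherwise a device could never be linked directly under an upper it already reaches through an intermediate. A toy model of the two checks (structures and names are invented for the demo):

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    #define MAX 8

    struct dev {
        struct dev *direct_upper[MAX];  /* models dev->adj_list.upper */
        struct dev *all_upper[MAX];     /* models dev->all_adj_list.upper */
    };

    static bool in_list(struct dev * const *list, const struct dev *d)
    {
        for (int i = 0; i < MAX; i++)
            if (list[i] == d)
                return true;
        return false;
    }

    /* Mirrors the two sanity checks in __netdev_upper_dev_link(). */
    static int link_upper(struct dev *dev, struct dev *upper)
    {
        if (in_list(upper->all_upper, dev))
            return -1;   /* -EBUSY: linking would create a loop */
        if (in_list(dev->direct_upper, upper))
            return -2;   /* -EEXIST: already a *direct* upper */
        return 0;        /* ok, even if upper is an indirect upper */
    }

    int main(void)
    {
        struct dev eth = { {0}, {0} }, team = { {0}, {0} }, br = { {0}, {0} };

        /* eth -> team -> br: br is an indirect upper of eth. */
        eth.direct_upper[0] = &team;
        eth.all_upper[0] = &team;
        eth.all_upper[1] = &br;
        team.direct_upper[0] = &br;
        team.all_upper[0] = &br;

        assert(link_upper(&eth, &br) == 0);    /* allowed after the fix */
        assert(link_upper(&eth, &team) == -2); /* direct duplicate */
        return 0;
    }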
diff --git a/net/core/filter.c b/net/core/filter.c index b669e75d2b36..bf831a85c315 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) | |||
1175 | return 0; | 1175 | return 0; |
1176 | } | 1176 | } |
1177 | 1177 | ||
1178 | /** | ||
1179 | * bpf_skb_clone_unwritable - is the header of a clone not writable | ||
1180 | * @skb: buffer to check | ||
1181 | * @len: length up to which to write, can be negative | ||
1182 | * | ||
1183 | * Returns true if modifying the header part of the cloned buffer | ||
1184 | * requires the data to be copied first. Unlike skb_clone_writable(), | ||
1185 | * this version handles the negative lengths needed by eBPF. | ||
1186 | */ | ||
1187 | static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len) | ||
1188 | { | ||
1189 | return skb_header_cloned(skb) || | ||
1190 | (int) skb_headroom(skb) + len > skb->hdr_len; | ||
1191 | } | ||
1192 | |||
1178 | #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) | 1193 | #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) |
1179 | 1194 | ||
1180 | static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) | 1195 | static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) |
1181 | { | 1196 | { |
1182 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | 1197 | struct sk_buff *skb = (struct sk_buff *) (long) r1; |
1183 | unsigned int offset = (unsigned int) r2; | 1198 | int offset = (int) r2; |
1184 | void *from = (void *) (long) r3; | 1199 | void *from = (void *) (long) r3; |
1185 | unsigned int len = (unsigned int) r4; | 1200 | unsigned int len = (unsigned int) r4; |
1186 | char buf[16]; | 1201 | char buf[16]; |
@@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) | |||
1194 | * | 1209 | * |
1195 | * so check for invalid 'offset' and too large 'len' | 1210 | * so check for invalid 'offset' and too large 'len' |
1196 | */ | 1211 | */ |
1197 | if (unlikely(offset > 0xffff || len > sizeof(buf))) | 1212 | if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) |
1198 | return -EFAULT; | 1213 | return -EFAULT; |
1199 | 1214 | ||
1200 | if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) | 1215 | offset -= skb->data - skb_mac_header(skb); |
1216 | if (unlikely(skb_cloned(skb) && | ||
1217 | bpf_skb_clone_unwritable(skb, offset + len))) | ||
1201 | return -EFAULT; | 1218 | return -EFAULT; |
1202 | 1219 | ||
1203 | ptr = skb_header_pointer(skb, offset, len, buf); | 1220 | ptr = skb_header_pointer(skb, offset, len, buf); |
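
bpf_skb_store_bytes() now takes its offset mac-header-relative, as eBPF programs see the packet, and rebases it to the data-relative offset skb_header_pointer() expects; the (u32) cast in the bounds check rejects negative raw values from the 64-bit register before the rebase. A userspace sketch of the arithmetic (the buffer layout is invented):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t frame[128];
        uint8_t *mac  = frame;       /* models skb_mac_header(skb) */
        uint8_t *data = frame + 14;  /* models skb->data, at the L3 header */

        /* The raw program-supplied offset is validated first; casting to
         * u32 also rejects negative values coming from the register. */
        int off = 20;                /* mac-relative, as BPF sees it */
        assert((uint32_t)off <= 0xffff);

        /* Rebase to data-relative, which skb_header_pointer() expects. */
        off -= (int)(data - mac);
        assert(off == 6);            /* six bytes into the L3 header */

        /* Offsets inside the mac header legitimately go negative after
         * the rebase - the reason the new clone-writability helper must
         * accept negative lengths instead of skb_clone_writable(). */
        int off2 = 4;
        off2 -= (int)(data - mac);
        assert(off2 == -10);
        return 0;
    }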
@@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { | |||
1232 | #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) | 1249 | #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) |
1233 | #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) | 1250 | #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) |
1234 | 1251 | ||
1235 | static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) | 1252 | static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) |
1236 | { | 1253 | { |
1237 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | 1254 | struct sk_buff *skb = (struct sk_buff *) (long) r1; |
1255 | int offset = (int) r2; | ||
1238 | __sum16 sum, *ptr; | 1256 | __sum16 sum, *ptr; |
1239 | 1257 | ||
1240 | if (unlikely(offset > 0xffff)) | 1258 | if (unlikely((u32) offset > 0xffff)) |
1241 | return -EFAULT; | 1259 | return -EFAULT; |
1242 | 1260 | ||
1243 | if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) | 1261 | offset -= skb->data - skb_mac_header(skb); |
1262 | if (unlikely(skb_cloned(skb) && | ||
1263 | bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) | ||
1244 | return -EFAULT; | 1264 | return -EFAULT; |
1245 | 1265 | ||
1246 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); | 1266 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); |
@@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { | |||
1276 | .arg5_type = ARG_ANYTHING, | 1296 | .arg5_type = ARG_ANYTHING, |
1277 | }; | 1297 | }; |
1278 | 1298 | ||
1279 | static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) | 1299 | static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) |
1280 | { | 1300 | { |
1281 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | 1301 | struct sk_buff *skb = (struct sk_buff *) (long) r1; |
1282 | u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); | 1302 | u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); |
1303 | int offset = (int) r2; | ||
1283 | __sum16 sum, *ptr; | 1304 | __sum16 sum, *ptr; |
1284 | 1305 | ||
1285 | if (unlikely(offset > 0xffff)) | 1306 | if (unlikely((u32) offset > 0xffff)) |
1286 | return -EFAULT; | 1307 | return -EFAULT; |
1287 | 1308 | ||
1288 | if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) | 1309 | offset -= skb->data - skb_mac_header(skb); |
1310 | if (unlikely(skb_cloned(skb) && | ||
1311 | bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) | ||
1289 | return -EFAULT; | 1312 | return -EFAULT; |
1290 | 1313 | ||
1291 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); | 1314 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); |
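
The two csum_replace helpers patch a 16-bit internet checksum in place when a single header field changes. Per RFC 1624 the new checksum follows from the old checksum plus the old and new field values, with no need to re-sum the packet. A standalone sketch with a brute-force cross-check:

    #include <assert.h>
    #include <stdint.h>

    /* Fold a 32-bit accumulator into a 16-bit one's-complement sum. */
    static uint16_t fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
    }

    /* RFC 1624: HC' = ~(~HC + ~m + m'), all one's-complement arithmetic. */
    static uint16_t csum_update(uint16_t old_csum, uint16_t old_field,
                                uint16_t new_field)
    {
        uint32_t sum = (uint16_t)~old_csum;

        sum += (uint16_t)~old_field;
        sum += new_field;
        return ~fold(sum);
    }

    /* Naive full checksum over 16-bit words, for cross-checking. */
    static uint16_t csum_full(const uint16_t *words, int n)
    {
        uint32_t sum = 0;

        for (int i = 0; i < n; i++)
            sum += words[i];
        return ~fold(sum);
    }

    int main(void)
    {
        uint16_t pkt[4] = { 0x4500, 0x0054, 0xabcd, 0x0000 };
        uint16_t c = csum_full(pkt, 4);
        uint16_t old = pkt[2];

        pkt[2] = 0x1234;                     /* change one header field */
        assert(csum_update(c, old, pkt[2]) == csum_full(pkt, 4));
        return 0;
    }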
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a3abb719221f..572af0011997 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/export.h> | 16 | #include <linux/export.h> |
17 | #include <linux/user_namespace.h> | 17 | #include <linux/user_namespace.h> |
18 | #include <linux/net_namespace.h> | 18 | #include <linux/net_namespace.h> |
19 | #include <linux/rtnetlink.h> | ||
20 | #include <net/sock.h> | 19 | #include <net/sock.h> |
21 | #include <net/netlink.h> | 20 | #include <net/netlink.h> |
22 | #include <net/net_namespace.h> | 21 | #include <net/net_namespace.h> |
@@ -602,7 +601,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
602 | } | 601 | } |
603 | 602 | ||
604 | err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, | 603 | err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, |
605 | RTM_GETNSID, net, peer, -1); | 604 | RTM_NEWNSID, net, peer, -1); |
606 | if (err < 0) | 605 | if (err < 0) |
607 | goto err_out; | 606 | goto err_out; |
608 | 607 | ||
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 358d52a38533..666e0928ba40 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -2854,7 +2854,7 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, | |||
2854 | 2854 | ||
2855 | int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, | 2855 | int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, |
2856 | struct net_device *dev, u16 mode, | 2856 | struct net_device *dev, u16 mode, |
2857 | u32 flags, u32 mask) | 2857 | u32 flags, u32 mask, int nlflags) |
2858 | { | 2858 | { |
2859 | struct nlmsghdr *nlh; | 2859 | struct nlmsghdr *nlh; |
2860 | struct ifinfomsg *ifm; | 2860 | struct ifinfomsg *ifm; |
@@ -2863,7 +2863,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, | |||
2863 | u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; | 2863 | u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; |
2864 | struct net_device *br_dev = netdev_master_upper_dev_get(dev); | 2864 | struct net_device *br_dev = netdev_master_upper_dev_get(dev); |
2865 | 2865 | ||
2866 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); | 2866 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); |
2867 | if (nlh == NULL) | 2867 | if (nlh == NULL) |
2868 | return -EMSGSIZE; | 2868 | return -EMSGSIZE; |
2869 | 2869 | ||
@@ -2969,7 +2969,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) | |||
2969 | if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { | 2969 | if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { |
2970 | if (idx >= cb->args[0] && | 2970 | if (idx >= cb->args[0] && |
2971 | br_dev->netdev_ops->ndo_bridge_getlink( | 2971 | br_dev->netdev_ops->ndo_bridge_getlink( |
2972 | skb, portid, seq, dev, filter_mask) < 0) | 2972 | skb, portid, seq, dev, filter_mask, |
2973 | NLM_F_MULTI) < 0) | ||
2973 | break; | 2974 | break; |
2974 | idx++; | 2975 | idx++; |
2975 | } | 2976 | } |
@@ -2977,7 +2978,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) | |||
2977 | if (ops->ndo_bridge_getlink) { | 2978 | if (ops->ndo_bridge_getlink) { |
2978 | if (idx >= cb->args[0] && | 2979 | if (idx >= cb->args[0] && |
2979 | ops->ndo_bridge_getlink(skb, portid, seq, dev, | 2980 | ops->ndo_bridge_getlink(skb, portid, seq, dev, |
2980 | filter_mask) < 0) | 2981 | filter_mask, |
2982 | NLM_F_MULTI) < 0) | ||
2981 | break; | 2983 | break; |
2982 | idx++; | 2984 | idx++; |
2983 | } | 2985 | } |
@@ -3018,7 +3020,7 @@ static int rtnl_bridge_notify(struct net_device *dev) | |||
3018 | goto errout; | 3020 | goto errout; |
3019 | } | 3021 | } |
3020 | 3022 | ||
3021 | err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); | 3023 | err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0); |
3022 | if (err < 0) | 3024 | if (err < 0) |
3023 | goto errout; | 3025 | goto errout; |
3024 | 3026 | ||
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3b6e5830256e..3cfff2a3d651 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -280,13 +280,14 @@ nodata: | |||
280 | EXPORT_SYMBOL(__alloc_skb); | 280 | EXPORT_SYMBOL(__alloc_skb); |
281 | 281 | ||
282 | /** | 282 | /** |
283 | * build_skb - build a network buffer | 283 | * __build_skb - build a network buffer |
284 | * @data: data buffer provided by caller | 284 | * @data: data buffer provided by caller |
285 | * @frag_size: size of fragment, or 0 if head was kmalloced | 285 | * @frag_size: size of data, or 0 if head was kmalloced |
286 | * | 286 | * |
287 | * Allocate a new &sk_buff. Caller provides space holding head and | 287 | * Allocate a new &sk_buff. Caller provides space holding head and |
288 | * skb_shared_info. @data must have been allocated by kmalloc() only if | 288 | * skb_shared_info. @data must have been allocated by kmalloc() only if |
289 | * @frag_size is 0, otherwise data should come from the page allocator. | 289 | * @frag_size is 0, otherwise data should come from the page allocator |
290 | * or vmalloc(). | ||
290 | * The return is the new skb buffer. | 291 | * The return is the new skb buffer. |
291 | * On a failure the return is %NULL, and @data is not freed. | 292 | * On a failure the return is %NULL, and @data is not freed. |
292 | * Notes : | 293 | * Notes : |
@@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb); | |||
297 | * before giving packet to stack. | 298 | * before giving packet to stack. |
298 | * RX rings only contains data buffers, not full skbs. | 299 | * RX rings only contains data buffers, not full skbs. |
299 | */ | 300 | */ |
300 | struct sk_buff *build_skb(void *data, unsigned int frag_size) | 301 | struct sk_buff *__build_skb(void *data, unsigned int frag_size) |
301 | { | 302 | { |
302 | struct skb_shared_info *shinfo; | 303 | struct skb_shared_info *shinfo; |
303 | struct sk_buff *skb; | 304 | struct sk_buff *skb; |
@@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) | |||
311 | 312 | ||
312 | memset(skb, 0, offsetof(struct sk_buff, tail)); | 313 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
313 | skb->truesize = SKB_TRUESIZE(size); | 314 | skb->truesize = SKB_TRUESIZE(size); |
314 | skb->head_frag = frag_size != 0; | ||
315 | atomic_set(&skb->users, 1); | 315 | atomic_set(&skb->users, 1); |
316 | skb->head = data; | 316 | skb->head = data; |
317 | skb->data = data; | 317 | skb->data = data; |
@@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) | |||
328 | 328 | ||
329 | return skb; | 329 | return skb; |
330 | } | 330 | } |
331 | |||
332 | /* build_skb() is a wrapper over __build_skb() that additionally | ||
333 | * takes care of skb->head_frag and skb->pfmemalloc. | ||
334 | * This means that if @frag_size is not zero, then @data must be backed | ||
335 | * by a page fragment, not kmalloc() or vmalloc() | ||
336 | */ | ||
337 | struct sk_buff *build_skb(void *data, unsigned int frag_size) | ||
338 | { | ||
339 | struct sk_buff *skb = __build_skb(data, frag_size); | ||
340 | |||
341 | if (skb && frag_size) { | ||
342 | skb->head_frag = 1; | ||
343 | if (virt_to_head_page(data)->pfmemalloc) | ||
344 | skb->pfmemalloc = 1; | ||
345 | } | ||
346 | return skb; | ||
347 | } | ||
331 | EXPORT_SYMBOL(build_skb); | 348 | EXPORT_SYMBOL(build_skb); |
332 | 349 | ||
333 | struct netdev_alloc_cache { | 350 | struct netdev_alloc_cache { |
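
The hunk above splits skb construction in two: __build_skb() is the flag-free core that makes no assumption about where @data came from (so it also suits kmalloc()ed buffers, e.g. for netlink), while build_skb() keeps the page-fragment-only extras of setting head_frag and propagating the page's pfmemalloc bit. A userspace model of the split (struct and names invented for the demo):

    #include <stdbool.h>
    #include <stdlib.h>

    struct buf {
        void *head;
        bool head_frag;    /* head came from a page fragment */
        bool pfmemalloc;   /* head came from emergency reserves */
    };

    /* Core constructor: makes no assumption about where 'data' came
     * from, so it is safe for kmalloc()ed memory too. */
    static struct buf *core_build(void *data)
    {
        struct buf *b = calloc(1, sizeof(*b));

        if (b)
            b->head = data;
        return b;
    }

    /* Wrapper: only for page-fragment data; stamps the flags that the
     * core constructor deliberately no longer touches. */
    static struct buf *frag_build(void *data, unsigned int frag_size,
                                  bool page_is_pfmemalloc)
    {
        struct buf *b = core_build(data);

        if (b && frag_size) {
            b->head_frag = true;
            b->pfmemalloc = page_is_pfmemalloc; /* virt_to_head_page() in kernel */
        }
        return b;
    }

    int main(void)
    {
        static char frag[256];
        struct buf *b = frag_build(frag, sizeof(frag), false);

        return (b && b->head_frag) ? 0 : 1;
    }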
@@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, | |||
348 | gfp_t gfp = gfp_mask; | 365 | gfp_t gfp = gfp_mask; |
349 | 366 | ||
350 | if (order) { | 367 | if (order) { |
351 | gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; | 368 | gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | |
369 | __GFP_NOMEMALLOC; | ||
352 | page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); | 370 | page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); |
353 | nc->frag.size = PAGE_SIZE << (page ? order : 0); | 371 | nc->frag.size = PAGE_SIZE << (page ? order : 0); |
354 | } | 372 | } |
@@ -4124,19 +4142,21 @@ EXPORT_SYMBOL(skb_try_coalesce); | |||
4124 | */ | 4142 | */ |
4125 | void skb_scrub_packet(struct sk_buff *skb, bool xnet) | 4143 | void skb_scrub_packet(struct sk_buff *skb, bool xnet) |
4126 | { | 4144 | { |
4127 | if (xnet) | ||
4128 | skb_orphan(skb); | ||
4129 | skb->tstamp.tv64 = 0; | 4145 | skb->tstamp.tv64 = 0; |
4130 | skb->pkt_type = PACKET_HOST; | 4146 | skb->pkt_type = PACKET_HOST; |
4131 | skb->skb_iif = 0; | 4147 | skb->skb_iif = 0; |
4132 | skb->ignore_df = 0; | 4148 | skb->ignore_df = 0; |
4133 | skb_dst_drop(skb); | 4149 | skb_dst_drop(skb); |
4134 | skb->mark = 0; | ||
4135 | skb_sender_cpu_clear(skb); | 4150 | skb_sender_cpu_clear(skb); |
4136 | skb_init_secmark(skb); | ||
4137 | secpath_reset(skb); | 4151 | secpath_reset(skb); |
4138 | nf_reset(skb); | 4152 | nf_reset(skb); |
4139 | nf_reset_trace(skb); | 4153 | nf_reset_trace(skb); |
4154 | |||
4155 | if (!xnet) | ||
4156 | return; | ||
4157 | |||
4158 | skb_orphan(skb); | ||
4159 | skb->mark = 0; | ||
4140 | } | 4160 | } |
4141 | EXPORT_SYMBOL_GPL(skb_scrub_packet); | 4161 | EXPORT_SYMBOL_GPL(skb_scrub_packet); |
4142 | 4162 | ||
diff --git a/net/core/sock.c b/net/core/sock.c index e891bcf325ca..292f42228bfb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1474,8 +1474,8 @@ void sk_release_kernel(struct sock *sk) | |||
1474 | return; | 1474 | return; |
1475 | 1475 | ||
1476 | sock_hold(sk); | 1476 | sock_hold(sk); |
1477 | sock_net_set(sk, get_net(&init_net)); | ||
1478 | sock_release(sk->sk_socket); | 1477 | sock_release(sk->sk_socket); |
1478 | sock_net_set(sk, get_net(&init_net)); | ||
1479 | sock_put(sk); | 1479 | sock_put(sk); |
1480 | } | 1480 | } |
1481 | EXPORT_SYMBOL(sk_release_kernel); | 1481 | EXPORT_SYMBOL(sk_release_kernel); |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2b4f21d34df6..ccf4c5629b3c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -453,7 +453,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
453 | iph->saddr, iph->daddr); | 453 | iph->saddr, iph->daddr); |
454 | if (req) { | 454 | if (req) { |
455 | nsk = dccp_check_req(sk, skb, req); | 455 | nsk = dccp_check_req(sk, skb, req); |
456 | reqsk_put(req); | 456 | if (!nsk) |
457 | reqsk_put(req); | ||
457 | return nsk; | 458 | return nsk; |
458 | } | 459 | } |
459 | nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, | 460 | nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, |
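
This dccp fix (mirrored for IPv6 just below) changes reference ownership: dccp_check_req() consumes the request-socket reference when it returns a child, so the caller may only drop its own reference on the failure path; the unconditional reqsk_put() was a double put. A minimal refcount model of that calling convention (names invented):

    #include <assert.h>
    #include <stdlib.h>

    struct req {
        int refcnt;
    };

    static void req_put(struct req *r)
    {
        assert(r->refcnt > 0);          /* catches the double-put bug */
        if (--r->refcnt == 0)
            free(r);
    }

    /* On success the callee takes over the caller's reference (it may
     * hand the req to the child or free it); on failure the caller
     * still owns it. */
    static void *check_req(struct req *r, int ok)
    {
        if (ok) {
            req_put(r);                 /* callee consumes the reference */
            return (void *)0x1;         /* dummy child for the demo */
        }
        return NULL;
    }

    int main(void)
    {
        struct req *r = malloc(sizeof(*r));

        r->refcnt = 1;
        if (!check_req(r, 1))
            req_put(r);                 /* fixed caller: put only on failure */
        return 0;
    }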
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9d0551092c6c..5165571f397a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -301,7 +301,8 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | |||
301 | &iph->daddr, inet6_iif(skb)); | 301 | &iph->daddr, inet6_iif(skb)); |
302 | if (req) { | 302 | if (req) { |
303 | nsk = dccp_check_req(sk, skb, req); | 303 | nsk = dccp_check_req(sk, skb, req); |
304 | reqsk_put(req); | 304 | if (!nsk) |
305 | reqsk_put(req); | ||
305 | return nsk; | 306 | return nsk; |
306 | } | 307 | } |
307 | nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, | 308 | nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, |
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 5f566663e47f..30addee2dd03 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c | |||
@@ -186,8 +186,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | |||
186 | if (child == NULL) | 186 | if (child == NULL) |
187 | goto listen_overflow; | 187 | goto listen_overflow; |
188 | 188 | ||
189 | inet_csk_reqsk_queue_unlink(sk, req); | 189 | inet_csk_reqsk_queue_drop(sk, req); |
190 | inet_csk_reqsk_queue_removed(sk, req); | ||
191 | inet_csk_reqsk_queue_add(sk, req, child); | 190 | inet_csk_reqsk_queue_add(sk, req, child); |
192 | out: | 191 | out: |
193 | return child; | 192 | return child; |
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 5eaadabe23a1..e6f6cc3a1bcf 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c | |||
@@ -124,7 +124,7 @@ static ssize_t temp1_max_store(struct device *dev, | |||
124 | 124 | ||
125 | return count; | 125 | return count; |
126 | } | 126 | } |
127 | static DEVICE_ATTR(temp1_max, S_IRUGO, temp1_max_show, temp1_max_store); | 127 | static DEVICE_ATTR_RW(temp1_max); |
128 | 128 | ||
129 | static ssize_t temp1_max_alarm_show(struct device *dev, | 129 | static ssize_t temp1_max_alarm_show(struct device *dev, |
130 | struct device_attribute *attr, char *buf) | 130 | struct device_attribute *attr, char *buf) |
@@ -159,8 +159,8 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, | |||
159 | if (index == 1) { | 159 | if (index == 1) { |
160 | if (!drv->get_temp_limit) | 160 | if (!drv->get_temp_limit) |
161 | mode = 0; | 161 | mode = 0; |
162 | else if (drv->set_temp_limit) | 162 | else if (!drv->set_temp_limit) |
163 | mode |= S_IWUSR; | 163 | mode &= ~S_IWUSR; |
164 | } else if (index == 2 && !drv->get_temp_alarm) { | 164 | } else if (index == 2 && !drv->get_temp_alarm) { |
165 | mode = 0; | 165 | mode = 0; |
166 | } | 166 | } |
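
With the attribute now declared via DEVICE_ATTR_RW (mode 0644), the is_visible callback has to mask bits off when the driver lacks an operation, rather than or-ing S_IWUSR onto a read-only base mode as the old code did. A compact model of mask-down visibility (mode constants simplified):

    #include <assert.h>

    #define S_IRUGO 0444
    #define S_IWUSR 0200

    struct drv_ops {
        int (*get_temp_limit)(int *val);
        int (*set_temp_limit)(int val);
    };

    static int demo_get(int *val) { *val = 0; return 0; }

    /* Start from the attribute's declared mode (here RW) and strip what
     * the driver cannot support - never add bits the declaration lacks. */
    static unsigned int attr_visible(const struct drv_ops *drv,
                                     unsigned int mode)
    {
        if (!drv->get_temp_limit)
            return 0;               /* hide the attribute entirely */
        if (!drv->set_temp_limit)
            mode &= ~S_IWUSR;       /* expose it read-only */
        return mode;
    }

    int main(void)
    {
        struct drv_ops read_only = { .get_temp_limit = demo_get };

        assert(attr_visible(&read_only, S_IRUGO | S_IWUSR) == S_IRUGO);
        return 0;
    }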
@@ -633,7 +633,7 @@ static int dsa_of_probe(struct device *dev) | |||
633 | if (cd->sw_addr > PHY_MAX_ADDR) | 633 | if (cd->sw_addr > PHY_MAX_ADDR) |
634 | continue; | 634 | continue; |
635 | 635 | ||
636 | if (!of_property_read_u32(np, "eeprom-length", &eeprom_len)) | 636 | if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) |
637 | cd->eeprom_len = eeprom_len; | 637 | cd->eeprom_len = eeprom_len; |
638 | 638 | ||
639 | for_each_available_child_of_node(child, port) { | 639 | for_each_available_child_of_node(child, port) { |
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 05dab2957cd4..4adfd4d5471b 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile | |||
@@ -3,7 +3,9 @@ obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o | |||
3 | obj-y += 6lowpan/ | 3 | obj-y += 6lowpan/ |
4 | 4 | ||
5 | ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ | 5 | ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ |
6 | header_ops.o sysfs.o nl802154.o | 6 | header_ops.o sysfs.o nl802154.o trace.o |
7 | ieee802154_socket-y := socket.o | 7 | ieee802154_socket-y := socket.o |
8 | 8 | ||
9 | CFLAGS_trace.o := -I$(src) | ||
10 | |||
9 | ccflags-y += -D__CHECK_ENDIAN__ | 11 | ccflags-y += -D__CHECK_ENDIAN__ |
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 1b9d25f6e898..346c6665d25e 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c | |||
@@ -175,6 +175,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) | |||
175 | int rc = -ENOBUFS; | 175 | int rc = -ENOBUFS; |
176 | struct net_device *dev; | 176 | struct net_device *dev; |
177 | int type = __IEEE802154_DEV_INVALID; | 177 | int type = __IEEE802154_DEV_INVALID; |
178 | unsigned char name_assign_type; | ||
178 | 179 | ||
179 | pr_debug("%s\n", __func__); | 180 | pr_debug("%s\n", __func__); |
180 | 181 | ||
@@ -190,8 +191,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) | |||
190 | if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] | 191 | if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] |
191 | != '\0') | 192 | != '\0') |
192 | return -EINVAL; /* phy name should be null-terminated */ | 193 | return -EINVAL; /* phy name should be null-terminated */ |
194 | name_assign_type = NET_NAME_USER; | ||
193 | } else { | 195 | } else { |
194 | devname = "wpan%d"; | 196 | devname = "wpan%d"; |
197 | name_assign_type = NET_NAME_ENUM; | ||
195 | } | 198 | } |
196 | 199 | ||
197 | if (strlen(devname) >= IFNAMSIZ) | 200 | if (strlen(devname) >= IFNAMSIZ) |
@@ -221,7 +224,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) | |||
221 | } | 224 | } |
222 | 225 | ||
223 | dev = rdev_add_virtual_intf_deprecated(wpan_phy_to_rdev(phy), devname, | 226 | dev = rdev_add_virtual_intf_deprecated(wpan_phy_to_rdev(phy), devname, |
224 | type); | 227 | name_assign_type, type); |
225 | if (IS_ERR(dev)) { | 228 | if (IS_ERR(dev)) { |
226 | rc = PTR_ERR(dev); | 229 | rc = PTR_ERR(dev); |
227 | goto nla_put_failure; | 230 | goto nla_put_failure; |
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index a4daf91b8d0a..f3c12f6a4a39 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c | |||
@@ -589,7 +589,7 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info) | |||
589 | 589 | ||
590 | return rdev_add_virtual_intf(rdev, | 590 | return rdev_add_virtual_intf(rdev, |
591 | nla_data(info->attrs[NL802154_ATTR_IFNAME]), | 591 | nla_data(info->attrs[NL802154_ATTR_IFNAME]), |
592 | type, extended_addr); | 592 | NET_NAME_USER, type, extended_addr); |
593 | } | 593 | } |
594 | 594 | ||
595 | static int nl802154_del_interface(struct sk_buff *skb, struct genl_info *info) | 595 | static int nl802154_del_interface(struct sk_buff *skb, struct genl_info *info) |
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 7c46732fad2b..7b5a9dd94fe5 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h | |||
@@ -4,13 +4,16 @@ | |||
4 | #include <net/cfg802154.h> | 4 | #include <net/cfg802154.h> |
5 | 5 | ||
6 | #include "core.h" | 6 | #include "core.h" |
7 | #include "trace.h" | ||
7 | 8 | ||
8 | static inline struct net_device * | 9 | static inline struct net_device * |
9 | rdev_add_virtual_intf_deprecated(struct cfg802154_registered_device *rdev, | 10 | rdev_add_virtual_intf_deprecated(struct cfg802154_registered_device *rdev, |
10 | const char *name, int type) | 11 | const char *name, |
12 | unsigned char name_assign_type, | ||
13 | int type) | ||
11 | { | 14 | { |
12 | return rdev->ops->add_virtual_intf_deprecated(&rdev->wpan_phy, name, | 15 | return rdev->ops->add_virtual_intf_deprecated(&rdev->wpan_phy, name, |
13 | type); | 16 | name_assign_type, type); |
14 | } | 17 | } |
15 | 18 | ||
16 | static inline void | 19 | static inline void |
@@ -22,75 +25,131 @@ rdev_del_virtual_intf_deprecated(struct cfg802154_registered_device *rdev, | |||
22 | 25 | ||
23 | static inline int | 26 | static inline int |
24 | rdev_add_virtual_intf(struct cfg802154_registered_device *rdev, char *name, | 27 | rdev_add_virtual_intf(struct cfg802154_registered_device *rdev, char *name, |
28 | unsigned char name_assign_type, | ||
25 | enum nl802154_iftype type, __le64 extended_addr) | 29 | enum nl802154_iftype type, __le64 extended_addr) |
26 | { | 30 | { |
27 | return rdev->ops->add_virtual_intf(&rdev->wpan_phy, name, type, | 31 | int ret; |
32 | |||
33 | trace_802154_rdev_add_virtual_intf(&rdev->wpan_phy, name, type, | ||
28 | extended_addr); | 34 | extended_addr); |
35 | ret = rdev->ops->add_virtual_intf(&rdev->wpan_phy, name, | ||
36 | name_assign_type, type, | ||
37 | extended_addr); | ||
38 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
39 | return ret; | ||
29 | } | 40 | } |
30 | 41 | ||
31 | static inline int | 42 | static inline int |
32 | rdev_del_virtual_intf(struct cfg802154_registered_device *rdev, | 43 | rdev_del_virtual_intf(struct cfg802154_registered_device *rdev, |
33 | struct wpan_dev *wpan_dev) | 44 | struct wpan_dev *wpan_dev) |
34 | { | 45 | { |
35 | return rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev); | 46 | int ret; |
47 | |||
48 | trace_802154_rdev_del_virtual_intf(&rdev->wpan_phy, wpan_dev); | ||
49 | ret = rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev); | ||
50 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
51 | return ret; | ||
36 | } | 52 | } |
37 | 53 | ||
38 | static inline int | 54 | static inline int |
39 | rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) | 55 | rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) |
40 | { | 56 | { |
41 | return rdev->ops->set_channel(&rdev->wpan_phy, page, channel); | 57 | int ret; |
58 | |||
59 | trace_802154_rdev_set_channel(&rdev->wpan_phy, page, channel); | ||
60 | ret = rdev->ops->set_channel(&rdev->wpan_phy, page, channel); | ||
61 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
62 | return ret; | ||
42 | } | 63 | } |
43 | 64 | ||
44 | static inline int | 65 | static inline int |
45 | rdev_set_cca_mode(struct cfg802154_registered_device *rdev, | 66 | rdev_set_cca_mode(struct cfg802154_registered_device *rdev, |
46 | const struct wpan_phy_cca *cca) | 67 | const struct wpan_phy_cca *cca) |
47 | { | 68 | { |
48 | return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); | 69 | int ret; |
70 | |||
71 | trace_802154_rdev_set_cca_mode(&rdev->wpan_phy, cca); | ||
72 | ret = rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); | ||
73 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
74 | return ret; | ||
49 | } | 75 | } |
50 | 76 | ||
51 | static inline int | 77 | static inline int |
52 | rdev_set_pan_id(struct cfg802154_registered_device *rdev, | 78 | rdev_set_pan_id(struct cfg802154_registered_device *rdev, |
53 | struct wpan_dev *wpan_dev, __le16 pan_id) | 79 | struct wpan_dev *wpan_dev, __le16 pan_id) |
54 | { | 80 | { |
55 | return rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id); | 81 | int ret; |
82 | |||
83 | trace_802154_rdev_set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id); | ||
84 | ret = rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id); | ||
85 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
86 | return ret; | ||
56 | } | 87 | } |
57 | 88 | ||
58 | static inline int | 89 | static inline int |
59 | rdev_set_short_addr(struct cfg802154_registered_device *rdev, | 90 | rdev_set_short_addr(struct cfg802154_registered_device *rdev, |
60 | struct wpan_dev *wpan_dev, __le16 short_addr) | 91 | struct wpan_dev *wpan_dev, __le16 short_addr) |
61 | { | 92 | { |
62 | return rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr); | 93 | int ret; |
94 | |||
95 | trace_802154_rdev_set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr); | ||
96 | ret = rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr); | ||
97 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
98 | return ret; | ||
63 | } | 99 | } |
64 | 100 | ||
65 | static inline int | 101 | static inline int |
66 | rdev_set_backoff_exponent(struct cfg802154_registered_device *rdev, | 102 | rdev_set_backoff_exponent(struct cfg802154_registered_device *rdev, |
67 | struct wpan_dev *wpan_dev, u8 min_be, u8 max_be) | 103 | struct wpan_dev *wpan_dev, u8 min_be, u8 max_be) |
68 | { | 104 | { |
69 | return rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev, | 105 | int ret; |
106 | |||
107 | trace_802154_rdev_set_backoff_exponent(&rdev->wpan_phy, wpan_dev, | ||
70 | min_be, max_be); | 108 | min_be, max_be); |
109 | ret = rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev, | ||
110 | min_be, max_be); | ||
111 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
112 | return ret; | ||
71 | } | 113 | } |
72 | 114 | ||
73 | static inline int | 115 | static inline int |
74 | rdev_set_max_csma_backoffs(struct cfg802154_registered_device *rdev, | 116 | rdev_set_max_csma_backoffs(struct cfg802154_registered_device *rdev, |
75 | struct wpan_dev *wpan_dev, u8 max_csma_backoffs) | 117 | struct wpan_dev *wpan_dev, u8 max_csma_backoffs) |
76 | { | 118 | { |
77 | return rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev, | 119 | int ret; |
78 | max_csma_backoffs); | 120 | |
121 | trace_802154_rdev_set_csma_backoffs(&rdev->wpan_phy, wpan_dev, | ||
122 | max_csma_backoffs); | ||
123 | ret = rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev, | ||
124 | max_csma_backoffs); | ||
125 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
126 | return ret; | ||
79 | } | 127 | } |
80 | 128 | ||
81 | static inline int | 129 | static inline int |
82 | rdev_set_max_frame_retries(struct cfg802154_registered_device *rdev, | 130 | rdev_set_max_frame_retries(struct cfg802154_registered_device *rdev, |
83 | struct wpan_dev *wpan_dev, s8 max_frame_retries) | 131 | struct wpan_dev *wpan_dev, s8 max_frame_retries) |
84 | { | 132 | { |
85 | return rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev, | 133 | int ret; |
134 | |||
135 | trace_802154_rdev_set_max_frame_retries(&rdev->wpan_phy, wpan_dev, | ||
86 | max_frame_retries); | 136 | max_frame_retries); |
137 | ret = rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev, | ||
138 | max_frame_retries); | ||
139 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
140 | return ret; | ||
87 | } | 141 | } |
88 | 142 | ||
89 | static inline int | 143 | static inline int |
90 | rdev_set_lbt_mode(struct cfg802154_registered_device *rdev, | 144 | rdev_set_lbt_mode(struct cfg802154_registered_device *rdev, |
91 | struct wpan_dev *wpan_dev, bool mode) | 145 | struct wpan_dev *wpan_dev, bool mode) |
92 | { | 146 | { |
93 | return rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode); | 147 | int ret; |
148 | |||
149 | trace_802154_rdev_set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode); | ||
150 | ret = rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode); | ||
151 | trace_802154_rdev_return_int(&rdev->wpan_phy, ret); | ||
152 | return ret; | ||
94 | } | 153 | } |
95 | 154 | ||
96 | #endif /* __CFG802154_RDEV_OPS */ | 155 | #endif /* __CFG802154_RDEV_OPS */ |
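
Every rdev op above is rewritten into the same three-step shape: emit an entry tracepoint carrying the arguments, call the driver op, then emit the shared 802154_rdev_return_int tracepoint with the result. A skeleton of that shape, with the TRACE_EVENT machinery reduced to printf for this sketch:

    #include <stdio.h>

    struct phy { const char *name; };

    struct ops {
        int (*set_channel)(struct phy *phy, unsigned char page,
                           unsigned char channel);
    };

    /* Entry/exit hooks; real tracepoints are compiled-out when unused. */
    static void trace_set_channel(struct phy *p, unsigned int page,
                                  unsigned int ch)
    {
        printf("%s: set_channel page=%u channel=%u\n", p->name, page, ch);
    }

    static void trace_return_int(struct phy *p, int ret)
    {
        printf("%s: returned %d\n", p->name, ret);
    }

    /* The wrapper shape used for every op: trace in, call, trace out. */
    static int rdev_set_channel(struct phy *phy, const struct ops *ops,
                                unsigned char page, unsigned char channel)
    {
        int ret;

        trace_set_channel(phy, page, channel);
        ret = ops->set_channel(phy, page, channel);
        trace_return_int(phy, ret);
        return ret;
    }

    static int demo_set_channel(struct phy *phy, unsigned char page,
                                unsigned char channel)
    {
        (void)phy; (void)page; (void)channel;
        return 0;
    }

    int main(void)
    {
        struct phy p = { "phy0" };
        struct ops o = { demo_set_channel };

        return rdev_set_channel(&p, &o, 0, 11);
    }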
diff --git a/net/ieee802154/trace.c b/net/ieee802154/trace.c new file mode 100644 index 000000000000..95f997fad755 --- /dev/null +++ b/net/ieee802154/trace.c | |||
@@ -0,0 +1,7 @@ | |||
1 | #include <linux/module.h> | ||
2 | |||
3 | #ifndef __CHECKER__ | ||
4 | #define CREATE_TRACE_POINTS | ||
5 | #include "trace.h" | ||
6 | |||
7 | #endif | ||
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h new file mode 100644 index 000000000000..5ac25eb6ed17 --- /dev/null +++ b/net/ieee802154/trace.h | |||
@@ -0,0 +1,247 @@ | |||
1 | /* Based on net/wireless/trace.h */ | ||
2 | |||
3 | #undef TRACE_SYSTEM | ||
4 | #define TRACE_SYSTEM cfg802154 | ||
5 | |||
6 | #if !defined(__RDEV_CFG802154_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ) | ||
7 | #define __RDEV_CFG802154_OPS_TRACE | ||
8 | |||
9 | #include <linux/tracepoint.h> | ||
10 | |||
11 | #include <net/cfg802154.h> | ||
12 | |||
13 | #define MAXNAME 32 | ||
14 | #define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME) | ||
15 | #define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \ | ||
16 | wpan_phy_name(wpan_phy), \ | ||
17 | MAXNAME) | ||
18 | #define WPAN_PHY_PR_FMT "%s" | ||
19 | #define WPAN_PHY_PR_ARG __entry->wpan_phy_name | ||
20 | |||
21 | #define WPAN_DEV_ENTRY __field(u32, identifier) | ||
22 | #define WPAN_DEV_ASSIGN (__entry->identifier) = (!IS_ERR_OR_NULL(wpan_dev) \ | ||
23 | ? wpan_dev->identifier : 0) | ||
24 | #define WPAN_DEV_PR_FMT "wpan_dev(%u)" | ||
25 | #define WPAN_DEV_PR_ARG (__entry->identifier) | ||
26 | |||
27 | #define WPAN_CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \ | ||
28 | __field(enum nl802154_cca_opts, cca_opt) | ||
29 | #define WPAN_CCA_ASSIGN \ | ||
30 | do { \ | ||
31 | (__entry->cca_mode) = cca->mode; \ | ||
32 | (__entry->cca_opt) = cca->opt; \ | ||
33 | } while (0) | ||
34 | #define WPAN_CCA_PR_FMT "cca_mode: %d, cca_opt: %d" | ||
35 | #define WPAN_CCA_PR_ARG __entry->cca_mode, __entry->cca_opt | ||
36 | |||
37 | #define BOOL_TO_STR(bo) (bo) ? "true" : "false" | ||
38 | |||
39 | /************************************************************* | ||
40 | * rdev->ops traces * | ||
41 | *************************************************************/ | ||
42 | |||
43 | TRACE_EVENT(802154_rdev_add_virtual_intf, | ||
44 | TP_PROTO(struct wpan_phy *wpan_phy, char *name, | ||
45 | enum nl802154_iftype type, __le64 extended_addr), | ||
46 | TP_ARGS(wpan_phy, name, type, extended_addr), | ||
47 | TP_STRUCT__entry( | ||
48 | WPAN_PHY_ENTRY | ||
49 | __string(vir_intf_name, name ? name : "<noname>") | ||
50 | __field(enum nl802154_iftype, type) | ||
51 | __field(__le64, extended_addr) | ||
52 | ), | ||
53 | TP_fast_assign( | ||
54 | WPAN_PHY_ASSIGN; | ||
55 | __assign_str(vir_intf_name, name ? name : "<noname>"); | ||
56 | __entry->type = type; | ||
57 | __entry->extended_addr = extended_addr; | ||
58 | ), | ||
59 | TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, ea %llx", | ||
60 | WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type, | ||
61 | __le64_to_cpu(__entry->extended_addr)) | ||
62 | ); | ||
63 | |||
64 | TRACE_EVENT(802154_rdev_del_virtual_intf, | ||
65 | TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), | ||
66 | TP_ARGS(wpan_phy, wpan_dev), | ||
67 | TP_STRUCT__entry( | ||
68 | WPAN_PHY_ENTRY | ||
69 | WPAN_DEV_ENTRY | ||
70 | ), | ||
71 | TP_fast_assign( | ||
72 | WPAN_PHY_ASSIGN; | ||
73 | WPAN_DEV_ASSIGN; | ||
74 | ), | ||
75 | TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG, | ||
76 | WPAN_DEV_PR_ARG) | ||
77 | ); | ||
78 | |||
79 | TRACE_EVENT(802154_rdev_set_channel, | ||
80 | TP_PROTO(struct wpan_phy *wpan_phy, u8 page, u8 channel), | ||
81 | TP_ARGS(wpan_phy, page, channel), | ||
82 | TP_STRUCT__entry( | ||
83 | WPAN_PHY_ENTRY | ||
84 | __field(u8, page) | ||
85 | __field(u8, channel) | ||
86 | ), | ||
87 | TP_fast_assign( | ||
88 | WPAN_PHY_ASSIGN; | ||
89 | __entry->page = page; | ||
90 | __entry->channel = channel; | ||
91 | ), | ||
92 | TP_printk(WPAN_PHY_PR_FMT ", page: %d, channel: %d", WPAN_PHY_PR_ARG, | ||
93 | __entry->page, __entry->channel) | ||
94 | ); | ||
95 | |||
96 | TRACE_EVENT(802154_rdev_set_cca_mode, | ||
97 | TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca), | ||
98 | TP_ARGS(wpan_phy, cca), | ||
99 | TP_STRUCT__entry( | ||
100 | WPAN_PHY_ENTRY | ||
101 | WPAN_CCA_ENTRY | ||
102 | ), | ||
103 | TP_fast_assign( | ||
104 | WPAN_PHY_ASSIGN; | ||
105 | WPAN_CCA_ASSIGN; | ||
106 | ), | ||
107 | TP_printk(WPAN_PHY_PR_FMT ", " WPAN_CCA_PR_FMT, WPAN_PHY_PR_ARG, | ||
108 | WPAN_CCA_PR_ARG) | ||
109 | ); | ||
110 | |||
111 | DECLARE_EVENT_CLASS(802154_le16_template, | ||
112 | TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, | ||
113 | __le16 le16arg), | ||
114 | TP_ARGS(wpan_phy, wpan_dev, le16arg), | ||
115 | TP_STRUCT__entry( | ||
116 | WPAN_PHY_ENTRY | ||
117 | WPAN_DEV_ENTRY | ||
118 | __field(__le16, le16arg) | ||
119 | ), | ||
120 | TP_fast_assign( | ||
121 | WPAN_PHY_ASSIGN; | ||
122 | WPAN_DEV_ASSIGN; | ||
123 | __entry->le16arg = le16arg; | ||
124 | ), | ||
125 | TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", pan id: 0x%04x", | ||
126 | WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, | ||
127 | __le16_to_cpu(__entry->le16arg)) | ||
128 | ); | ||
129 | |||
130 | DEFINE_EVENT(802154_le16_template, 802154_rdev_set_pan_id, | ||
131 | TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, | ||
132 | __le16 le16arg), | ||
133 | TP_ARGS(wpan_phy, wpan_dev, le16arg) | ||
134 | ); | ||
135 | |||
136 | DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr, | ||
137 | TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, | ||
138 | __le16 le16arg), | ||
139 | TP_ARGS(wpan_phy, wpan_dev, le16arg), | ||
140 | TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", sa: 0x%04x", | ||
141 | WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, | ||
142 | __le16_to_cpu(__entry->le16arg)) | ||
143 | ); | ||
144 | |||
145 | TRACE_EVENT(802154_rdev_set_backoff_exponent, | ||
146 | TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, | ||
147 | u8 min_be, u8 max_be), | ||
148 | TP_ARGS(wpan_phy, wpan_dev, min_be, max_be), | ||
149 | TP_STRUCT__entry( | ||
150 | WPAN_PHY_ENTRY | ||
151 | WPAN_DEV_ENTRY | ||
152 | __field(u8, min_be) | ||
153 | __field(u8, max_be) | ||
154 | ), | ||
155 | TP_fast_assign( | ||
156 | WPAN_PHY_ASSIGN; | ||
157 | WPAN_DEV_ASSIGN; | ||
158 | __entry->min_be = min_be; | ||
159 | __entry->max_be = max_be; | ||
160 | ), | ||
161 | |||
162 | TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT | ||
163 | ", min be: %d, max_be: %d", WPAN_PHY_PR_ARG, | ||
164 | WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be) | ||
165 | ); | ||
166 | |||
167 | TRACE_EVENT(802154_rdev_set_csma_backoffs, | ||
168 | TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, | ||
169 | u8 max_csma_backoffs), | ||
170 | TP_ARGS(wpan_phy, wpan_dev, max_csma_backoffs), | ||
171 | TP_STRUCT__entry( | ||
172 | WPAN_PHY_ENTRY | ||
173 | WPAN_DEV_ENTRY | ||
174 | __field(u8, max_csma_backoffs) | ||
175 | ), | ||
176 | TP_fast_assign( | ||
177 | WPAN_PHY_ASSIGN; | ||
178 | WPAN_DEV_ASSIGN; | ||
179 | __entry->max_csma_backoffs = max_csma_backoffs; | ||
180 | ), | ||
181 | |||
182 | TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT | ||
183 | ", max csma backoffs: %d", WPAN_PHY_PR_ARG, | ||
184 | WPAN_DEV_PR_ARG, __entry->max_csma_backoffs) | ||
185 | ); | ||
186 | |||
187 | TRACE_EVENT(802154_rdev_set_max_frame_retries, | ||
188 | TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, | ||
189 | s8 max_frame_retries), | ||
190 | TP_ARGS(wpan_phy, wpan_dev, max_frame_retries), | ||
191 | TP_STRUCT__entry( | ||
192 | WPAN_PHY_ENTRY | ||
193 | WPAN_DEV_ENTRY | ||
194 | __field(s8, max_frame_retries) | ||
195 | ), | ||
196 | TP_fast_assign( | ||
197 | WPAN_PHY_ASSIGN; | ||
198 | WPAN_DEV_ASSIGN; | ||
199 | __entry->max_frame_retries = max_frame_retries; | ||
200 | ), | ||
201 | |||
202 | TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT | ||
203 | ", max frame retries: %d", WPAN_PHY_PR_ARG, | ||
204 | WPAN_DEV_PR_ARG, __entry->max_frame_retries) | ||
205 | ); | ||
206 | |||
207 | TRACE_EVENT(802154_rdev_set_lbt_mode, | ||
208 | TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, | ||
209 | bool mode), | ||
210 | TP_ARGS(wpan_phy, wpan_dev, mode), | ||
211 | TP_STRUCT__entry( | ||
212 | WPAN_PHY_ENTRY | ||
213 | WPAN_DEV_ENTRY | ||
214 | __field(bool, mode) | ||
215 | ), | ||
216 | TP_fast_assign( | ||
217 | WPAN_PHY_ASSIGN; | ||
218 | WPAN_DEV_ASSIGN; | ||
219 | __entry->mode = mode; | ||
220 | ), | ||
221 | TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT | ||
222 | ", lbt mode: %s", WPAN_PHY_PR_ARG, | ||
223 | WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode)) | ||
224 | ); | ||
225 | |||
226 | TRACE_EVENT(802154_rdev_return_int, | ||
227 | TP_PROTO(struct wpan_phy *wpan_phy, int ret), | ||
228 | TP_ARGS(wpan_phy, ret), | ||
229 | TP_STRUCT__entry( | ||
230 | WPAN_PHY_ENTRY | ||
231 | __field(int, ret) | ||
232 | ), | ||
233 | TP_fast_assign( | ||
234 | WPAN_PHY_ASSIGN; | ||
235 | __entry->ret = ret; | ||
236 | ), | ||
237 | TP_printk(WPAN_PHY_PR_FMT ", returned: %d", WPAN_PHY_PR_ARG, | ||
238 | __entry->ret) | ||
239 | ); | ||
240 | |||
241 | #endif /* !__RDEV_CFG802154_OPS_TRACE || TRACE_HEADER_MULTI_READ */ | ||
242 | |||
243 | #undef TRACE_INCLUDE_PATH | ||
244 | #define TRACE_INCLUDE_PATH . | ||
245 | #undef TRACE_INCLUDE_FILE | ||
246 | #define TRACE_INCLUDE_FILE trace | ||
247 | #include <trace/define_trace.h> | ||
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index af150b43b214..34968cd5c146 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c | |||
@@ -711,11 +711,10 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
711 | cb->nlh->nlmsg_seq, NLM_F_MULTI, | 711 | cb->nlh->nlmsg_seq, NLM_F_MULTI, |
712 | skb, FOU_CMD_GET); | 712 | skb, FOU_CMD_GET); |
713 | if (ret) | 713 | if (ret) |
714 | goto done; | 714 | break; |
715 | } | 715 | } |
716 | mutex_unlock(&fn->fou_lock); | 716 | mutex_unlock(&fn->fou_lock); |
717 | 717 | ||
718 | done: | ||
719 | cb->args[0] = idx; | 718 | cb->args[0] = idx; |
720 | return skb->len; | 719 | return skb->len; |
721 | } | 720 | } |
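
fou_nl_dump() is a netlink dump callback: it is re-invoked until it returns 0, resuming at cb->args[0]. Replacing the goto with a break means the mutex unlock and the cursor update now run on every exit path. A sketch of that resume-cursor shape (sizes invented for the demo):

    #include <stdio.h>

    #define N_ENTRIES 10
    #define BATCH 4     /* how many entries fit per "skb" in this demo */

    /* Models one dump invocation: resume at *cursor, stop when the
     * buffer fills, always fall through to the common cleanup. */
    static int dump_batch(int *cursor)
    {
        int idx, filled = 0;

        /* mutex_lock(&fn->fou_lock) would go here */
        for (idx = *cursor; idx < N_ENTRIES; idx++) {
            if (filled == BATCH)
                break;              /* was "goto done", skipping the unlock */
            printf("entry %d\n", idx);
            filled++;
        }
        /* mutex_unlock(&fn->fou_lock) - now reached on every path */

        *cursor = idx;              /* resume point for the next call */
        return filled;              /* 0 means the dump is complete */
    }

    int main(void)
    {
        int cursor = 0;

        while (dump_batch(&cursor) > 0)
            printf("-- next invocation --\n");
        return 0;
    }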
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5c3dd6267ed3..8976ca423a07 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) | |||
564 | } | 564 | } |
565 | EXPORT_SYMBOL(inet_rtx_syn_ack); | 565 | EXPORT_SYMBOL(inet_rtx_syn_ack); |
566 | 566 | ||
567 | /* return true if req was found in the syn_table[] */ | ||
568 | static bool reqsk_queue_unlink(struct request_sock_queue *queue, | ||
569 | struct request_sock *req) | ||
570 | { | ||
571 | struct listen_sock *lopt = queue->listen_opt; | ||
572 | struct request_sock **prev; | ||
573 | bool found = false; | ||
574 | |||
575 | spin_lock(&queue->syn_wait_lock); | ||
576 | |||
577 | for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; | ||
578 | prev = &(*prev)->dl_next) { | ||
579 | if (*prev == req) { | ||
580 | *prev = req->dl_next; | ||
581 | found = true; | ||
582 | break; | ||
583 | } | ||
584 | } | ||
585 | |||
586 | spin_unlock(&queue->syn_wait_lock); | ||
587 | if (del_timer(&req->rsk_timer)) | ||
588 | reqsk_put(req); | ||
589 | return found; | ||
590 | } | ||
591 | |||
592 | void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) | ||
593 | { | ||
594 | if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) { | ||
595 | reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); | ||
596 | reqsk_put(req); | ||
597 | } | ||
598 | } | ||
599 | EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); | ||
600 | |||
567 | static void reqsk_timer_handler(unsigned long data) | 601 | static void reqsk_timer_handler(unsigned long data) |
568 | { | 602 | { |
569 | struct request_sock *req = (struct request_sock *)data; | 603 | struct request_sock *req = (struct request_sock *)data; |
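
reqsk_queue_unlink() above walks the SYN hash chain with a pointer-to-pointer, so removal is a single *prev = req->dl_next with no head-versus-interior special case, and the function can report whether the entry was still linked. The idiom in isolation:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct node {
        int id;
        struct node *next;
    };

    /* Unlink 'victim' from a singly linked list. The pointer-to-pointer
     * walk makes the head and interior cases the same code path. */
    static bool unlink_node(struct node **head, struct node *victim)
    {
        struct node **prev;

        for (prev = head; *prev != NULL; prev = &(*prev)->next) {
            if (*prev == victim) {
                *prev = victim->next;
                return true;    /* found, as reqsk_queue_unlink() reports */
            }
        }
        return false;
    }

    int main(void)
    {
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct node *head = &a;

        assert(unlink_node(&head, &a));   /* removing the head: no special case */
        assert(head == &b);
        assert(unlink_node(&head, &c));
        assert(b.next == NULL);
        assert(!unlink_node(&head, &a));  /* already gone */
        return 0;
    }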
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 70e8b3c308ec..4d32262c7502 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -111,6 +111,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
111 | const struct nlmsghdr *unlh) | 111 | const struct nlmsghdr *unlh) |
112 | { | 112 | { |
113 | const struct inet_sock *inet = inet_sk(sk); | 113 | const struct inet_sock *inet = inet_sk(sk); |
114 | const struct tcp_congestion_ops *ca_ops; | ||
114 | const struct inet_diag_handler *handler; | 115 | const struct inet_diag_handler *handler; |
115 | int ext = req->idiag_ext; | 116 | int ext = req->idiag_ext; |
116 | struct inet_diag_msg *r; | 117 | struct inet_diag_msg *r; |
@@ -208,16 +209,33 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
208 | info = nla_data(attr); | 209 | info = nla_data(attr); |
209 | } | 210 | } |
210 | 211 | ||
211 | if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) | 212 | if (ext & (1 << (INET_DIAG_CONG - 1))) { |
212 | if (nla_put_string(skb, INET_DIAG_CONG, | 213 | int err = 0; |
213 | icsk->icsk_ca_ops->name) < 0) | 214 | |
215 | rcu_read_lock(); | ||
216 | ca_ops = READ_ONCE(icsk->icsk_ca_ops); | ||
217 | if (ca_ops) | ||
218 | err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name); | ||
219 | rcu_read_unlock(); | ||
220 | if (err < 0) | ||
214 | goto errout; | 221 | goto errout; |
222 | } | ||
215 | 223 | ||
216 | handler->idiag_get_info(sk, r, info); | 224 | handler->idiag_get_info(sk, r, info); |
217 | 225 | ||
218 | if (sk->sk_state < TCP_TIME_WAIT && | 226 | if (sk->sk_state < TCP_TIME_WAIT) { |
219 | icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) | 227 | union tcp_cc_info info; |
220 | icsk->icsk_ca_ops->get_info(sk, ext, skb); | 228 | size_t sz = 0; |
229 | int attr; | ||
230 | |||
231 | rcu_read_lock(); | ||
232 | ca_ops = READ_ONCE(icsk->icsk_ca_ops); | ||
233 | if (ca_ops && ca_ops->get_info) | ||
234 | sz = ca_ops->get_info(sk, ext, &attr, &info); | ||
235 | rcu_read_unlock(); | ||
236 | if (sz && nla_put(skb, attr, sz, &info) < 0) | ||
237 | goto errout; | ||
238 | } | ||
221 | 239 | ||
222 | out: | 240 | out: |
223 | nlmsg_end(skb, nlh); | 241 | nlmsg_end(skb, nlh); |
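
icsk_ca_ops can be switched (and the old module's ops freed) while a diag dump runs, so the hunk samples the pointer once with READ_ONCE() inside an RCU read section and only uses the sampled copy, checking the nla_put result after the unlock. A userspace approximation of the single-sample discipline using a C11 atomic load (RCU grace periods are not modeled here):

    #include <stdatomic.h>
    #include <stdio.h>

    struct ca_ops {
        const char *name;
    };

    static _Atomic(struct ca_ops *) current_ops;

    /* Sample the pointer exactly once; every later use goes through the
     * local copy and never re-reads the global - the job READ_ONCE()
     * does in the hunk above. */
    static int report_cong(char *buf, size_t len)
    {
        struct ca_ops *ops = atomic_load_explicit(&current_ops,
                                                  memory_order_acquire);
        if (!ops)
            return 0;
        return snprintf(buf, len, "cong=%s", ops->name);
    }

    int main(void)
    {
        static struct ca_ops cubic = { "cubic" };
        char buf[32];

        atomic_store(&current_ops, &cubic);
        if (report_cong(buf, sizeof(buf)) > 0)
            puts(buf);
        return 0;
    }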
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 939992c456f3..3674484946a5 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -82,6 +82,9 @@ int ip_forward(struct sk_buff *skb) | |||
82 | if (skb->pkt_type != PACKET_HOST) | 82 | if (skb->pkt_type != PACKET_HOST) |
83 | goto drop; | 83 | goto drop; |
84 | 84 | ||
85 | if (unlikely(skb->sk)) | ||
86 | goto drop; | ||
87 | |||
85 | if (skb_warn_if_lro(skb)) | 88 | if (skb_warn_if_lro(skb)) |
86 | goto drop; | 89 | goto drop; |
87 | 90 | ||
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index a93f260cf24c..05ff44b758df 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c | |||
@@ -158,6 +158,7 @@ void ping_unhash(struct sock *sk) | |||
158 | if (sk_hashed(sk)) { | 158 | if (sk_hashed(sk)) { |
159 | write_lock_bh(&ping_table.lock); | 159 | write_lock_bh(&ping_table.lock); |
160 | hlist_nulls_del(&sk->sk_nulls_node); | 160 | hlist_nulls_del(&sk->sk_nulls_node); |
161 | sk_nulls_node_init(&sk->sk_nulls_node); | ||
161 | sock_put(sk); | 162 | sock_put(sk); |
162 | isk->inet_num = 0; | 163 | isk->inet_num = 0; |
163 | isk->inet_sport = 0; | 164 | isk->inet_sport = 0; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a78540f28276..bff62fc87b8e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -962,10 +962,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | |||
962 | if (dst_metric_locked(dst, RTAX_MTU)) | 962 | if (dst_metric_locked(dst, RTAX_MTU)) |
963 | return; | 963 | return; |
964 | 964 | ||
965 | if (dst->dev->mtu < mtu) | 965 | if (ipv4_mtu(dst) < mtu) |
966 | return; | ||
967 | |||
968 | if (rt->rt_pmtu && rt->rt_pmtu < mtu) | ||
969 | return; | 966 | return; |
970 | 967 | ||
971 | if (mtu < ip_rt_min_pmtu) | 968 | if (mtu < ip_rt_min_pmtu) |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 18e3a12eb1b2..46efa03d2b11 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -252,6 +252,7 @@ | |||
252 | #include <linux/types.h> | 252 | #include <linux/types.h> |
253 | #include <linux/fcntl.h> | 253 | #include <linux/fcntl.h> |
254 | #include <linux/poll.h> | 254 | #include <linux/poll.h> |
255 | #include <linux/inet_diag.h> | ||
255 | #include <linux/init.h> | 256 | #include <linux/init.h> |
256 | #include <linux/fs.h> | 257 | #include <linux/fs.h> |
257 | #include <linux/skbuff.h> | 258 | #include <linux/skbuff.h> |
@@ -520,8 +521,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
520 | 521 | ||
521 | /* Race breaker. If space is freed after | 522 | /* Race breaker. If space is freed after |
522 | * wspace test but before the flags are set, | 523 | * wspace test but before the flags are set, |
523 | * IO signal will be lost. | 524 | * IO signal will be lost. Memory barrier |
525 | * pairs with the input side. | ||
524 | */ | 526 | */ |
527 | smp_mb__after_atomic(); | ||
525 | if (sk_stream_is_writeable(sk)) | 528 | if (sk_stream_is_writeable(sk)) |
526 | mask |= POLLOUT | POLLWRNORM; | 529 | mask |= POLLOUT | POLLWRNORM; |
527 | } | 530 | } |
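
The race breaker is the classic store-buffering pattern: poll() sets the "wants write space" bit and then re-tests writability, while the writer frees space and then tests the bit. Without a full barrier between each side's store and load, both can read stale values and the wakeup is lost. A C11 sketch of the pairing, with seq_cst fences standing in for smp_mb__after_atomic():

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int want_space;   /* poll side sets this flag */
    static atomic_int free_space;   /* write side updates this  */

    /* Poller: publish interest, then re-check the condition. The fence
     * orders the flag store before the space load. */
    static bool poll_writable(int needed)
    {
        atomic_store_explicit(&want_space, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);  /* smp_mb__after_atomic() */
        return atomic_load_explicit(&free_space,
                                    memory_order_relaxed) >= needed;
    }

    /* Writer: publish new space, then check whether anyone asked. */
    static bool release_space(int amount)
    {
        atomic_fetch_add_explicit(&free_space, amount, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        return atomic_load_explicit(&want_space,
                                    memory_order_relaxed) != 0;  /* wake? */
    }

    int main(void)
    {
        /* With both fences, at least one side always sees the other's
         * store: either the poller sees the space or the writer sees
         * the waiter, so the wakeup cannot be lost. */
        bool writable = poll_writable(1);
        bool wake = release_space(1);

        return (writable || wake) ? 0 : 1;
    }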
@@ -2590,11 +2593,12 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); | |||
2590 | #endif | 2593 | #endif |
2591 | 2594 | ||
2592 | /* Return information about state of tcp endpoint in API format. */ | 2595 | /* Return information about state of tcp endpoint in API format. */ |
2593 | void tcp_get_info(const struct sock *sk, struct tcp_info *info) | 2596 | void tcp_get_info(struct sock *sk, struct tcp_info *info) |
2594 | { | 2597 | { |
2595 | const struct tcp_sock *tp = tcp_sk(sk); | 2598 | const struct tcp_sock *tp = tcp_sk(sk); |
2596 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2599 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2597 | u32 now = tcp_time_stamp; | 2600 | u32 now = tcp_time_stamp; |
2601 | u32 rate; | ||
2598 | 2602 | ||
2599 | memset(info, 0, sizeof(*info)); | 2603 | memset(info, 0, sizeof(*info)); |
2600 | 2604 | ||
@@ -2655,10 +2659,16 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) | |||
2655 | 2659 | ||
2656 | info->tcpi_total_retrans = tp->total_retrans; | 2660 | info->tcpi_total_retrans = tp->total_retrans; |
2657 | 2661 | ||
2658 | info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ? | 2662 | rate = READ_ONCE(sk->sk_pacing_rate); |
2659 | sk->sk_pacing_rate : ~0ULL; | 2663 | info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; |
2660 | info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ? | 2664 | |
2661 | sk->sk_max_pacing_rate : ~0ULL; | 2665 | rate = READ_ONCE(sk->sk_max_pacing_rate); |
2666 | info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; | ||
2667 | |||
2668 | spin_lock_bh(&sk->sk_lock.slock); | ||
2669 | info->tcpi_bytes_acked = tp->bytes_acked; | ||
2670 | info->tcpi_bytes_received = tp->bytes_received; | ||
2671 | spin_unlock_bh(&sk->sk_lock.slock); | ||
2662 | } | 2672 | } |
2663 | EXPORT_SYMBOL_GPL(tcp_get_info); | 2673 | EXPORT_SYMBOL_GPL(tcp_get_info); |
2664 | 2674 | ||
@@ -2730,6 +2740,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2730 | return -EFAULT; | 2740 | return -EFAULT; |
2731 | return 0; | 2741 | return 0; |
2732 | } | 2742 | } |
2743 | case TCP_CC_INFO: { | ||
2744 | const struct tcp_congestion_ops *ca_ops; | ||
2745 | union tcp_cc_info info; | ||
2746 | size_t sz = 0; | ||
2747 | int attr; | ||
2748 | |||
2749 | if (get_user(len, optlen)) | ||
2750 | return -EFAULT; | ||
2751 | |||
2752 | ca_ops = icsk->icsk_ca_ops; | ||
2753 | if (ca_ops && ca_ops->get_info) | ||
2754 | sz = ca_ops->get_info(sk, ~0U, &attr, &info); | ||
2755 | |||
2756 | len = min_t(unsigned int, len, sz); | ||
2757 | if (put_user(len, optlen)) | ||
2758 | return -EFAULT; | ||
2759 | if (copy_to_user(optval, &info, len)) | ||
2760 | return -EFAULT; | ||
2761 | return 0; | ||
2762 | } | ||
2733 | case TCP_QUICKACK: | 2763 | case TCP_QUICKACK: |
2734 | val = !icsk->icsk_ack.pingpong; | 2764 | val = !icsk->icsk_ack.pingpong; |
2735 | break; | 2765 | break; |
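
The new TCP_CC_INFO case follows the standard variable-length getsockopt contract: read the caller's buffer length, compute the payload size, clamp the copy to the smaller of the two, and write the resulting length back so the caller can detect truncation. A userspace model of that contract:

    #include <assert.h>
    #include <string.h>

    /* Models the kernel side: *optlen is an in/out parameter - in: the
     * buffer size the caller has, out: bytes actually copied. */
    static int get_info_opt(void *optval, int *optlen)
    {
        const char payload[] = "cc-state";     /* stands in for tcp_cc_info */
        int sz = sizeof(payload);
        int len = *optlen;                     /* get_user(len, optlen) */

        if (len < 0)
            return -1;                         /* -EFAULT in the kernel */
        if (len > sz)
            len = sz;                          /* min_t(unsigned int, len, sz) */

        *optlen = len;                         /* put_user(len, optlen) */
        memcpy(optval, payload, len);          /* copy_to_user() */
        return 0;
    }

    int main(void)
    {
        char small[4];
        int len = sizeof(small);

        assert(get_info_opt(small, &len) == 0);
        assert(len == 4);                      /* truncated; caller sees how much */
        return 0;
    }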
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index b504371af742..4c41c1287197 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c | |||
@@ -277,7 +277,8 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) | |||
277 | } | 277 | } |
278 | } | 278 | } |
279 | 279 | ||
280 | static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) | 280 | static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, |
281 | union tcp_cc_info *info) | ||
281 | { | 282 | { |
282 | const struct dctcp *ca = inet_csk_ca(sk); | 283 | const struct dctcp *ca = inet_csk_ca(sk); |
283 | 284 | ||
@@ -286,19 +287,19 @@ static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) | |||
286 | */ | 287 | */ |
287 | if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) || | 288 | if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) || |
288 | ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | 289 | ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
289 | struct tcp_dctcp_info info; | 290 | memset(info, 0, sizeof(struct tcp_dctcp_info)); |
290 | |||
291 | memset(&info, 0, sizeof(info)); | ||
292 | if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) { | 291 | if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) { |
293 | info.dctcp_enabled = 1; | 292 | info->dctcp.dctcp_enabled = 1; |
294 | info.dctcp_ce_state = (u16) ca->ce_state; | 293 | info->dctcp.dctcp_ce_state = (u16) ca->ce_state; |
295 | info.dctcp_alpha = ca->dctcp_alpha; | 294 | info->dctcp.dctcp_alpha = ca->dctcp_alpha; |
296 | info.dctcp_ab_ecn = ca->acked_bytes_ecn; | 295 | info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; |
297 | info.dctcp_ab_tot = ca->acked_bytes_total; | 296 | info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; |
298 | } | 297 | } |
299 | 298 | ||
300 | nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info); | 299 | *attr = INET_DIAG_DCTCPINFO; |
300 | return sizeof(*info); | ||
301 | } | 301 | } |
302 | return 0; | ||
302 | } | 303 | } |
303 | 304 | ||
304 | static struct tcp_congestion_ops dctcp __read_mostly = { | 305 | static struct tcp_congestion_ops dctcp __read_mostly = { |
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index e3d87aca6be8..3c673d5e6cff 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c | |||
@@ -206,6 +206,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, | |||
206 | skb_set_owner_r(skb2, child); | 206 | skb_set_owner_r(skb2, child); |
207 | __skb_queue_tail(&child->sk_receive_queue, skb2); | 207 | __skb_queue_tail(&child->sk_receive_queue, skb2); |
208 | tp->syn_data_acked = 1; | 208 | tp->syn_data_acked = 1; |
209 | tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; | ||
209 | } else { | 210 | } else { |
210 | end_seq = TCP_SKB_CB(skb)->seq + 1; | 211 | end_seq = TCP_SKB_CB(skb)->seq + 1; |
211 | } | 212 | } |
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 1d5a30a90adf..f71002e4db0b 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c | |||
@@ -300,26 +300,27 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) | |||
300 | } | 300 | } |
301 | 301 | ||
302 | /* Extract info for Tcp socket info provided via netlink. */ | 302 | /* Extract info for Tcp socket info provided via netlink. */ |
303 | static void tcp_illinois_info(struct sock *sk, u32 ext, | 303 | static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, |
304 | struct sk_buff *skb) | 304 | union tcp_cc_info *info) |
305 | { | 305 | { |
306 | const struct illinois *ca = inet_csk_ca(sk); | 306 | const struct illinois *ca = inet_csk_ca(sk); |
307 | 307 | ||
308 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | 308 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
309 | struct tcpvegas_info info = { | 309 | info->vegas.tcpv_enabled = 1; |
310 | .tcpv_enabled = 1, | 310 | info->vegas.tcpv_rttcnt = ca->cnt_rtt; |
311 | .tcpv_rttcnt = ca->cnt_rtt, | 311 | info->vegas.tcpv_minrtt = ca->base_rtt; |
312 | .tcpv_minrtt = ca->base_rtt, | 312 | info->vegas.tcpv_rtt = 0; |
313 | }; | ||
314 | 313 | ||
315 | if (info.tcpv_rttcnt > 0) { | 314 | if (info->vegas.tcpv_rttcnt > 0) { |
316 | u64 t = ca->sum_rtt; | 315 | u64 t = ca->sum_rtt; |
317 | 316 | ||
318 | do_div(t, info.tcpv_rttcnt); | 317 | do_div(t, info->vegas.tcpv_rttcnt); |
319 | info.tcpv_rtt = t; | 318 | info->vegas.tcpv_rtt = t; |
320 | } | 319 | } |
321 | nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); | 320 | *attr = INET_DIAG_VEGASINFO; |
321 | return sizeof(struct tcpvegas_info); | ||
322 | } | 322 | } |
323 | return 0; | ||
323 | } | 324 | } |
324 | 325 | ||
325 | static struct tcp_congestion_ops tcp_illinois __read_mostly = { | 326 | static struct tcp_congestion_ops tcp_illinois __read_mostly = { |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a7ef679dd3ea..bc790ea9960f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -1820,14 +1820,12 @@ advance_sp: | |||
1820 | for (j = 0; j < used_sacks; j++) | 1820 | for (j = 0; j < used_sacks; j++) |
1821 | tp->recv_sack_cache[i++] = sp[j]; | 1821 | tp->recv_sack_cache[i++] = sp[j]; |
1822 | 1822 | ||
1823 | tcp_mark_lost_retrans(sk); | ||
1824 | |||
1825 | tcp_verify_left_out(tp); | ||
1826 | |||
1827 | if ((state.reord < tp->fackets_out) && | 1823 | if ((state.reord < tp->fackets_out) && |
1828 | ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) | 1824 | ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) |
1829 | tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); | 1825 | tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); |
1830 | 1826 | ||
1827 | tcp_mark_lost_retrans(sk); | ||
1828 | tcp_verify_left_out(tp); | ||
1831 | out: | 1829 | out: |
1832 | 1830 | ||
1833 | #if FASTRETRANS_DEBUG > 0 | 1831 | #if FASTRETRANS_DEBUG > 0 |
@@ -3280,6 +3278,24 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp, | |||
3280 | (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd); | 3278 | (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd); |
3281 | } | 3279 | } |
3282 | 3280 | ||
3281 | /* If we update tp->snd_una, also update tp->bytes_acked */ | ||
3282 | static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) | ||
3283 | { | ||
3284 | u32 delta = ack - tp->snd_una; | ||
3285 | |||
3286 | tp->bytes_acked += delta; | ||
3287 | tp->snd_una = ack; | ||
3288 | } | ||
3289 | |||
3290 | /* If we update tp->rcv_nxt, also update tp->bytes_received */ | ||
3291 | static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) | ||
3292 | { | ||
3293 | u32 delta = seq - tp->rcv_nxt; | ||
3294 | |||
3295 | tp->bytes_received += delta; | ||
3296 | tp->rcv_nxt = seq; | ||
3297 | } | ||
3298 | |||
3283 | /* Update our send window. | 3299 | /* Update our send window. |
3284 | * | 3300 | * |
3285 | * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 | 3301 | * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 |
@@ -3315,7 +3331,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 | |||
3315 | } | 3331 | } |
3316 | } | 3332 | } |
3317 | 3333 | ||
3318 | tp->snd_una = ack; | 3334 | tcp_snd_una_update(tp, ack); |
3319 | 3335 | ||
3320 | return flag; | 3336 | return flag; |
3321 | } | 3337 | } |
@@ -3497,7 +3513,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3497 | * Note, we use the fact that SND.UNA>=SND.WL2. | 3513 | * Note, we use the fact that SND.UNA>=SND.WL2. |
3498 | */ | 3514 | */ |
3499 | tcp_update_wl(tp, ack_seq); | 3515 | tcp_update_wl(tp, ack_seq); |
3500 | tp->snd_una = ack; | 3516 | tcp_snd_una_update(tp, ack); |
3501 | flag |= FLAG_WIN_UPDATE; | 3517 | flag |= FLAG_WIN_UPDATE; |
3502 | 3518 | ||
3503 | tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); | 3519 | tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); |
@@ -4236,7 +4252,7 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4236 | 4252 | ||
4237 | tail = skb_peek_tail(&sk->sk_receive_queue); | 4253 | tail = skb_peek_tail(&sk->sk_receive_queue); |
4238 | eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); | 4254 | eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); |
4239 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4255 | tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); |
4240 | if (!eaten) | 4256 | if (!eaten) |
4241 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 4257 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
4242 | if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) | 4258 | if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) |
@@ -4404,7 +4420,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int | |||
4404 | __skb_pull(skb, hdrlen); | 4420 | __skb_pull(skb, hdrlen); |
4405 | eaten = (tail && | 4421 | eaten = (tail && |
4406 | tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; | 4422 | tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; |
4407 | tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4423 | tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); |
4408 | if (!eaten) { | 4424 | if (!eaten) { |
4409 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 4425 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
4410 | skb_set_owner_r(skb, sk); | 4426 | skb_set_owner_r(skb, sk); |
@@ -4497,7 +4513,7 @@ queue_and_out: | |||
4497 | 4513 | ||
4498 | eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); | 4514 | eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); |
4499 | } | 4515 | } |
4500 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4516 | tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); |
4501 | if (skb->len) | 4517 | if (skb->len) |
4502 | tcp_event_data_recv(sk, skb); | 4518 | tcp_event_data_recv(sk, skb); |
4503 | if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) | 4519 | if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) |
@@ -4845,6 +4861,8 @@ static void tcp_check_space(struct sock *sk) | |||
4845 | { | 4861 | { |
4846 | if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { | 4862 | if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { |
4847 | sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); | 4863 | sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); |
4864 | /* pairs with tcp_poll() */ | ||
4865 | smp_mb__after_atomic(); | ||
4848 | if (sk->sk_socket && | 4866 | if (sk->sk_socket && |
4849 | test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) | 4867 | test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) |
4850 | tcp_new_space(sk); | 4868 | tcp_new_space(sk); |
@@ -5243,7 +5261,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5243 | tcp_rcv_rtt_measure_ts(sk, skb); | 5261 | tcp_rcv_rtt_measure_ts(sk, skb); |
5244 | 5262 | ||
5245 | __skb_pull(skb, tcp_header_len); | 5263 | __skb_pull(skb, tcp_header_len); |
5246 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 5264 | tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); |
5247 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); | 5265 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); |
5248 | eaten = 1; | 5266 | eaten = 1; |
5249 | } | 5267 | } |
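The new tcp_snd_una_update()/tcp_rcv_nxt_update() helpers keep tp->bytes_acked and tp->bytes_received in lockstep with snd_una/rcv_nxt, so every site that advances either sequence variable is converted to go through them; the tcp_mark_lost_retrans()/tcp_verify_left_out() calls merely move after the reordering update, unchanged. The smp_mb__after_atomic() in tcp_check_space() pairs with the barrier in tcp_poll() so a writer blocked on SOCK_NOSPACE cannot miss its wakeup. A userspace sketch of consuming the new counters, assuming a <linux/tcp.h> that already exposes the tcpi_bytes_* fields added alongside this series:

	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/tcp.h>

	static void print_tcp_bytes(int fd)
	{
		struct tcp_info ti;
		socklen_t len = sizeof(ti);

		memset(&ti, 0, sizeof(ti));
		if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
			printf("acked=%llu received=%llu\n",
			       (unsigned long long)ti.tcpi_bytes_acked,
			       (unsigned long long)ti.tcpi_bytes_received);
	}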
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3571f2be4470..fc1c658ec6c1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1348 | req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); | 1348 | req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); |
1349 | if (req) { | 1349 | if (req) { |
1350 | nsk = tcp_check_req(sk, skb, req, false); | 1350 | nsk = tcp_check_req(sk, skb, req, false); |
1351 | reqsk_put(req); | 1351 | if (!nsk) |
1352 | reqsk_put(req); | ||
1352 | return nsk; | 1353 | return nsk; |
1353 | } | 1354 | } |
1354 | 1355 | ||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d6311b5365..e5d7649136fc 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -755,10 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
755 | if (!child) | 755 | if (!child) |
756 | goto listen_overflow; | 756 | goto listen_overflow; |
757 | 757 | ||
758 | inet_csk_reqsk_queue_unlink(sk, req); | 758 | inet_csk_reqsk_queue_drop(sk, req); |
759 | inet_csk_reqsk_queue_removed(sk, req); | ||
760 | |||
761 | inet_csk_reqsk_queue_add(sk, req, child); | 759 | inet_csk_reqsk_queue_add(sk, req, child); |
760 | /* Warning: caller must not call reqsk_put(req); | ||
761 | * child stole last reference on it. | ||
762 | */ | ||
762 | return child; | 763 | return child; |
763 | 764 | ||
764 | listen_overflow: | 765 | listen_overflow: |
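These hunks change reference ownership: when tcp_check_req() returns a child, the child now holds the request's last reference (inet_csk_reqsk_queue_drop() replaces the unlink+removed pair), and the new comment warns the caller off reqsk_put(). Accordingly, tcp_v4_hnd_req() above, and its IPv6 twin further down, drop their reference only when no child was created.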
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8c8d7e06b72f..a369e8a70b2c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -2812,39 +2812,65 @@ begin_fwd: | |||
2812 | } | 2812 | } |
2813 | } | 2813 | } |
2814 | 2814 | ||
2815 | /* Send a fin. The caller locks the socket for us. This cannot be | 2815 | /* We allow to exceed memory limits for FIN packets to expedite |
2816 | * allowed to fail queueing a FIN frame under any circumstances. | 2816 | * connection tear down and (memory) recovery. |
2817 | * Otherwise tcp_send_fin() could be tempted to either delay FIN | ||
2818 | * or even be forced to close flow without any FIN. | ||
2819 | */ | ||
2820 | static void sk_forced_wmem_schedule(struct sock *sk, int size) | ||
2821 | { | ||
2822 | int amt, status; | ||
2823 | |||
2824 | if (size <= sk->sk_forward_alloc) | ||
2825 | return; | ||
2826 | amt = sk_mem_pages(size); | ||
2827 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; | ||
2828 | sk_memory_allocated_add(sk, amt, &status); | ||
2829 | } | ||
2830 | |||
2831 | /* Send a FIN. The caller locks the socket for us. | ||
2832 | * We should try to send a FIN packet really hard, but eventually give up. | ||
2817 | */ | 2833 | */ |
2818 | void tcp_send_fin(struct sock *sk) | 2834 | void tcp_send_fin(struct sock *sk) |
2819 | { | 2835 | { |
2836 | struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); | ||
2820 | struct tcp_sock *tp = tcp_sk(sk); | 2837 | struct tcp_sock *tp = tcp_sk(sk); |
2821 | struct sk_buff *skb = tcp_write_queue_tail(sk); | ||
2822 | int mss_now; | ||
2823 | 2838 | ||
2824 | /* Optimization, tack on the FIN if we have a queue of | 2839 | /* Optimization, tack on the FIN if we have one skb in write queue and |
2825 | * unsent frames. But be careful about outgoing SACKS | 2840 | * this skb was not yet sent, or we are under memory pressure. |
2826 | * and IP options. | 2841 | * Note: in the latter case, FIN packet will be sent after a timeout, |
2842 | * as TCP stack thinks it has already been transmitted. | ||
2827 | */ | 2843 | */ |
2828 | mss_now = tcp_current_mss(sk); | 2844 | if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { |
2829 | 2845 | coalesce: | |
2830 | if (tcp_send_head(sk)) { | 2846 | TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; |
2831 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; | 2847 | TCP_SKB_CB(tskb)->end_seq++; |
2832 | TCP_SKB_CB(skb)->end_seq++; | ||
2833 | tp->write_seq++; | 2848 | tp->write_seq++; |
2849 | if (!tcp_send_head(sk)) { | ||
2850 | /* This means tskb was already sent. | ||
2851 | * Pretend we included the FIN on previous transmit. | ||
2852 | * We need to set tp->snd_nxt to the value it would have | ||
2853 | * if FIN had been sent. This is because retransmit path | ||
2854 | * does not change tp->snd_nxt. | ||
2855 | */ | ||
2856 | tp->snd_nxt++; | ||
2857 | return; | ||
2858 | } | ||
2834 | } else { | 2859 | } else { |
2835 | /* Socket is locked, keep trying until memory is available. */ | 2860 | skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); |
2836 | for (;;) { | 2861 | if (unlikely(!skb)) { |
2837 | skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); | 2862 | if (tskb) |
2838 | if (skb) | 2863 | goto coalesce; |
2839 | break; | 2864 | return; |
2840 | yield(); | ||
2841 | } | 2865 | } |
2866 | skb_reserve(skb, MAX_TCP_HEADER); | ||
2867 | sk_forced_wmem_schedule(sk, skb->truesize); | ||
2842 | /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ | 2868 | /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ |
2843 | tcp_init_nondata_skb(skb, tp->write_seq, | 2869 | tcp_init_nondata_skb(skb, tp->write_seq, |
2844 | TCPHDR_ACK | TCPHDR_FIN); | 2870 | TCPHDR_ACK | TCPHDR_FIN); |
2845 | tcp_queue_skb(sk, skb); | 2871 | tcp_queue_skb(sk, skb); |
2846 | } | 2872 | } |
2847 | __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); | 2873 | __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); |
2848 | } | 2874 | } |
2849 | 2875 | ||
2850 | /* We get here when a process closes a file descriptor (either due to | 2876 | /* We get here when a process closes a file descriptor (either due to |
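The rewritten tcp_send_fin() no longer busy-loops on allocation. If the write queue has a tail skb that is either unsent or held under memory pressure, the FIN is coalesced onto it; when that tail was already transmitted, snd_nxt is bumped by hand because the retransmit path never advances it. Only otherwise is a fresh skb allocated, and sk_forced_wmem_schedule() deliberately lets that allocation exceed the memory limits, since getting the FIN out is what allows the connection's memory to be reclaimed.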
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a6afde666ab1..a6cea1d5e20d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -286,19 +286,21 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) | |||
286 | } | 286 | } |
287 | 287 | ||
288 | /* Extract info for Tcp socket info provided via netlink. */ | 288 | /* Extract info for Tcp socket info provided via netlink. */ |
289 | void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) | 289 | size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, |
290 | union tcp_cc_info *info) | ||
290 | { | 291 | { |
291 | const struct vegas *ca = inet_csk_ca(sk); | 292 | const struct vegas *ca = inet_csk_ca(sk); |
293 | |||
292 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | 294 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
293 | struct tcpvegas_info info = { | 295 | info->vegas.tcpv_enabled = ca->doing_vegas_now, |
294 | .tcpv_enabled = ca->doing_vegas_now, | 296 | info->vegas.tcpv_rttcnt = ca->cntRTT, |
295 | .tcpv_rttcnt = ca->cntRTT, | 297 | info->vegas.tcpv_rtt = ca->baseRTT, |
296 | .tcpv_rtt = ca->baseRTT, | 298 | info->vegas.tcpv_minrtt = ca->minRTT, |
297 | .tcpv_minrtt = ca->minRTT, | 299 | |
298 | }; | 300 | *attr = INET_DIAG_VEGASINFO; |
299 | 301 | return sizeof(struct tcpvegas_info); | |
300 | nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); | ||
301 | } | 302 | } |
303 | return 0; | ||
302 | } | 304 | } |
303 | EXPORT_SYMBOL_GPL(tcp_vegas_get_info); | 305 | EXPORT_SYMBOL_GPL(tcp_vegas_get_info); |
304 | 306 | ||
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 0531b99d8637..ef9da5306c68 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h | |||
@@ -19,6 +19,7 @@ void tcp_vegas_init(struct sock *sk); | |||
19 | void tcp_vegas_state(struct sock *sk, u8 ca_state); | 19 | void tcp_vegas_state(struct sock *sk, u8 ca_state); |
20 | void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); | 20 | void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); |
21 | void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); | 21 | void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); |
22 | void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); | 22 | size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, |
23 | union tcp_cc_info *info); | ||
23 | 24 | ||
24 | #endif /* __TCP_VEGAS_H */ | 25 | #endif /* __TCP_VEGAS_H */ |
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index bb63fba47d47..c10732e39837 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c | |||
@@ -256,20 +256,21 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) | |||
256 | } | 256 | } |
257 | 257 | ||
258 | /* Extract info for Tcp socket info provided via netlink. */ | 258 | /* Extract info for Tcp socket info provided via netlink. */ |
259 | static void tcp_westwood_info(struct sock *sk, u32 ext, | 259 | static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr, |
260 | struct sk_buff *skb) | 260 | union tcp_cc_info *info) |
261 | { | 261 | { |
262 | const struct westwood *ca = inet_csk_ca(sk); | 262 | const struct westwood *ca = inet_csk_ca(sk); |
263 | 263 | ||
264 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | 264 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
265 | struct tcpvegas_info info = { | 265 | info->vegas.tcpv_enabled = 1; |
266 | .tcpv_enabled = 1, | 266 | info->vegas.tcpv_rttcnt = 0; |
267 | .tcpv_rtt = jiffies_to_usecs(ca->rtt), | 267 | info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt), |
268 | .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), | 268 | info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), |
269 | }; | ||
270 | 269 | ||
271 | nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); | 270 | *attr = INET_DIAG_VEGASINFO; |
271 | return sizeof(struct tcpvegas_info); | ||
272 | } | 272 | } |
273 | return 0; | ||
273 | } | 274 | } |
274 | 275 | ||
275 | static struct tcp_congestion_ops tcp_westwood __read_mostly = { | 276 | static struct tcp_congestion_ops tcp_westwood __read_mostly = { |
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b5e6cc1d4a73..a38d3ac0f18f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c | |||
@@ -1246,7 +1246,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) | |||
1246 | static int ip6gre_tunnel_init(struct net_device *dev) | 1246 | static int ip6gre_tunnel_init(struct net_device *dev) |
1247 | { | 1247 | { |
1248 | struct ip6_tnl *tunnel; | 1248 | struct ip6_tnl *tunnel; |
1249 | int i; | ||
1250 | 1249 | ||
1251 | tunnel = netdev_priv(dev); | 1250 | tunnel = netdev_priv(dev); |
1252 | 1251 | ||
@@ -1260,16 +1259,10 @@ static int ip6gre_tunnel_init(struct net_device *dev) | |||
1260 | if (ipv6_addr_any(&tunnel->parms.raddr)) | 1259 | if (ipv6_addr_any(&tunnel->parms.raddr)) |
1261 | dev->header_ops = &ip6gre_header_ops; | 1260 | dev->header_ops = &ip6gre_header_ops; |
1262 | 1261 | ||
1263 | dev->tstats = alloc_percpu(struct pcpu_sw_netstats); | 1262 | dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); |
1264 | if (!dev->tstats) | 1263 | if (!dev->tstats) |
1265 | return -ENOMEM; | 1264 | return -ENOMEM; |
1266 | 1265 | ||
1267 | for_each_possible_cpu(i) { | ||
1268 | struct pcpu_sw_netstats *ip6gre_tunnel_stats; | ||
1269 | ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); | ||
1270 | u64_stats_init(&ip6gre_tunnel_stats->syncp); | ||
1271 | } | ||
1272 | |||
1273 | return 0; | 1266 | return 0; |
1274 | } | 1267 | } |
1275 | 1268 | ||
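netdev_alloc_pcpu_stats() folds the open-coded allocate-then-init sequence into one helper. A sketch of roughly what the helper does on the caller's behalf (not the exact macro body from <linux/netdevice.h>):

	dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
	if (dev->tstats) {
		int cpu;

		for_each_possible_cpu(cpu) {
			struct pcpu_sw_netstats *s;

			s = per_cpu_ptr(dev->tstats, cpu);
			u64_stats_init(&s->syncp);
		}
	}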
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7fde1f265c90..c21777565c58 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -886,22 +886,45 @@ static int ip6_dst_lookup_tail(struct sock *sk, | |||
886 | #endif | 886 | #endif |
887 | int err; | 887 | int err; |
888 | 888 | ||
889 | if (!*dst) | 889 | /* The correct way to handle this would be to do |
890 | *dst = ip6_route_output(net, sk, fl6); | 890 | * ip6_route_get_saddr, and then ip6_route_output; however, |
891 | 891 | * the route-specific preferred source forces the | |
892 | err = (*dst)->error; | 892 | * ip6_route_output call _before_ ip6_route_get_saddr. |
893 | if (err) | 893 | * |
894 | goto out_err_release; | 894 | * In source specific routing (no src=any default route), |
895 | * ip6_route_output will fail given src=any saddr, though, so | ||
896 | * that's why we try it again later. | ||
897 | */ | ||
898 | if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { | ||
899 | struct rt6_info *rt; | ||
900 | bool had_dst = *dst != NULL; | ||
895 | 901 | ||
896 | if (ipv6_addr_any(&fl6->saddr)) { | 902 | if (!had_dst) |
897 | struct rt6_info *rt = (struct rt6_info *) *dst; | 903 | *dst = ip6_route_output(net, sk, fl6); |
904 | rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; | ||
898 | err = ip6_route_get_saddr(net, rt, &fl6->daddr, | 905 | err = ip6_route_get_saddr(net, rt, &fl6->daddr, |
899 | sk ? inet6_sk(sk)->srcprefs : 0, | 906 | sk ? inet6_sk(sk)->srcprefs : 0, |
900 | &fl6->saddr); | 907 | &fl6->saddr); |
901 | if (err) | 908 | if (err) |
902 | goto out_err_release; | 909 | goto out_err_release; |
910 | |||
911 | /* If we had an erroneous initial result, pretend it | ||
912 | * never existed and let the SA-enabled version take | ||
913 | * over. | ||
914 | */ | ||
915 | if (!had_dst && (*dst)->error) { | ||
916 | dst_release(*dst); | ||
917 | *dst = NULL; | ||
918 | } | ||
903 | } | 919 | } |
904 | 920 | ||
921 | if (!*dst) | ||
922 | *dst = ip6_route_output(net, sk, fl6); | ||
923 | |||
924 | err = (*dst)->error; | ||
925 | if (err) | ||
926 | goto out_err_release; | ||
927 | |||
905 | #ifdef CONFIG_IPV6_OPTIMISTIC_DAD | 928 | #ifdef CONFIG_IPV6_OPTIMISTIC_DAD |
906 | /* | 929 | /* |
907 | * Here if the dst entry we've looked up | 930 | * Here if the dst entry we've looked up |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5c48293ff062..d3588885f097 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -2245,9 +2245,10 @@ int ip6_route_get_saddr(struct net *net, | |||
2245 | unsigned int prefs, | 2245 | unsigned int prefs, |
2246 | struct in6_addr *saddr) | 2246 | struct in6_addr *saddr) |
2247 | { | 2247 | { |
2248 | struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt); | 2248 | struct inet6_dev *idev = |
2249 | rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; | ||
2249 | int err = 0; | 2250 | int err = 0; |
2250 | if (rt->rt6i_prefsrc.plen) | 2251 | if (rt && rt->rt6i_prefsrc.plen) |
2251 | *saddr = rt->rt6i_prefsrc.addr; | 2252 | *saddr = rt->rt6i_prefsrc.addr; |
2252 | else | 2253 | else |
2253 | err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, | 2254 | err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, |
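With source-specific routing there may be no src=any default route, so the old order (ip6_route_output() first, bail on error) could fail before a preferred source address was ever chosen. The new flow attempts ip6_route_get_saddr() up front when saddr is unspecified, discards an erroneous first dst, and retries the route lookup with the selected source; ip6_route_get_saddr() in net/ipv6/route.c is taught to tolerate rt == NULL to match.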
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ad51df85aa00..b6575d665568 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -946,7 +946,8 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
946 | &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); | 946 | &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); |
947 | if (req) { | 947 | if (req) { |
948 | nsk = tcp_check_req(sk, skb, req, false); | 948 | nsk = tcp_check_req(sk, skb, req, false); |
949 | reqsk_put(req); | 949 | if (!nsk) |
950 | reqsk_put(req); | ||
950 | return nsk; | 951 | return nsk; |
951 | } | 952 | } |
952 | nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, | 953 | nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, |
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b4ac596a7cb7..bab5c63c0bad 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c | |||
@@ -819,13 +819,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, | |||
819 | * (because if we remove a STA after ops->remove_interface() | 819 | * (because if we remove a STA after ops->remove_interface() |
820 | * the driver will have removed the vif info already!) | 820 | * the driver will have removed the vif info already!) |
821 | * | 821 | * |
822 | * This is relevant only in WDS mode, in all other modes we've | 822 | * In WDS mode a station must exist here and be flushed, for |
823 | * already removed all stations when disconnecting or similar, | 823 | * AP_VLANs stations may exist since there's nothing else that |
824 | * so warn otherwise. | 824 | * would have removed them, but in other modes there shouldn't |
825 | * be any stations. | ||
825 | */ | 826 | */ |
826 | flushed = sta_info_flush(sdata); | 827 | flushed = sta_info_flush(sdata); |
827 | WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || | 828 | WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && |
828 | (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); | 829 | ((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || |
830 | (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1))); | ||
829 | 831 | ||
830 | /* don't count this interface for promisc/allmulti while it is down */ | 832 | /* don't count this interface for promisc/allmulti while it is down */ |
831 | if (sdata->flags & IEEE80211_SDATA_ALLMULTI) | 833 | if (sdata->flags & IEEE80211_SDATA_ALLMULTI) |
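AP_VLAN interfaces may still hold stations at this point because nothing else tears them down, so the warning is relaxed to tolerate a non-zero flush count on AP_VLANs while still catching station leaks on other interface types.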
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 12971b71d0fa..2880f2ae99ab 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c | |||
@@ -66,6 +66,7 @@ | |||
66 | 66 | ||
67 | static const struct rhashtable_params sta_rht_params = { | 67 | static const struct rhashtable_params sta_rht_params = { |
68 | .nelem_hint = 3, /* start small */ | 68 | .nelem_hint = 3, /* start small */ |
69 | .automatic_shrinking = true, | ||
69 | .head_offset = offsetof(struct sta_info, hash_node), | 70 | .head_offset = offsetof(struct sta_info, hash_node), |
70 | .key_offset = offsetof(struct sta_info, sta.addr), | 71 | .key_offset = offsetof(struct sta_info, sta.addr), |
71 | .key_len = ETH_ALEN, | 72 | .key_len = ETH_ALEN, |
@@ -157,8 +158,24 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, | |||
157 | const u8 *addr) | 158 | const u8 *addr) |
158 | { | 159 | { |
159 | struct ieee80211_local *local = sdata->local; | 160 | struct ieee80211_local *local = sdata->local; |
161 | struct sta_info *sta; | ||
162 | struct rhash_head *tmp; | ||
163 | const struct bucket_table *tbl; | ||
164 | |||
165 | rcu_read_lock(); | ||
166 | tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); | ||
160 | 167 | ||
161 | return rhashtable_lookup_fast(&local->sta_hash, addr, sta_rht_params); | 168 | for_each_sta_info(local, tbl, addr, sta, tmp) { |
169 | if (sta->sdata == sdata) { | ||
170 | rcu_read_unlock(); | ||
171 | /* this is safe as the caller must already hold | ||
172 | * another rcu read section or the mutex | ||
173 | */ | ||
174 | return sta; | ||
175 | } | ||
176 | } | ||
177 | rcu_read_unlock(); | ||
178 | return NULL; | ||
162 | } | 179 | } |
163 | 180 | ||
164 | /* | 181 | /* |
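The station hash is keyed on MAC address alone, and the same address can legitimately appear under several interfaces of one ieee80211_local. rhashtable_lookup_fast() returns an arbitrary match, so sta_info_get() now walks all entries for the address and filters on sdata; as the added comment says, the returned pointer stays valid because the caller already holds an RCU read section or the mutex.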
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index 5d9f68c75e5f..70be9c799f8a 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c | |||
@@ -22,13 +22,14 @@ | |||
22 | 22 | ||
23 | static struct net_device * | 23 | static struct net_device * |
24 | ieee802154_add_iface_deprecated(struct wpan_phy *wpan_phy, | 24 | ieee802154_add_iface_deprecated(struct wpan_phy *wpan_phy, |
25 | const char *name, int type) | 25 | const char *name, |
26 | unsigned char name_assign_type, int type) | ||
26 | { | 27 | { |
27 | struct ieee802154_local *local = wpan_phy_priv(wpan_phy); | 28 | struct ieee802154_local *local = wpan_phy_priv(wpan_phy); |
28 | struct net_device *dev; | 29 | struct net_device *dev; |
29 | 30 | ||
30 | rtnl_lock(); | 31 | rtnl_lock(); |
31 | dev = ieee802154_if_add(local, name, type, | 32 | dev = ieee802154_if_add(local, name, name_assign_type, type, |
32 | cpu_to_le64(0x0000000000000000ULL)); | 33 | cpu_to_le64(0x0000000000000000ULL)); |
33 | rtnl_unlock(); | 34 | rtnl_unlock(); |
34 | 35 | ||
@@ -45,12 +46,14 @@ static void ieee802154_del_iface_deprecated(struct wpan_phy *wpan_phy, | |||
45 | 46 | ||
46 | static int | 47 | static int |
47 | ieee802154_add_iface(struct wpan_phy *phy, const char *name, | 48 | ieee802154_add_iface(struct wpan_phy *phy, const char *name, |
49 | unsigned char name_assign_type, | ||
48 | enum nl802154_iftype type, __le64 extended_addr) | 50 | enum nl802154_iftype type, __le64 extended_addr) |
49 | { | 51 | { |
50 | struct ieee802154_local *local = wpan_phy_priv(phy); | 52 | struct ieee802154_local *local = wpan_phy_priv(phy); |
51 | struct net_device *err; | 53 | struct net_device *err; |
52 | 54 | ||
53 | err = ieee802154_if_add(local, name, type, extended_addr); | 55 | err = ieee802154_if_add(local, name, name_assign_type, type, |
56 | extended_addr); | ||
54 | return PTR_ERR_OR_ZERO(err); | 57 | return PTR_ERR_OR_ZERO(err); |
55 | } | 58 | } |
56 | 59 | ||
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index bebd70ffc7a3..127ba18386fc 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h | |||
@@ -182,7 +182,8 @@ void ieee802154_iface_exit(void); | |||
182 | void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata); | 182 | void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata); |
183 | struct net_device * | 183 | struct net_device * |
184 | ieee802154_if_add(struct ieee802154_local *local, const char *name, | 184 | ieee802154_if_add(struct ieee802154_local *local, const char *name, |
185 | enum nl802154_iftype type, __le64 extended_addr); | 185 | unsigned char name_assign_type, enum nl802154_iftype type, |
186 | __le64 extended_addr); | ||
186 | void ieee802154_remove_interfaces(struct ieee802154_local *local); | 187 | void ieee802154_remove_interfaces(struct ieee802154_local *local); |
187 | 188 | ||
188 | #endif /* __IEEE802154_I_H */ | 189 | #endif /* __IEEE802154_I_H */ |
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 38b56f9d9386..91b75abbd1a1 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c | |||
@@ -522,7 +522,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, | |||
522 | 522 | ||
523 | struct net_device * | 523 | struct net_device * |
524 | ieee802154_if_add(struct ieee802154_local *local, const char *name, | 524 | ieee802154_if_add(struct ieee802154_local *local, const char *name, |
525 | enum nl802154_iftype type, __le64 extended_addr) | 525 | unsigned char name_assign_type, enum nl802154_iftype type, |
526 | __le64 extended_addr) | ||
526 | { | 527 | { |
527 | struct net_device *ndev = NULL; | 528 | struct net_device *ndev = NULL; |
528 | struct ieee802154_sub_if_data *sdata = NULL; | 529 | struct ieee802154_sub_if_data *sdata = NULL; |
@@ -531,7 +532,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, | |||
531 | ASSERT_RTNL(); | 532 | ASSERT_RTNL(); |
532 | 533 | ||
533 | ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, name, | 534 | ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, name, |
534 | NET_NAME_UNKNOWN, ieee802154_if_setup); | 535 | name_assign_type, ieee802154_if_setup); |
535 | if (!ndev) | 536 | if (!ndev) |
536 | return ERR_PTR(-ENOMEM); | 537 | return ERR_PTR(-ENOMEM); |
537 | 538 | ||
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index dcf73958133a..5b2be12832e6 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c | |||
@@ -134,7 +134,7 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template) | |||
134 | for (i = 0; i < ARRAY_SIZE(key->tfm); i++) { | 134 | for (i = 0; i < ARRAY_SIZE(key->tfm); i++) { |
135 | key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0, | 135 | key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0, |
136 | CRYPTO_ALG_ASYNC); | 136 | CRYPTO_ALG_ASYNC); |
137 | if (!key->tfm[i]) | 137 | if (IS_ERR(key->tfm[i])) |
138 | goto err_tfm; | 138 | goto err_tfm; |
139 | if (crypto_aead_setkey(key->tfm[i], template->key, | 139 | if (crypto_aead_setkey(key->tfm[i], template->key, |
140 | IEEE802154_LLSEC_KEY_SIZE)) | 140 | IEEE802154_LLSEC_KEY_SIZE)) |
@@ -144,7 +144,7 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template) | |||
144 | } | 144 | } |
145 | 145 | ||
146 | key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); | 146 | key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); |
147 | if (!key->tfm0) | 147 | if (IS_ERR(key->tfm0)) |
148 | goto err_tfm; | 148 | goto err_tfm; |
149 | 149 | ||
150 | if (crypto_blkcipher_setkey(key->tfm0, template->key, | 150 | if (crypto_blkcipher_setkey(key->tfm0, template->key, |
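The crypto_alloc_aead()/crypto_alloc_blkcipher() constructors report failure as ERR_PTR(-errno), never NULL, so the old !ptr tests could never fire and a failed allocation would be dereferenced later. The idiom, as a sketch:

	struct crypto_aead *tfm;

	tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);	/* not: if (!tfm) */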
diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 8500378c8318..08cb32dc8fd3 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c | |||
@@ -161,18 +161,21 @@ int ieee802154_register_hw(struct ieee802154_hw *hw) | |||
161 | 161 | ||
162 | rtnl_lock(); | 162 | rtnl_lock(); |
163 | 163 | ||
164 | dev = ieee802154_if_add(local, "wpan%d", NL802154_IFTYPE_NODE, | 164 | dev = ieee802154_if_add(local, "wpan%d", NET_NAME_ENUM, |
165 | NL802154_IFTYPE_NODE, | ||
165 | cpu_to_le64(0x0000000000000000ULL)); | 166 | cpu_to_le64(0x0000000000000000ULL)); |
166 | if (IS_ERR(dev)) { | 167 | if (IS_ERR(dev)) { |
167 | rtnl_unlock(); | 168 | rtnl_unlock(); |
168 | rc = PTR_ERR(dev); | 169 | rc = PTR_ERR(dev); |
169 | goto out_wq; | 170 | goto out_phy; |
170 | } | 171 | } |
171 | 172 | ||
172 | rtnl_unlock(); | 173 | rtnl_unlock(); |
173 | 174 | ||
174 | return 0; | 175 | return 0; |
175 | 176 | ||
177 | out_phy: | ||
178 | wpan_phy_unregister(local->phy); | ||
176 | out_wq: | 179 | out_wq: |
177 | destroy_workqueue(local->workqueue); | 180 | destroy_workqueue(local->workqueue); |
178 | out: | 181 | out: |
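The mac802154 hunks plumb a name_assign_type argument through to alloc_netdev() so userspace can tell predictable interface names from kernel-enumerated ones; ieee802154_register_hw() passes NET_NAME_ENUM for its own "wpan%d" device instead of the old NET_NAME_UNKNOWN. The main.c hunk also fixes the error path: a failed ieee802154_if_add() now unwinds through wpan_phy_unregister() rather than leaking the registered phy.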
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index db8a2ea6d4de..7b3f732269e4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c | |||
@@ -53,6 +53,11 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) | |||
53 | return rt; | 53 | return rt; |
54 | } | 54 | } |
55 | 55 | ||
56 | static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) | ||
57 | { | ||
58 | return rcu_dereference_rtnl(dev->mpls_ptr); | ||
59 | } | ||
60 | |||
56 | static bool mpls_output_possible(const struct net_device *dev) | 61 | static bool mpls_output_possible(const struct net_device *dev) |
57 | { | 62 | { |
58 | return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); | 63 | return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); |
@@ -136,6 +141,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, | |||
136 | struct mpls_route *rt; | 141 | struct mpls_route *rt; |
137 | struct mpls_entry_decoded dec; | 142 | struct mpls_entry_decoded dec; |
138 | struct net_device *out_dev; | 143 | struct net_device *out_dev; |
144 | struct mpls_dev *mdev; | ||
139 | unsigned int hh_len; | 145 | unsigned int hh_len; |
140 | unsigned int new_header_size; | 146 | unsigned int new_header_size; |
141 | unsigned int mtu; | 147 | unsigned int mtu; |
@@ -143,6 +149,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, | |||
143 | 149 | ||
144 | /* Careful this entire function runs inside of an rcu critical section */ | 150 | /* Careful this entire function runs inside of an rcu critical section */ |
145 | 151 | ||
152 | mdev = mpls_dev_get(dev); | ||
153 | if (!mdev || !mdev->input_enabled) | ||
154 | goto drop; | ||
155 | |||
146 | if (skb->pkt_type != PACKET_HOST) | 156 | if (skb->pkt_type != PACKET_HOST) |
147 | goto drop; | 157 | goto drop; |
148 | 158 | ||
@@ -352,9 +362,9 @@ static int mpls_route_add(struct mpls_route_config *cfg) | |||
352 | if (!dev) | 362 | if (!dev) |
353 | goto errout; | 363 | goto errout; |
354 | 364 | ||
355 | /* For now just support ethernet devices */ | 365 | /* Ensure this is a supported device */ |
356 | err = -EINVAL; | 366 | err = -EINVAL; |
357 | if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) | 367 | if (!mpls_dev_get(dev)) |
358 | goto errout; | 368 | goto errout; |
359 | 369 | ||
360 | err = -EINVAL; | 370 | err = -EINVAL; |
@@ -428,10 +438,89 @@ errout: | |||
428 | return err; | 438 | return err; |
429 | } | 439 | } |
430 | 440 | ||
441 | #define MPLS_PERDEV_SYSCTL_OFFSET(field) \ | ||
442 | (&((struct mpls_dev *)0)->field) | ||
443 | |||
444 | static const struct ctl_table mpls_dev_table[] = { | ||
445 | { | ||
446 | .procname = "input", | ||
447 | .maxlen = sizeof(int), | ||
448 | .mode = 0644, | ||
449 | .proc_handler = proc_dointvec, | ||
450 | .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), | ||
451 | }, | ||
452 | { } | ||
453 | }; | ||
454 | |||
455 | static int mpls_dev_sysctl_register(struct net_device *dev, | ||
456 | struct mpls_dev *mdev) | ||
457 | { | ||
458 | char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; | ||
459 | struct ctl_table *table; | ||
460 | int i; | ||
461 | |||
462 | table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL); | ||
463 | if (!table) | ||
464 | goto out; | ||
465 | |||
466 | /* Table data contains only offsets relative to the base of | ||
467 | * the mdev at this point, so make them absolute. | ||
468 | */ | ||
469 | for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) | ||
470 | table[i].data = (char *)mdev + (uintptr_t)table[i].data; | ||
471 | |||
472 | snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); | ||
473 | |||
474 | mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); | ||
475 | if (!mdev->sysctl) | ||
476 | goto free; | ||
477 | |||
478 | return 0; | ||
479 | |||
480 | free: | ||
481 | kfree(table); | ||
482 | out: | ||
483 | return -ENOBUFS; | ||
484 | } | ||
485 | |||
486 | static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) | ||
487 | { | ||
488 | struct ctl_table *table; | ||
489 | |||
490 | table = mdev->sysctl->ctl_table_arg; | ||
491 | unregister_net_sysctl_table(mdev->sysctl); | ||
492 | kfree(table); | ||
493 | } | ||
494 | |||
495 | static struct mpls_dev *mpls_add_dev(struct net_device *dev) | ||
496 | { | ||
497 | struct mpls_dev *mdev; | ||
498 | int err = -ENOMEM; | ||
499 | |||
500 | ASSERT_RTNL(); | ||
501 | |||
502 | mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); | ||
503 | if (!mdev) | ||
504 | return ERR_PTR(err); | ||
505 | |||
506 | err = mpls_dev_sysctl_register(dev, mdev); | ||
507 | if (err) | ||
508 | goto free; | ||
509 | |||
510 | rcu_assign_pointer(dev->mpls_ptr, mdev); | ||
511 | |||
512 | return mdev; | ||
513 | |||
514 | free: | ||
515 | kfree(mdev); | ||
516 | return ERR_PTR(err); | ||
517 | } | ||
518 | |||
431 | static void mpls_ifdown(struct net_device *dev) | 519 | static void mpls_ifdown(struct net_device *dev) |
432 | { | 520 | { |
433 | struct mpls_route __rcu **platform_label; | 521 | struct mpls_route __rcu **platform_label; |
434 | struct net *net = dev_net(dev); | 522 | struct net *net = dev_net(dev); |
523 | struct mpls_dev *mdev; | ||
435 | unsigned index; | 524 | unsigned index; |
436 | 525 | ||
437 | platform_label = rtnl_dereference(net->mpls.platform_label); | 526 | platform_label = rtnl_dereference(net->mpls.platform_label); |
@@ -443,14 +532,35 @@ static void mpls_ifdown(struct net_device *dev) | |||
443 | continue; | 532 | continue; |
444 | rt->rt_dev = NULL; | 533 | rt->rt_dev = NULL; |
445 | } | 534 | } |
535 | |||
536 | mdev = mpls_dev_get(dev); | ||
537 | if (!mdev) | ||
538 | return; | ||
539 | |||
540 | mpls_dev_sysctl_unregister(mdev); | ||
541 | |||
542 | RCU_INIT_POINTER(dev->mpls_ptr, NULL); | ||
543 | |||
544 | kfree(mdev); | ||
446 | } | 545 | } |
447 | 546 | ||
448 | static int mpls_dev_notify(struct notifier_block *this, unsigned long event, | 547 | static int mpls_dev_notify(struct notifier_block *this, unsigned long event, |
449 | void *ptr) | 548 | void *ptr) |
450 | { | 549 | { |
451 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); | 550 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
551 | struct mpls_dev *mdev; | ||
452 | 552 | ||
453 | switch(event) { | 553 | switch(event) { |
554 | case NETDEV_REGISTER: | ||
555 | /* For now just support ethernet devices */ | ||
556 | if ((dev->type == ARPHRD_ETHER) || | ||
557 | (dev->type == ARPHRD_LOOPBACK)) { | ||
558 | mdev = mpls_add_dev(dev); | ||
559 | if (IS_ERR(mdev)) | ||
560 | return notifier_from_errno(PTR_ERR(mdev)); | ||
561 | } | ||
562 | break; | ||
563 | |||
454 | case NETDEV_UNREGISTER: | 564 | case NETDEV_UNREGISTER: |
455 | mpls_ifdown(dev); | 565 | mpls_ifdown(dev); |
456 | break; | 566 | break; |
@@ -536,6 +646,15 @@ int nla_get_labels(const struct nlattr *nla, | |||
536 | if ((dec.bos != bos) || dec.ttl || dec.tc) | 646 | if ((dec.bos != bos) || dec.ttl || dec.tc) |
537 | return -EINVAL; | 647 | return -EINVAL; |
538 | 648 | ||
649 | switch (dec.label) { | ||
650 | case MPLS_LABEL_IMPLNULL: | ||
651 | /* RFC3032: This is a label that an LSR may | ||
652 | * assign and distribute, but which never | ||
653 | * actually appears in the encapsulation. | ||
654 | */ | ||
655 | return -EINVAL; | ||
656 | } | ||
657 | |||
539 | label[i] = dec.label; | 658 | label[i] = dec.label; |
540 | } | 659 | } |
541 | *labels = nla_labels; | 660 | *labels = nla_labels; |
@@ -816,7 +935,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) | |||
816 | } | 935 | } |
817 | 936 | ||
818 | /* In case the predefined labels need to be populated */ | 937 | /* In case the predefined labels need to be populated */ |
819 | if (limit > LABEL_IPV4_EXPLICIT_NULL) { | 938 | if (limit > MPLS_LABEL_IPV4NULL) { |
820 | struct net_device *lo = net->loopback_dev; | 939 | struct net_device *lo = net->loopback_dev; |
821 | rt0 = mpls_rt_alloc(lo->addr_len); | 940 | rt0 = mpls_rt_alloc(lo->addr_len); |
822 | if (!rt0) | 941 | if (!rt0) |
@@ -826,7 +945,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) | |||
826 | rt0->rt_via_table = NEIGH_LINK_TABLE; | 945 | rt0->rt_via_table = NEIGH_LINK_TABLE; |
827 | memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); | 946 | memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); |
828 | } | 947 | } |
829 | if (limit > LABEL_IPV6_EXPLICIT_NULL) { | 948 | if (limit > MPLS_LABEL_IPV6NULL) { |
830 | struct net_device *lo = net->loopback_dev; | 949 | struct net_device *lo = net->loopback_dev; |
831 | rt2 = mpls_rt_alloc(lo->addr_len); | 950 | rt2 = mpls_rt_alloc(lo->addr_len); |
832 | if (!rt2) | 951 | if (!rt2) |
@@ -854,15 +973,15 @@ static int resize_platform_label_table(struct net *net, size_t limit) | |||
854 | memcpy(labels, old, cp_size); | 973 | memcpy(labels, old, cp_size); |
855 | 974 | ||
856 | /* If needed set the predefined labels */ | 975 | /* If needed set the predefined labels */ |
857 | if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) && | 976 | if ((old_limit <= MPLS_LABEL_IPV6NULL) && |
858 | (limit > LABEL_IPV6_EXPLICIT_NULL)) { | 977 | (limit > MPLS_LABEL_IPV6NULL)) { |
859 | RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2); | 978 | RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2); |
860 | rt2 = NULL; | 979 | rt2 = NULL; |
861 | } | 980 | } |
862 | 981 | ||
863 | if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) && | 982 | if ((old_limit <= MPLS_LABEL_IPV4NULL) && |
864 | (limit > LABEL_IPV4_EXPLICIT_NULL)) { | 983 | (limit > MPLS_LABEL_IPV4NULL)) { |
865 | RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0); | 984 | RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0); |
866 | rt0 = NULL; | 985 | rt0 = NULL; |
867 | } | 986 | } |
868 | 987 | ||
@@ -912,7 +1031,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write, | |||
912 | return ret; | 1031 | return ret; |
913 | } | 1032 | } |
914 | 1033 | ||
915 | static struct ctl_table mpls_table[] = { | 1034 | static const struct ctl_table mpls_table[] = { |
916 | { | 1035 | { |
917 | .procname = "platform_labels", | 1036 | .procname = "platform_labels", |
918 | .data = NULL, | 1037 | .data = NULL, |
diff --git a/net/mpls/internal.h b/net/mpls/internal.h index fb6de92052c4..b064c345042c 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h | |||
@@ -1,16 +1,6 @@ | |||
1 | #ifndef MPLS_INTERNAL_H | 1 | #ifndef MPLS_INTERNAL_H |
2 | #define MPLS_INTERNAL_H | 2 | #define MPLS_INTERNAL_H |
3 | 3 | ||
4 | #define LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */ | ||
5 | #define LABEL_ROUTER_ALERT_LABEL 1 /* RFC3032 */ | ||
6 | #define LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */ | ||
7 | #define LABEL_IMPLICIT_NULL 3 /* RFC3032 */ | ||
8 | #define LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */ | ||
9 | #define LABEL_GAL 13 /* RFC5586 */ | ||
10 | #define LABEL_OAM_ALERT 14 /* RFC3429 */ | ||
11 | #define LABEL_EXTENSION 15 /* RFC7274 */ | ||
12 | |||
13 | |||
14 | struct mpls_shim_hdr { | 4 | struct mpls_shim_hdr { |
15 | __be32 label_stack_entry; | 5 | __be32 label_stack_entry; |
16 | }; | 6 | }; |
@@ -22,6 +12,12 @@ struct mpls_entry_decoded { | |||
22 | u8 bos; | 12 | u8 bos; |
23 | }; | 13 | }; |
24 | 14 | ||
15 | struct mpls_dev { | ||
16 | int input_enabled; | ||
17 | |||
18 | struct ctl_table_header *sysctl; | ||
19 | }; | ||
20 | |||
25 | struct sk_buff; | 21 | struct sk_buff; |
26 | 22 | ||
27 | static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) | 23 | static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) |
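MPLS forwarding becomes opt-in per device. A NETDEV_REGISTER notifier allocates the new struct mpls_dev (defined in the internal.h hunk) for Ethernet and loopback devices and registers a per-device "net/mpls/conf/<dev>/input" sysctl; mpls_forward() now drops packets unless input_enabled was set by writing 1 to that file. The sysctl table trick stores offsetof(struct mpls_dev, field) in .data (that is all MPLS_PERDEV_SYSCTL_OFFSET computes via the null-pointer cast) and rebases it onto the real mdev at registration time. Two label changes ride along: implicit-null labels in routes are rejected per RFC 3032, since they must never appear in the encapsulation, and the private LABEL_* defines give way to the uapi MPLS_LABEL_* names.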
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 78af83bc9c8e..ad9d11fb29fd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c | |||
@@ -4340,7 +4340,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, | |||
4340 | case NFT_CONTINUE: | 4340 | case NFT_CONTINUE: |
4341 | case NFT_BREAK: | 4341 | case NFT_BREAK: |
4342 | case NFT_RETURN: | 4342 | case NFT_RETURN: |
4343 | desc->len = sizeof(data->verdict); | ||
4344 | break; | 4343 | break; |
4345 | case NFT_JUMP: | 4344 | case NFT_JUMP: |
4346 | case NFT_GOTO: | 4345 | case NFT_GOTO: |
@@ -4355,10 +4354,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, | |||
4355 | 4354 | ||
4356 | chain->use++; | 4355 | chain->use++; |
4357 | data->verdict.chain = chain; | 4356 | data->verdict.chain = chain; |
4358 | desc->len = sizeof(data); | ||
4359 | break; | 4357 | break; |
4360 | } | 4358 | } |
4361 | 4359 | ||
4360 | desc->len = sizeof(data->verdict); | ||
4362 | desc->type = NFT_DATA_VERDICT; | 4361 | desc->type = NFT_DATA_VERDICT; |
4363 | return 0; | 4362 | return 0; |
4364 | } | 4363 | } |
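The verdict-length change fixes a classic sizeof-on-a-pointer bug: data is a pointer parameter, so sizeof(data) yields the pointer size rather than the verdict's, and hoisting the assignment after the switch covers every verdict case once. A self-contained sketch of the pitfall, with a made-up struct standing in for nft_data:

	#include <stddef.h>

	struct foo {
		struct {
			int code;
			void *chain;
		} verdict;
	};

	static void sizeof_pitfall(struct foo *data)
	{
		size_t p = sizeof(data);	  /* pointer: 4 or 8 bytes */
		size_t m = sizeof(data->verdict); /* the member itself    */

		(void)p;
		(void)m;
	}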
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 57d3e1af5630..0522fc9bfb0a 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c | |||
@@ -63,6 +63,8 @@ int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) | |||
63 | if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) | 63 | if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) |
64 | goto nla_put_failure; | 64 | goto nla_put_failure; |
65 | break; | 65 | break; |
66 | default: | ||
67 | break; | ||
66 | } | 68 | } |
67 | 69 | ||
68 | return 0; | 70 | return 0; |
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 62cabee42fbe..635dbba93d01 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c | |||
@@ -108,6 +108,8 @@ static int nft_reject_inet_dump(struct sk_buff *skb, | |||
108 | if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) | 108 | if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) |
109 | goto nla_put_failure; | 109 | goto nla_put_failure; |
110 | break; | 110 | break; |
111 | default: | ||
112 | break; | ||
111 | } | 113 | } |
112 | 114 | ||
113 | return 0; | 115 | return 0; |
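The added default: break; arms in nft_reject and nft_reject_inet make the switches cover the remaining reject-type values explicitly, silencing compiler warnings about unhandled enumeration values without changing behaviour.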
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19909d0786a2..daa0b818174b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -1629,13 +1629,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size, | |||
1629 | if (data == NULL) | 1629 | if (data == NULL) |
1630 | return NULL; | 1630 | return NULL; |
1631 | 1631 | ||
1632 | skb = build_skb(data, size); | 1632 | skb = __build_skb(data, size); |
1633 | if (skb == NULL) | 1633 | if (skb == NULL) |
1634 | vfree(data); | 1634 | vfree(data); |
1635 | else { | 1635 | else |
1636 | skb->head_frag = 0; | ||
1637 | skb->destructor = netlink_skb_destructor; | 1636 | skb->destructor = netlink_skb_destructor; |
1638 | } | ||
1639 | 1637 | ||
1640 | return skb; | 1638 | return skb; |
1641 | } | 1639 | } |
@@ -3141,7 +3139,6 @@ static const struct rhashtable_params netlink_rhashtable_params = { | |||
3141 | .key_len = netlink_compare_arg_len, | 3139 | .key_len = netlink_compare_arg_len, |
3142 | .obj_hashfn = netlink_hash, | 3140 | .obj_hashfn = netlink_hash, |
3143 | .obj_cmpfn = netlink_compare, | 3141 | .obj_cmpfn = netlink_compare, |
3144 | .max_size = 65536, | ||
3145 | .automatic_shrinking = true, | 3142 | .automatic_shrinking = true, |
3146 | }; | 3143 | }; |
3147 | 3144 | ||
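Two independent netlink fixes: netlink_alloc_large_skb() switches to __build_skb(), which takes a plain kmalloc/vmalloc'd head and never marks it as a page fragment, so the manual skb->head_frag = 0 fixup disappears; and dropping .max_size from the rhashtable parameters lets the socket table grow past 65536 slots, with automatic_shrinking handling contraction.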
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5102c3cc4eec..b5989c6ee551 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -2311,11 +2311,14 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) | |||
2311 | tlen = dev->needed_tailroom; | 2311 | tlen = dev->needed_tailroom; |
2312 | skb = sock_alloc_send_skb(&po->sk, | 2312 | skb = sock_alloc_send_skb(&po->sk, |
2313 | hlen + tlen + sizeof(struct sockaddr_ll), | 2313 | hlen + tlen + sizeof(struct sockaddr_ll), |
2314 | 0, &err); | 2314 | !need_wait, &err); |
2315 | 2315 | ||
2316 | if (unlikely(skb == NULL)) | 2316 | if (unlikely(skb == NULL)) { |
2317 | /* we assume the socket was initially writeable ... */ | ||
2318 | if (likely(len_sum > 0)) | ||
2319 | err = len_sum; | ||
2317 | goto out_status; | 2320 | goto out_status; |
2318 | 2321 | } | |
2319 | tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, | 2322 | tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, |
2320 | addr, hlen); | 2323 | addr, hlen); |
2321 | if (tp_len > dev->mtu + dev->hard_header_len) { | 2324 | if (tp_len > dev->mtu + dev->hard_header_len) { |
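tpacket_snd() now propagates the caller's non-blocking intent (!need_wait) into sock_alloc_send_skb(), so MSG_DONTWAIT senders get -EAGAIN instead of sleeping for wmem; and when allocation fails after some frames were already queued, the partial byte count len_sum is returned rather than the error, matching normal partial-write semantics.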
diff --git a/net/rds/connection.c b/net/rds/connection.c index 14f041398ca1..da6da57e5f36 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c | |||
@@ -126,7 +126,10 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | |||
126 | struct rds_transport *loop_trans; | 126 | struct rds_transport *loop_trans; |
127 | unsigned long flags; | 127 | unsigned long flags; |
128 | int ret; | 128 | int ret; |
129 | struct rds_transport *otrans = trans; | ||
129 | 130 | ||
131 | if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP) | ||
132 | goto new_conn; | ||
130 | rcu_read_lock(); | 133 | rcu_read_lock(); |
131 | conn = rds_conn_lookup(head, laddr, faddr, trans); | 134 | conn = rds_conn_lookup(head, laddr, faddr, trans); |
132 | if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && | 135 | if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && |
@@ -142,6 +145,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | |||
142 | if (conn) | 145 | if (conn) |
143 | goto out; | 146 | goto out; |
144 | 147 | ||
148 | new_conn: | ||
145 | conn = kmem_cache_zalloc(rds_conn_slab, gfp); | 149 | conn = kmem_cache_zalloc(rds_conn_slab, gfp); |
146 | if (!conn) { | 150 | if (!conn) { |
147 | conn = ERR_PTR(-ENOMEM); | 151 | conn = ERR_PTR(-ENOMEM); |
@@ -230,13 +234,22 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | |||
230 | /* Creating normal conn */ | 234 | /* Creating normal conn */ |
231 | struct rds_connection *found; | 235 | struct rds_connection *found; |
232 | 236 | ||
233 | found = rds_conn_lookup(head, laddr, faddr, trans); | 237 | if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP) |
238 | found = NULL; | ||
239 | else | ||
240 | found = rds_conn_lookup(head, laddr, faddr, trans); | ||
234 | if (found) { | 241 | if (found) { |
235 | trans->conn_free(conn->c_transport_data); | 242 | trans->conn_free(conn->c_transport_data); |
236 | kmem_cache_free(rds_conn_slab, conn); | 243 | kmem_cache_free(rds_conn_slab, conn); |
237 | conn = found; | 244 | conn = found; |
238 | } else { | 245 | } else { |
239 | hlist_add_head_rcu(&conn->c_hash_node, head); | 246 | if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) || |
247 | (otrans->t_type != RDS_TRANS_TCP)) { | ||
248 | /* Only the active side should be added to | ||
249 | * reconnect list for TCP. | ||
250 | */ | ||
251 | hlist_add_head_rcu(&conn->c_hash_node, head); | ||
252 | } | ||
240 | rds_cong_add_conn(conn); | 253 | rds_cong_add_conn(conn); |
241 | rds_conn_count++; | 254 | rds_conn_count++; |
242 | } | 255 | } |
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 31b74f5e61ad..8a09ee7db3c1 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c | |||
@@ -183,8 +183,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even | |||
183 | 183 | ||
184 | /* If the peer gave us the last packet it saw, process this as if | 184 | /* If the peer gave us the last packet it saw, process this as if |
185 | * we had received a regular ACK. */ | 185 | * we had received a regular ACK. */ |
186 | if (dp && dp->dp_ack_seq) | 186 | if (dp) { |
187 | rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); | 187 | /* dp structure start is not guaranteed to be 8 bytes aligned. |
188 | * Since dp_ack_seq is 64-bit extended load operations can be | ||
189 | * used so go through get_unaligned to avoid unaligned errors. | ||
190 | */ | ||
191 | __be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq); | ||
192 | |||
193 | if (dp_ack_seq) | ||
194 | rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq), | ||
195 | NULL); | ||
196 | } | ||
188 | 197 | ||
189 | rds_connect_complete(conn); | 198 | rds_connect_complete(conn); |
190 | } | 199 | } |
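The private-data struct arriving off the wire is not guaranteed 8-byte aligned, and a plain 64-bit load of dp_ack_seq can trap on architectures without hardware unaligned access; get_unaligned() from <asm/unaligned.h> compiles to byte-wise loads where the architecture needs them. Sketch of the idiom:

	#include <asm/unaligned.h>

	__be64 raw = get_unaligned(&dp->dp_ack_seq);	/* safe load */
	u64 ack = be64_to_cpu(raw);			/* then swap */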
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index f9f564a6c960..973109c7b8e8 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c | |||
@@ -62,6 +62,7 @@ void rds_tcp_state_change(struct sock *sk) | |||
62 | case TCP_ESTABLISHED: | 62 | case TCP_ESTABLISHED: |
63 | rds_connect_complete(conn); | 63 | rds_connect_complete(conn); |
64 | break; | 64 | break; |
65 | case TCP_CLOSE_WAIT: | ||
65 | case TCP_CLOSE: | 66 | case TCP_CLOSE: |
66 | rds_conn_drop(conn); | 67 | rds_conn_drop(conn); |
67 | default: | 68 | default: |
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 23ab4dcd1d9f..0da49e34495f 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c | |||
@@ -45,12 +45,45 @@ static void rds_tcp_accept_worker(struct work_struct *work); | |||
45 | static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker); | 45 | static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker); |
46 | static struct socket *rds_tcp_listen_sock; | 46 | static struct socket *rds_tcp_listen_sock; |
47 | 47 | ||
48 | static int rds_tcp_keepalive(struct socket *sock) | ||
49 | { | ||
50 | /* values below based on xs_udp_default_timeout */ | ||
51 | int keepidle = 5; /* send a probe 'keepidle' secs after last data */ | ||
52 | int keepcnt = 5; /* number of unack'ed probes before declaring dead */ | ||
53 | int keepalive = 1; | ||
54 | int ret = 0; | ||
55 | |||
56 | ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, | ||
57 | (char *)&keepalive, sizeof(keepalive)); | ||
58 | if (ret < 0) | ||
59 | goto bail; | ||
60 | |||
61 | ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, | ||
62 | (char *)&keepcnt, sizeof(keepcnt)); | ||
63 | if (ret < 0) | ||
64 | goto bail; | ||
65 | |||
66 | ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, | ||
67 | (char *)&keepidle, sizeof(keepidle)); | ||
68 | if (ret < 0) | ||
69 | goto bail; | ||
70 | |||
71 | /* KEEPINTVL is the interval between successive probes. We follow | ||
72 | * the model in xs_tcp_finish_connecting() and re-use keepidle. | ||
73 | */ | ||
74 | ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, | ||
75 | (char *)&keepidle, sizeof(keepidle)); | ||
76 | bail: | ||
77 | return ret; | ||
78 | } | ||
79 | |||
48 | static int rds_tcp_accept_one(struct socket *sock) | 80 | static int rds_tcp_accept_one(struct socket *sock) |
49 | { | 81 | { |
50 | struct socket *new_sock = NULL; | 82 | struct socket *new_sock = NULL; |
51 | struct rds_connection *conn; | 83 | struct rds_connection *conn; |
52 | int ret; | 84 | int ret; |
53 | struct inet_sock *inet; | 85 | struct inet_sock *inet; |
86 | struct rds_tcp_connection *rs_tcp; | ||
54 | 87 | ||
55 | ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, | 88 | ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, |
56 | sock->sk->sk_protocol, &new_sock); | 89 | sock->sk->sk_protocol, &new_sock); |
@@ -63,6 +96,10 @@ static int rds_tcp_accept_one(struct socket *sock) | |||
63 | if (ret < 0) | 96 | if (ret < 0) |
64 | goto out; | 97 | goto out; |
65 | 98 | ||
99 | ret = rds_tcp_keepalive(new_sock); | ||
100 | if (ret < 0) | ||
101 | goto out; | ||
102 | |||
66 | rds_tcp_tune(new_sock); | 103 | rds_tcp_tune(new_sock); |
67 | 104 | ||
68 | inet = inet_sk(new_sock->sk); | 105 | inet = inet_sk(new_sock->sk); |
@@ -77,6 +114,15 @@ static int rds_tcp_accept_one(struct socket *sock) | |||
77 | ret = PTR_ERR(conn); | 114 | ret = PTR_ERR(conn); |
78 | goto out; | 115 | goto out; |
79 | } | 116 | } |
117 | /* An incoming SYN request came in, and TCP just accepted it. | ||
118 | * We always create a new conn for listen side of TCP, and do not | ||
119 | * add it to the c_hash_list. | ||
120 | * | ||
121 | * If the client reboots, this conn will need to be cleaned up. | ||
122 | * rds_tcp_state_change() will do that cleanup | ||
123 | */ | ||
124 | rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; | ||
125 | WARN_ON(!rs_tcp || rs_tcp->t_sock); | ||
80 | 126 | ||
81 | /* | 127 | /* |
82 | * see the comment above rds_queue_delayed_reconnect() | 128 | * see the comment above rds_queue_delayed_reconnect() |
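The accepted socket is armed with keepalives so a peer that reboots silently still gets its stale connection torn down; rds_tcp_state_change(), now also triggered on TCP_CLOSE_WAIT, performs the cleanup the new comment in rds_tcp_accept_one() describes. A userspace analogue of rds_tcp_keepalive(), as a sketch with the same values:

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>

	static int set_keepalive(int fd)
	{
		int on = 1, idle = 5, cnt = 5, intvl = 5;

		if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) ||
		    setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) ||
		    setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) ||
		    setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)))
			return -1;
		return 0;
	}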
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 4d2cede17468..dc6a2d324bd8 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c | |||
@@ -38,6 +38,9 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, | |||
38 | struct tcf_bpf *prog = act->priv; | 38 | struct tcf_bpf *prog = act->priv; |
39 | int action, filter_res; | 39 | int action, filter_res; |
40 | 40 | ||
41 | if (unlikely(!skb_mac_header_was_set(skb))) | ||
42 | return TC_ACT_UNSPEC; | ||
43 | |||
41 | spin_lock(&prog->tcf_lock); | 44 | spin_lock(&prog->tcf_lock); |
42 | 45 | ||
43 | prog->tcf_tm.lastuse = jiffies; | 46 | prog->tcf_tm.lastuse = jiffies; |
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8e472518f9f6..295d14bd6c67 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c | |||
@@ -63,7 +63,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, | |||
63 | skb->mark = c->mark; | 63 | skb->mark = c->mark; |
64 | /* using overlimits stats to count how many packets marked */ | 64 | /* using overlimits stats to count how many packets marked */ |
65 | ca->tcf_qstats.overlimits++; | 65 | ca->tcf_qstats.overlimits++; |
66 | nf_ct_put(c); | ||
67 | goto out; | 66 | goto out; |
68 | } | 67 | } |
69 | 68 | ||
@@ -82,7 +81,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, | |||
82 | nf_ct_put(c); | 81 | nf_ct_put(c); |
83 | 82 | ||
84 | out: | 83 | out: |
85 | skb->nfct = NULL; | ||
86 | spin_unlock(&ca->tcf_lock); | 84 | spin_unlock(&ca->tcf_lock); |
87 | return ca->tcf_action; | 85 | return ca->tcf_action; |
88 | } | 86 | } |
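nf_ct_get() on an skb returns the attached conntrack without taking a new reference, so the removed nf_ct_put() in the already-attached branch dropped a reference the action never owned, and clearing skb->nfct stripped the conntrack entry that later processing still needs. Only the lookup path, which does take its own reference, keeps its nf_ct_put().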
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5953517ec059..3f63ceac8e01 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c | |||
@@ -157,7 +157,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, | |||
157 | 157 | ||
158 | if (!(at & AT_EGRESS)) { | 158 | if (!(at & AT_EGRESS)) { |
159 | if (m->tcfm_ok_push) | 159 | if (m->tcfm_ok_push) |
160 | skb_push(skb2, skb2->dev->hard_header_len); | 160 | skb_push(skb2, skb->mac_len); |
161 | } | 161 | } |
162 | 162 | ||
163 | /* mirror is always swallowed */ | 163 | /* mirror is always swallowed */ |
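Using skb->mac_len restores exactly the header bytes that were pulled from this skb, where dev->hard_header_len is only the device's nominal link-header size; the two differ when extra link headers are present. A worked example for a VLAN-tagged Ethernet frame (constants written out for clarity):

    #include <stdio.h>

    /* Why skb->mac_len, not dev->hard_header_len: a tagged frame pulled
     * 18 bytes of link header, but the Ethernet device still reports 14. */
    int main(void)
    {
        unsigned int hard_header_len = 14;  /* ETH_HLEN */
        unsigned int mac_len = 14 + 4;      /* ETH_HLEN + VLAN_HLEN */

        printf("push %u bytes back, not %u\n", mac_len, hard_header_len);
        return 0;
    }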
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8b0470e418dc..b6ef9a04de06 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c | |||
@@ -308,12 +308,11 @@ replay: | |||
308 | case RTM_DELTFILTER: | 308 | case RTM_DELTFILTER: |
309 | err = tp->ops->delete(tp, fh); | 309 | err = tp->ops->delete(tp, fh); |
310 | if (err == 0) { | 310 | if (err == 0) { |
311 | tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); | 311 | struct tcf_proto *next = rtnl_dereference(tp->next); |
312 | if (tcf_destroy(tp, false)) { | ||
313 | struct tcf_proto *next = rtnl_dereference(tp->next); | ||
314 | 312 | ||
313 | tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); | ||
314 | if (tcf_destroy(tp, false)) | ||
315 | RCU_INIT_POINTER(*back, next); | 315 | RCU_INIT_POINTER(*back, next); |
316 | } | ||
317 | } | 316 | } |
318 | goto errout; | 317 | goto errout; |
319 | case RTM_GETTFILTER: | 318 | case RTM_GETTFILTER: |
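The RTM_DELTFILTER reorder is a use-after-free fix in shape: tp->next must be sampled before tfilter_notify()/tcf_destroy() can free tp, and only then may the list head be repointed. The general pattern, as a standalone sketch (in the real code the unlink is conditional on tcf_destroy()'s return value):

    #include <stdio.h>
    #include <stdlib.h>

    struct node { struct node *next; };

    /* Snapshot the successor BEFORE the call that may free the node,
     * then unlink using the snapshot -- never touch tp afterwards. */
    static void delete_node(struct node **back, struct node *tp)
    {
        struct node *next = tp->next;

        free(tp);           /* stand-in for tcf_destroy(tp, false) */
        *back = next;
    }

    int main(void)
    {
        struct node *b = calloc(1, sizeof(*b));
        struct node *a = calloc(1, sizeof(*a));
        struct node *head = a;

        a->next = b;
        delete_node(&head, a);
        printf("head now %p (b is %p)\n", (void *)head, (void *)b);
        free(b);
        return 0;
    }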
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5c4171c5d2bd..91bd9c19471d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c | |||
@@ -66,6 +66,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, | |||
66 | struct cls_bpf_prog *prog; | 66 | struct cls_bpf_prog *prog; |
67 | int ret = -1; | 67 | int ret = -1; |
68 | 68 | ||
69 | if (unlikely(!skb_mac_header_was_set(skb))) | ||
70 | return -1; | ||
71 | |||
69 | /* Needed here for accessing maps. */ | 72 | /* Needed here for accessing maps. */ |
70 | rcu_read_lock(); | 73 | rcu_read_lock(); |
71 | list_for_each_entry_rcu(prog, &head->plist, link) { | 74 | list_for_each_entry_rcu(prog, &head->plist, link) { |
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index de28f8e968e8..7a0bdb16ac92 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c | |||
@@ -164,7 +164,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt) | |||
164 | 164 | ||
165 | sch->limit = DEFAULT_CODEL_LIMIT; | 165 | sch->limit = DEFAULT_CODEL_LIMIT; |
166 | 166 | ||
167 | codel_params_init(&q->params); | 167 | codel_params_init(&q->params, sch); |
168 | codel_vars_init(&q->vars); | 168 | codel_vars_init(&q->vars); |
169 | codel_stats_init(&q->stats); | 169 | codel_stats_init(&q->stats); |
170 | 170 | ||
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 1e52decb7b59..c244c45b78d7 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c | |||
@@ -391,7 +391,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) | |||
391 | q->perturbation = prandom_u32(); | 391 | q->perturbation = prandom_u32(); |
392 | INIT_LIST_HEAD(&q->new_flows); | 392 | INIT_LIST_HEAD(&q->new_flows); |
393 | INIT_LIST_HEAD(&q->old_flows); | 393 | INIT_LIST_HEAD(&q->old_flows); |
394 | codel_params_init(&q->cparams); | 394 | codel_params_init(&q->cparams, sch); |
395 | codel_stats_init(&q->cstats); | 395 | codel_stats_init(&q->cstats); |
396 | q->cparams.ecn = true; | 396 | q->cparams.ecn = true; |
397 | 397 | ||
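Both codel and fq_codel now hand the Qdisc to codel_params_init(), so parameter defaults can be derived from the attached device rather than fixed at compile time; an MTU-derived field is a plausible use, but the exact purpose is not visible in these hunks. The pattern in standalone form, with all types and values stood in for illustration:

    #include <stdio.h>

    struct net_device { unsigned int mtu; };
    struct Qdisc { struct net_device *dev; };
    struct codel_params { unsigned int mtu; unsigned int interval_us; };

    /* dev_mtu() stands in for the kernel's qdisc_dev()/psched_mtu(). */
    static unsigned int dev_mtu(const struct Qdisc *sch)
    {
        return sch->dev->mtu;
    }

    static void codel_params_init(struct codel_params *p,
                                  const struct Qdisc *sch)
    {
        p->interval_us = 100000;    /* 100 ms, a fixed default as before */
        p->mtu = dev_mtu(sch);      /* new: a device-derived default */
    }

    int main(void)
    {
        struct net_device dev = { .mtu = 1500 };
        struct Qdisc sch = { .dev = &dev };
        struct codel_params p;

        codel_params_init(&p, &sch);
        printf("mtu=%u interval=%uus\n", p.mtu, p.interval_us);
        return 0;
    }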
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index a4ca4517cdc8..634529e0ce6b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c | |||
@@ -229,7 +229,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
229 | break; | 229 | break; |
230 | } | 230 | } |
231 | 231 | ||
232 | if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { | 232 | if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) { |
233 | q->backlog += qdisc_pkt_len(skb); | 233 | q->backlog += qdisc_pkt_len(skb); |
234 | return qdisc_enqueue_tail(skb, sch); | 234 | return qdisc_enqueue_tail(skb, sch); |
235 | } | 235 | } |
@@ -553,7 +553,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
553 | 553 | ||
554 | opt.limit = q->limit; | 554 | opt.limit = q->limit; |
555 | opt.DP = q->DP; | 555 | opt.DP = q->DP; |
556 | opt.backlog = q->backlog; | 556 | opt.backlog = gred_backlog(table, q, sch); |
557 | opt.prio = q->prio; | 557 | opt.prio = q->prio; |
558 | opt.qth_min = q->parms.qth_min >> q->parms.Wlog; | 558 | opt.qth_min = q->parms.qth_min >> q->parms.Wlog; |
559 | opt.qth_max = q->parms.qth_max >> q->parms.Wlog; | 559 | opt.qth_max = q->parms.qth_max >> q->parms.Wlog; |
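Both the enqueue limit test and the dump now read the backlog through gred_backlog() instead of q->backlog directly. The helper itself is not shown in these hunks; a guessed shape, assuming it exists so that WRED mode reports the shared qdisc-wide figure rather than the per-virtual-queue counter:

    #include <stdbool.h>
    #include <stdio.h>

    struct qdisc_stats { unsigned int backlog; };
    struct gred_vq { unsigned int backlog; };
    struct gred_table { bool wred_mode; struct qdisc_stats qstats; };

    /* Guessed shape of gred_backlog(): in WRED mode all virtual queues
     * share the qdisc-wide backlog, otherwise each reports its own. */
    static unsigned int gred_backlog(const struct gred_table *t,
                                     const struct gred_vq *q)
    {
        return t->wred_mode ? t->qstats.backlog : q->backlog;
    }

    int main(void)
    {
        struct gred_table t = { .wred_mode = true, .qstats = { 3000 } };
        struct gred_vq q = { .backlog = 500 };

        printf("effective backlog: %u\n", gred_backlog(&t, &q));
        return 0;
    }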
diff --git a/net/socket.c b/net/socket.c index 3e33959f3ce5..884e32997698 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -312,7 +312,7 @@ static const struct super_operations sockfs_ops = { | |||
312 | static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) | 312 | static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) |
313 | { | 313 | { |
314 | return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", | 314 | return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", |
315 | dentry->d_inode->i_ino); | 315 | d_inode(dentry)->i_ino); |
316 | } | 316 | } |
317 | 317 | ||
318 | static const struct dentry_operations sockfs_dentry_operations = { | 318 | static const struct dentry_operations sockfs_dentry_operations = { |
@@ -375,7 +375,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) | |||
375 | &socket_file_ops); | 375 | &socket_file_ops); |
376 | if (unlikely(IS_ERR(file))) { | 376 | if (unlikely(IS_ERR(file))) { |
377 | /* drop dentry, keep inode */ | 377 | /* drop dentry, keep inode */ |
378 | ihold(path.dentry->d_inode); | 378 | ihold(d_inode(path.dentry)); |
379 | path_put(&path); | 379 | path_put(&path); |
380 | return file; | 380 | return file; |
381 | } | 381 | } |
@@ -497,7 +497,7 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, | |||
497 | ssize_t len; | 497 | ssize_t len; |
498 | ssize_t used = 0; | 498 | ssize_t used = 0; |
499 | 499 | ||
500 | len = security_inode_listsecurity(dentry->d_inode, buffer, size); | 500 | len = security_inode_listsecurity(d_inode(dentry), buffer, size); |
501 | if (len < 0) | 501 | if (len < 0) |
502 | return len; | 502 | return len; |
503 | used += len; | 503 | used += len; |
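The socket.c hunks (and the rpc_pipe.c ones further down) are mechanical conversions from dentry->d_inode to the d_inode() accessor, plus d_really_is_negative() for the NULL tests. At this point the accessor is pure indirection, which is the point: call sites stop encoding the dentry layout, so a future layout change only has to touch one helper. Its definition in this era is, quoted from memory and worth verifying against include/linux/dcache.h:

    static inline struct inode *d_inode(const struct dentry *dentry)
    {
        return dentry->d_inode;
    }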
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 1ec19f6f0c2b..eeeba5adee6d 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c | |||
@@ -793,20 +793,26 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, | |||
793 | { | 793 | { |
794 | u32 value_follows; | 794 | u32 value_follows; |
795 | int err; | 795 | int err; |
796 | struct page *scratch; | ||
797 | |||
798 | scratch = alloc_page(GFP_KERNEL); | ||
799 | if (!scratch) | ||
800 | return -ENOMEM; | ||
801 | xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE); | ||
796 | 802 | ||
797 | /* res->status */ | 803 | /* res->status */ |
798 | err = gssx_dec_status(xdr, &res->status); | 804 | err = gssx_dec_status(xdr, &res->status); |
799 | if (err) | 805 | if (err) |
800 | return err; | 806 | goto out_free; |
801 | 807 | ||
802 | /* res->context_handle */ | 808 | /* res->context_handle */ |
803 | err = gssx_dec_bool(xdr, &value_follows); | 809 | err = gssx_dec_bool(xdr, &value_follows); |
804 | if (err) | 810 | if (err) |
805 | return err; | 811 | goto out_free; |
806 | if (value_follows) { | 812 | if (value_follows) { |
807 | err = gssx_dec_ctx(xdr, res->context_handle); | 813 | err = gssx_dec_ctx(xdr, res->context_handle); |
808 | if (err) | 814 | if (err) |
809 | return err; | 815 | goto out_free; |
810 | } else { | 816 | } else { |
811 | res->context_handle = NULL; | 817 | res->context_handle = NULL; |
812 | } | 818 | } |
@@ -814,11 +820,11 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, | |||
814 | /* res->output_token */ | 820 | /* res->output_token */ |
815 | err = gssx_dec_bool(xdr, &value_follows); | 821 | err = gssx_dec_bool(xdr, &value_follows); |
816 | if (err) | 822 | if (err) |
817 | return err; | 823 | goto out_free; |
818 | if (value_follows) { | 824 | if (value_follows) { |
819 | err = gssx_dec_buffer(xdr, res->output_token); | 825 | err = gssx_dec_buffer(xdr, res->output_token); |
820 | if (err) | 826 | if (err) |
821 | return err; | 827 | goto out_free; |
822 | } else { | 828 | } else { |
823 | res->output_token = NULL; | 829 | res->output_token = NULL; |
824 | } | 830 | } |
@@ -826,14 +832,17 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, | |||
826 | /* res->delegated_cred_handle */ | 832 | /* res->delegated_cred_handle */ |
827 | err = gssx_dec_bool(xdr, &value_follows); | 833 | err = gssx_dec_bool(xdr, &value_follows); |
828 | if (err) | 834 | if (err) |
829 | return err; | 835 | goto out_free; |
830 | if (value_follows) { | 836 | if (value_follows) { |
831 | /* we do not support upcall servers sending this data. */ | 837 | /* we do not support upcall servers sending this data. */ |
832 | return -EINVAL; | 838 | err = -EINVAL; |
839 | goto out_free; | ||
833 | } | 840 | } |
834 | 841 | ||
835 | /* res->options */ | 842 | /* res->options */ |
836 | err = gssx_dec_option_array(xdr, &res->options); | 843 | err = gssx_dec_option_array(xdr, &res->options); |
837 | 844 | ||
845 | out_free: | ||
846 | __free_page(scratch); | ||
838 | return err; | 847 | return err; |
839 | } | 848 | } |
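gssx_dec_accept_sec_context() now allocates a scratch page up front, registers it with xdr_set_scratch_buffer() so inline decodes can cross page boundaries, and converts every early return into goto out_free so the page cannot leak on any path. The single-exit unwind idiom in standalone form; decode() and its step callback are illustrative stand-ins:

    #include <stdio.h>
    #include <stdlib.h>

    /* Single-exit unwind: once a resource is held, failures jump to the
     * label that releases it instead of returning directly. */
    static int decode(int (*step)(void *))
    {
        void *scratch = malloc(4096);
        int err;

        if (!scratch)
            return -12;     /* -ENOMEM: nothing held yet */

        err = step(scratch);
        if (err)
            goto out_free;

        /* ... more steps, each "goto out_free" on error ... */

    out_free:
        free(scratch);
        return err;
    }

    static int ok(void *p) { (void)p; return 0; }

    int main(void)
    {
        printf("decode: %d\n", decode(ok));
        return 0;
    }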
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2d12b76b5a64..d81186d34558 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -94,7 +94,7 @@ rpc_timeout_upcall_queue(struct work_struct *work) | |||
94 | } | 94 | } |
95 | dentry = dget(pipe->dentry); | 95 | dentry = dget(pipe->dentry); |
96 | spin_unlock(&pipe->lock); | 96 | spin_unlock(&pipe->lock); |
97 | rpc_purge_list(dentry ? &RPC_I(dentry->d_inode)->waitq : NULL, | 97 | rpc_purge_list(dentry ? &RPC_I(d_inode(dentry))->waitq : NULL, |
98 | &free_list, destroy_msg, -ETIMEDOUT); | 98 | &free_list, destroy_msg, -ETIMEDOUT); |
99 | dput(dentry); | 99 | dput(dentry); |
100 | } | 100 | } |
@@ -152,7 +152,7 @@ rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) | |||
152 | dentry = dget(pipe->dentry); | 152 | dentry = dget(pipe->dentry); |
153 | spin_unlock(&pipe->lock); | 153 | spin_unlock(&pipe->lock); |
154 | if (dentry) { | 154 | if (dentry) { |
155 | wake_up(&RPC_I(dentry->d_inode)->waitq); | 155 | wake_up(&RPC_I(d_inode(dentry))->waitq); |
156 | dput(dentry); | 156 | dput(dentry); |
157 | } | 157 | } |
158 | return res; | 158 | return res; |
@@ -591,7 +591,7 @@ static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry, | |||
591 | err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); | 591 | err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); |
592 | if (err) | 592 | if (err) |
593 | return err; | 593 | return err; |
594 | rpci = RPC_I(dentry->d_inode); | 594 | rpci = RPC_I(d_inode(dentry)); |
595 | rpci->private = private; | 595 | rpci->private = private; |
596 | rpci->pipe = pipe; | 596 | rpci->pipe = pipe; |
597 | fsnotify_create(dir, dentry); | 597 | fsnotify_create(dir, dentry); |
@@ -616,7 +616,7 @@ int rpc_rmdir(struct dentry *dentry) | |||
616 | int error; | 616 | int error; |
617 | 617 | ||
618 | parent = dget_parent(dentry); | 618 | parent = dget_parent(dentry); |
619 | dir = parent->d_inode; | 619 | dir = d_inode(parent); |
620 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 620 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
621 | error = __rpc_rmdir(dir, dentry); | 621 | error = __rpc_rmdir(dir, dentry); |
622 | mutex_unlock(&dir->i_mutex); | 622 | mutex_unlock(&dir->i_mutex); |
@@ -638,7 +638,7 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) | |||
638 | 638 | ||
639 | static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) | 639 | static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) |
640 | { | 640 | { |
641 | struct inode *inode = dentry->d_inode; | 641 | struct inode *inode = d_inode(dentry); |
642 | 642 | ||
643 | rpc_close_pipes(inode); | 643 | rpc_close_pipes(inode); |
644 | return __rpc_unlink(dir, dentry); | 644 | return __rpc_unlink(dir, dentry); |
@@ -654,7 +654,7 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, | |||
654 | if (!dentry) | 654 | if (!dentry) |
655 | return ERR_PTR(-ENOMEM); | 655 | return ERR_PTR(-ENOMEM); |
656 | } | 656 | } |
657 | if (dentry->d_inode == NULL) | 657 | if (d_really_is_negative(dentry)) |
658 | return dentry; | 658 | return dentry; |
659 | dput(dentry); | 659 | dput(dentry); |
660 | return ERR_PTR(-EEXIST); | 660 | return ERR_PTR(-EEXIST); |
@@ -667,7 +667,7 @@ static void __rpc_depopulate(struct dentry *parent, | |||
667 | const struct rpc_filelist *files, | 667 | const struct rpc_filelist *files, |
668 | int start, int eof) | 668 | int start, int eof) |
669 | { | 669 | { |
670 | struct inode *dir = parent->d_inode; | 670 | struct inode *dir = d_inode(parent); |
671 | struct dentry *dentry; | 671 | struct dentry *dentry; |
672 | struct qstr name; | 672 | struct qstr name; |
673 | int i; | 673 | int i; |
@@ -679,9 +679,9 @@ static void __rpc_depopulate(struct dentry *parent, | |||
679 | 679 | ||
680 | if (dentry == NULL) | 680 | if (dentry == NULL) |
681 | continue; | 681 | continue; |
682 | if (dentry->d_inode == NULL) | 682 | if (d_really_is_negative(dentry)) |
683 | goto next; | 683 | goto next; |
684 | switch (dentry->d_inode->i_mode & S_IFMT) { | 684 | switch (d_inode(dentry)->i_mode & S_IFMT) { |
685 | default: | 685 | default: |
686 | BUG(); | 686 | BUG(); |
687 | case S_IFREG: | 687 | case S_IFREG: |
@@ -699,7 +699,7 @@ static void rpc_depopulate(struct dentry *parent, | |||
699 | const struct rpc_filelist *files, | 699 | const struct rpc_filelist *files, |
700 | int start, int eof) | 700 | int start, int eof) |
701 | { | 701 | { |
702 | struct inode *dir = parent->d_inode; | 702 | struct inode *dir = d_inode(parent); |
703 | 703 | ||
704 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); | 704 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); |
705 | __rpc_depopulate(parent, files, start, eof); | 705 | __rpc_depopulate(parent, files, start, eof); |
@@ -711,7 +711,7 @@ static int rpc_populate(struct dentry *parent, | |||
711 | int start, int eof, | 711 | int start, int eof, |
712 | void *private) | 712 | void *private) |
713 | { | 713 | { |
714 | struct inode *dir = parent->d_inode; | 714 | struct inode *dir = d_inode(parent); |
715 | struct dentry *dentry; | 715 | struct dentry *dentry; |
716 | int i, err; | 716 | int i, err; |
717 | 717 | ||
@@ -754,7 +754,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent, | |||
754 | int (*populate)(struct dentry *, void *), void *args_populate) | 754 | int (*populate)(struct dentry *, void *), void *args_populate) |
755 | { | 755 | { |
756 | struct dentry *dentry; | 756 | struct dentry *dentry; |
757 | struct inode *dir = parent->d_inode; | 757 | struct inode *dir = d_inode(parent); |
758 | int error; | 758 | int error; |
759 | 759 | ||
760 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 760 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
@@ -787,7 +787,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, | |||
787 | int error; | 787 | int error; |
788 | 788 | ||
789 | parent = dget_parent(dentry); | 789 | parent = dget_parent(dentry); |
790 | dir = parent->d_inode; | 790 | dir = d_inode(parent); |
791 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 791 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
792 | if (depopulate != NULL) | 792 | if (depopulate != NULL) |
793 | depopulate(dentry); | 793 | depopulate(dentry); |
@@ -819,7 +819,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, | |||
819 | void *private, struct rpc_pipe *pipe) | 819 | void *private, struct rpc_pipe *pipe) |
820 | { | 820 | { |
821 | struct dentry *dentry; | 821 | struct dentry *dentry; |
822 | struct inode *dir = parent->d_inode; | 822 | struct inode *dir = d_inode(parent); |
823 | umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; | 823 | umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; |
824 | int err; | 824 | int err; |
825 | 825 | ||
@@ -864,7 +864,7 @@ rpc_unlink(struct dentry *dentry) | |||
864 | int error = 0; | 864 | int error = 0; |
865 | 865 | ||
866 | parent = dget_parent(dentry); | 866 | parent = dget_parent(dentry); |
867 | dir = parent->d_inode; | 867 | dir = d_inode(parent); |
868 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 868 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
869 | error = __rpc_rmpipe(dir, dentry); | 869 | error = __rpc_rmpipe(dir, dentry); |
870 | mutex_unlock(&dir->i_mutex); | 870 | mutex_unlock(&dir->i_mutex); |
@@ -1375,7 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) | |||
1375 | struct dentry *clnt_dir = pipe_dentry->d_parent; | 1375 | struct dentry *clnt_dir = pipe_dentry->d_parent; |
1376 | struct dentry *gssd_dir = clnt_dir->d_parent; | 1376 | struct dentry *gssd_dir = clnt_dir->d_parent; |
1377 | 1377 | ||
1378 | __rpc_rmpipe(clnt_dir->d_inode, pipe_dentry); | 1378 | __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry); |
1379 | __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); | 1379 | __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); |
1380 | __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); | 1380 | __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); |
1381 | dput(pipe_dentry); | 1381 | dput(pipe_dentry); |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b91fd9c597b4..337ca851a350 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -89,8 +89,8 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) | |||
89 | if (!task->tk_timeout) | 89 | if (!task->tk_timeout) |
90 | return; | 90 | return; |
91 | 91 | ||
92 | dprintk("RPC: %5u setting alarm for %lu ms\n", | 92 | dprintk("RPC: %5u setting alarm for %u ms\n", |
93 | task->tk_pid, task->tk_timeout * 1000 / HZ); | 93 | task->tk_pid, jiffies_to_msecs(task->tk_timeout)); |
94 | 94 | ||
95 | task->u.tk_wait.expires = jiffies + task->tk_timeout; | 95 | task->u.tk_wait.expires = jiffies + task->tk_timeout; |
96 | if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) | 96 | if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) |
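Besides matching the format specifier to jiffies_to_msecs()'s unsigned int return, the conversion avoids the intermediate overflow the open-coded tk_timeout * 1000 / HZ risks in 32-bit arithmetic: the multiply happens before the divide. A small demonstration of the failure mode (values chosen to force the wrap):

    #include <stdio.h>

    #define HZ 1000U

    /* Multiply-then-divide wraps once timeout * 1000 exceeds 2^32;
     * jiffies_to_msecs() is written to avoid this. */
    int main(void)
    {
        unsigned int timeout = 5000000;     /* ~83 min of jiffies at HZ=1000 */

        unsigned int naive = timeout * 1000U / HZ;  /* wraps at 2^32 */
        unsigned long long safe =
            (unsigned long long)timeout * 1000ULL / HZ;

        printf("naive: %u ms, safe: %llu ms\n", naive, safe);
        return 0;
    }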
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9949722d99ce..1d4fe24af06a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -326,6 +326,15 @@ out_unlock: | |||
326 | xprt_clear_locked(xprt); | 326 | xprt_clear_locked(xprt); |
327 | } | 327 | } |
328 | 328 | ||
329 | static void xprt_task_clear_bytes_sent(struct rpc_task *task) | ||
330 | { | ||
331 | if (task != NULL) { | ||
332 | struct rpc_rqst *req = task->tk_rqstp; | ||
333 | if (req != NULL) | ||
334 | req->rq_bytes_sent = 0; | ||
335 | } | ||
336 | } | ||
337 | |||
329 | /** | 338 | /** |
330 | * xprt_release_xprt - allow other requests to use a transport | 339 | * xprt_release_xprt - allow other requests to use a transport |
331 | * @xprt: transport with other tasks potentially waiting | 340 | * @xprt: transport with other tasks potentially waiting |
@@ -336,11 +345,7 @@ out_unlock: | |||
336 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) | 345 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) |
337 | { | 346 | { |
338 | if (xprt->snd_task == task) { | 347 | if (xprt->snd_task == task) { |
339 | if (task != NULL) { | 348 | xprt_task_clear_bytes_sent(task); |
340 | struct rpc_rqst *req = task->tk_rqstp; | ||
341 | if (req != NULL) | ||
342 | req->rq_bytes_sent = 0; | ||
343 | } | ||
344 | xprt_clear_locked(xprt); | 349 | xprt_clear_locked(xprt); |
345 | __xprt_lock_write_next(xprt); | 350 | __xprt_lock_write_next(xprt); |
346 | } | 351 | } |
@@ -358,11 +363,7 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt); | |||
358 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) | 363 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) |
359 | { | 364 | { |
360 | if (xprt->snd_task == task) { | 365 | if (xprt->snd_task == task) { |
361 | if (task != NULL) { | 366 | xprt_task_clear_bytes_sent(task); |
362 | struct rpc_rqst *req = task->tk_rqstp; | ||
363 | if (req != NULL) | ||
364 | req->rq_bytes_sent = 0; | ||
365 | } | ||
366 | xprt_clear_locked(xprt); | 367 | xprt_clear_locked(xprt); |
367 | __xprt_lock_write_next_cong(xprt); | 368 | __xprt_lock_write_next_cong(xprt); |
368 | } | 369 | } |
@@ -700,6 +701,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt, | |||
700 | goto out; | 701 | goto out; |
701 | if (xprt->snd_task != task) | 702 | if (xprt->snd_task != task) |
702 | goto out; | 703 | goto out; |
704 | xprt_task_clear_bytes_sent(task); | ||
703 | xprt->snd_task = cookie; | 705 | xprt->snd_task = cookie; |
704 | ret = true; | 706 | ret = true; |
705 | out: | 707 | out: |
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index da5136fd5694..579f72bbcf4b 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
@@ -1,6 +1,7 @@ | |||
1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o |
2 | 2 | ||
3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o \ |
4 | fmr_ops.o frwr_ops.o physical_ops.o | ||
4 | 5 | ||
5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o | 6 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o |
6 | 7 | ||
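The Makefile now builds one object per memory-registration strategy; each of the three new files below exports an ops vtable and the transport selects one at setup time. A sketch of the vtable's shape, with the member list taken from the .ro_* initializers in fmr_ops.c/frwr_ops.c/physical_ops.c below and the signatures inferred from their call sites in this series (exact types are assumptions):

    #include <stdbool.h>
    #include <stddef.h>

    /* Forward declarations so the sketch stands alone. */
    struct rpcrdma_xprt;
    struct rpcrdma_mr_seg;
    struct rpcrdma_ia;
    struct rpcrdma_ep;
    struct rpcrdma_create_data_internal;
    struct rpcrdma_buffer;

    struct rpcrdma_memreg_ops {
        int    (*ro_map)(struct rpcrdma_xprt *,
                         struct rpcrdma_mr_seg *, int, bool);
        int    (*ro_unmap)(struct rpcrdma_xprt *,
                           struct rpcrdma_mr_seg *);
        int    (*ro_open)(struct rpcrdma_ia *, struct rpcrdma_ep *,
                          struct rpcrdma_create_data_internal *);
        size_t (*ro_maxpages)(struct rpcrdma_xprt *);
        int    (*ro_init)(struct rpcrdma_xprt *);
        void   (*ro_reset)(struct rpcrdma_xprt *);
        void   (*ro_destroy)(struct rpcrdma_buffer *);
        const char *ro_displayname;
    };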
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c new file mode 100644 index 000000000000..302d4ebf6fbf --- /dev/null +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -0,0 +1,208 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* Lightweight memory registration using Fast Memory Regions (FMR). | ||
7 | * Referred to sometimes as MTHCAFMR mode. | ||
8 | * | ||
9 | * FMR uses synchronous memory registration and deregistration. | ||
10 | * FMR registration is known to be fast, but FMR deregistration | ||
11 | * can take tens of usecs to complete. | ||
12 | */ | ||
13 | |||
14 | #include "xprt_rdma.h" | ||
15 | |||
16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
17 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
18 | #endif | ||
19 | |||
20 | /* Maximum scatter/gather per FMR */ | ||
21 | #define RPCRDMA_MAX_FMR_SGES (64) | ||
22 | |||
23 | static int | ||
24 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
25 | struct rpcrdma_create_data_internal *cdata) | ||
26 | { | ||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | /* FMR mode conveys up to 64 pages of payload per chunk segment. | ||
31 | */ | ||
32 | static size_t | ||
33 | fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
34 | { | ||
35 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
36 | rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | fmr_op_init(struct rpcrdma_xprt *r_xprt) | ||
41 | { | ||
42 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
43 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
44 | struct ib_fmr_attr fmr_attr = { | ||
45 | .max_pages = RPCRDMA_MAX_FMR_SGES, | ||
46 | .max_maps = 1, | ||
47 | .page_shift = PAGE_SHIFT | ||
48 | }; | ||
49 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
50 | struct rpcrdma_mw *r; | ||
51 | int i, rc; | ||
52 | |||
53 | INIT_LIST_HEAD(&buf->rb_mws); | ||
54 | INIT_LIST_HEAD(&buf->rb_all); | ||
55 | |||
56 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
57 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | ||
58 | |||
59 | while (i--) { | ||
60 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
61 | if (!r) | ||
62 | return -ENOMEM; | ||
63 | |||
64 | r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); | ||
65 | if (IS_ERR(r->r.fmr)) | ||
66 | goto out_fmr_err; | ||
67 | |||
68 | list_add(&r->mw_list, &buf->rb_mws); | ||
69 | list_add(&r->mw_all, &buf->rb_all); | ||
70 | } | ||
71 | return 0; | ||
72 | |||
73 | out_fmr_err: | ||
74 | rc = PTR_ERR(r->r.fmr); | ||
75 | dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); | ||
76 | kfree(r); | ||
77 | return rc; | ||
78 | } | ||
79 | |||
80 | /* Use the ib_map_phys_fmr() verb to register a memory region | ||
81 | * for remote access via RDMA READ or RDMA WRITE. | ||
82 | */ | ||
83 | static int | ||
84 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
85 | int nsegs, bool writing) | ||
86 | { | ||
87 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
88 | struct ib_device *device = ia->ri_id->device; | ||
89 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
90 | struct rpcrdma_mr_seg *seg1 = seg; | ||
91 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
92 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
93 | int len, pageoff, i, rc; | ||
94 | |||
95 | pageoff = offset_in_page(seg1->mr_offset); | ||
96 | seg1->mr_offset -= pageoff; /* start of page */ | ||
97 | seg1->mr_len += pageoff; | ||
98 | len = -pageoff; | ||
99 | if (nsegs > RPCRDMA_MAX_FMR_SGES) | ||
100 | nsegs = RPCRDMA_MAX_FMR_SGES; | ||
101 | for (i = 0; i < nsegs;) { | ||
102 | rpcrdma_map_one(device, seg, direction); | ||
103 | physaddrs[i] = seg->mr_dma; | ||
104 | len += seg->mr_len; | ||
105 | ++seg; | ||
106 | ++i; | ||
107 | /* Check for holes */ | ||
108 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
109 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
110 | break; | ||
111 | } | ||
112 | |||
113 | rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
114 | if (rc) | ||
115 | goto out_maperr; | ||
116 | |||
117 | seg1->mr_rkey = mw->r.fmr->rkey; | ||
118 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
119 | seg1->mr_nsegs = i; | ||
120 | seg1->mr_len = len; | ||
121 | return i; | ||
122 | |||
123 | out_maperr: | ||
124 | dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", | ||
125 | __func__, len, (unsigned long long)seg1->mr_dma, | ||
126 | pageoff, i, rc); | ||
127 | while (i--) | ||
128 | rpcrdma_unmap_one(device, --seg); | ||
129 | return rc; | ||
130 | } | ||
131 | |||
132 | /* Use the ib_unmap_fmr() verb to prevent further remote | ||
133 | * access via RDMA READ or RDMA WRITE. | ||
134 | */ | ||
135 | static int | ||
136 | fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
137 | { | ||
138 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
139 | struct rpcrdma_mr_seg *seg1 = seg; | ||
140 | struct ib_device *device; | ||
141 | int rc, nsegs = seg->mr_nsegs; | ||
142 | LIST_HEAD(l); | ||
143 | |||
144 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
145 | rc = ib_unmap_fmr(&l); | ||
146 | read_lock(&ia->ri_qplock); | ||
147 | device = ia->ri_id->device; | ||
148 | while (seg1->mr_nsegs--) | ||
149 | rpcrdma_unmap_one(device, seg++); | ||
150 | read_unlock(&ia->ri_qplock); | ||
151 | if (rc) | ||
152 | goto out_err; | ||
153 | return nsegs; | ||
154 | |||
155 | out_err: | ||
156 | dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); | ||
157 | return nsegs; | ||
158 | } | ||
159 | |||
160 | /* After a disconnect, unmap all FMRs. | ||
161 | * | ||
162 | * This is invoked only in the transport connect worker in order | ||
163 | * to serialize with rpcrdma_register_fmr_external(). | ||
164 | */ | ||
165 | static void | ||
166 | fmr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
167 | { | ||
168 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
169 | struct rpcrdma_mw *r; | ||
170 | LIST_HEAD(list); | ||
171 | int rc; | ||
172 | |||
173 | list_for_each_entry(r, &buf->rb_all, mw_all) | ||
174 | list_add(&r->r.fmr->list, &list); | ||
175 | |||
176 | rc = ib_unmap_fmr(&list); | ||
177 | if (rc) | ||
178 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
179 | __func__, rc); | ||
180 | } | ||
181 | |||
182 | static void | ||
183 | fmr_op_destroy(struct rpcrdma_buffer *buf) | ||
184 | { | ||
185 | struct rpcrdma_mw *r; | ||
186 | int rc; | ||
187 | |||
188 | while (!list_empty(&buf->rb_all)) { | ||
189 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
190 | list_del(&r->mw_all); | ||
191 | rc = ib_dealloc_fmr(r->r.fmr); | ||
192 | if (rc) | ||
193 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
194 | __func__, rc); | ||
195 | kfree(r); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | ||
200 | .ro_map = fmr_op_map, | ||
201 | .ro_unmap = fmr_op_unmap, | ||
202 | .ro_open = fmr_op_open, | ||
203 | .ro_maxpages = fmr_op_maxpages, | ||
204 | .ro_init = fmr_op_init, | ||
205 | .ro_reset = fmr_op_reset, | ||
206 | .ro_destroy = fmr_op_destroy, | ||
207 | .ro_displayname = "fmr", | ||
208 | }; | ||
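The "Check for holes" loop in fmr_op_map() (and the identical one in frwr_op_map() below) stops coalescing segments into one registration as soon as the tiling breaks: a segment may join only if the previous one ends on a page boundary and it starts on one. A standalone restatement of that rule, with contiguous() as an illustrative helper:

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    /* Segments can share one registration only while they tile pages
     * with no holes: every interior boundary must be page-aligned. */
    static bool contiguous(unsigned long prev_off, unsigned long prev_len,
                           unsigned long next_off)
    {
        return (prev_off + prev_len) % PAGE_SIZE == 0 &&
               next_off % PAGE_SIZE == 0;
    }

    int main(void)
    {
        /* First page fully used, next segment page-aligned: coalesces. */
        printf("%d\n", contiguous(0, 4096, 8192));
        /* Trailing hole in the first page: the mapping must stop here. */
        printf("%d\n", contiguous(0, 1000, 8192));
        return 0;
    }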
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c new file mode 100644 index 000000000000..dff0481dbcf8 --- /dev/null +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -0,0 +1,353 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* Lightweight memory registration using Fast Registration Work | ||
7 | * Requests (FRWR). Also referred to sometimes as FRMR mode. | ||
8 | * | ||
9 | * FRWR features ordered asynchronous registration and deregistration | ||
10 | * of arbitrarily sized memory regions. This is the fastest and safest | ||
11 | * but most complex memory registration mode. | ||
12 | */ | ||
13 | |||
14 | #include "xprt_rdma.h" | ||
15 | |||
16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
17 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
18 | #endif | ||
19 | |||
20 | static int | ||
21 | __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, | ||
22 | unsigned int depth) | ||
23 | { | ||
24 | struct rpcrdma_frmr *f = &r->r.frmr; | ||
25 | int rc; | ||
26 | |||
27 | f->fr_mr = ib_alloc_fast_reg_mr(pd, depth); | ||
28 | if (IS_ERR(f->fr_mr)) | ||
29 | goto out_mr_err; | ||
30 | f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); | ||
31 | if (IS_ERR(f->fr_pgl)) | ||
32 | goto out_list_err; | ||
33 | return 0; | ||
34 | |||
35 | out_mr_err: | ||
36 | rc = PTR_ERR(f->fr_mr); | ||
37 | dprintk("RPC: %s: ib_alloc_fast_reg_mr status %i\n", | ||
38 | __func__, rc); | ||
39 | return rc; | ||
40 | |||
41 | out_list_err: | ||
42 | rc = PTR_ERR(f->fr_pgl); | ||
43 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", | ||
44 | __func__, rc); | ||
45 | ib_dereg_mr(f->fr_mr); | ||
46 | return rc; | ||
47 | } | ||
48 | |||
49 | static void | ||
50 | __frwr_release(struct rpcrdma_mw *r) | ||
51 | { | ||
52 | int rc; | ||
53 | |||
54 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
55 | if (rc) | ||
56 | dprintk("RPC: %s: ib_dereg_mr status %i\n", | ||
57 | __func__, rc); | ||
58 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
59 | } | ||
60 | |||
61 | static int | ||
62 | frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
63 | struct rpcrdma_create_data_internal *cdata) | ||
64 | { | ||
65 | struct ib_device_attr *devattr = &ia->ri_devattr; | ||
66 | int depth, delta; | ||
67 | |||
68 | ia->ri_max_frmr_depth = | ||
69 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
70 | devattr->max_fast_reg_page_list_len); | ||
71 | dprintk("RPC: %s: device's max FR page list len = %u\n", | ||
72 | __func__, ia->ri_max_frmr_depth); | ||
73 | |||
74 | /* Add room for frmr register and invalidate WRs. | ||
75 | * 1. FRMR reg WR for head | ||
76 | * 2. FRMR invalidate WR for head | ||
77 | * 3. N FRMR reg WRs for pagelist | ||
78 | * 4. N FRMR invalidate WRs for pagelist | ||
79 | * 5. FRMR reg WR for tail | ||
80 | * 6. FRMR invalidate WR for tail | ||
81 | * 7. The RDMA_SEND WR | ||
82 | */ | ||
83 | depth = 7; | ||
84 | |||
85 | /* Calculate N if the device max FRMR depth is smaller than | ||
86 | * RPCRDMA_MAX_DATA_SEGS. | ||
87 | */ | ||
88 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
89 | delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; | ||
90 | do { | ||
91 | depth += 2; /* FRMR reg + invalidate */ | ||
92 | delta -= ia->ri_max_frmr_depth; | ||
93 | } while (delta > 0); | ||
94 | } | ||
95 | |||
96 | ep->rep_attr.cap.max_send_wr *= depth; | ||
97 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
98 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
99 | if (!cdata->max_requests) | ||
100 | return -EINVAL; | ||
101 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
102 | depth; | ||
103 | } | ||
104 | |||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | /* FRWR mode conveys a list of pages per chunk segment. The | ||
109 | * maximum length of that list is the FRWR page list depth. | ||
110 | */ | ||
111 | static size_t | ||
112 | frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
113 | { | ||
114 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
115 | |||
116 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
117 | rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); | ||
118 | } | ||
119 | |||
120 | /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ | ||
121 | static void | ||
122 | frwr_sendcompletion(struct ib_wc *wc) | ||
123 | { | ||
124 | struct rpcrdma_mw *r; | ||
125 | |||
126 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
127 | return; | ||
128 | |||
129 | /* WARNING: Only wr_id and status are reliable at this point */ | ||
130 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
131 | dprintk("RPC: %s: frmr %p (stale), status %d\n", | ||
132 | __func__, r, wc->status); | ||
133 | r->r.frmr.fr_state = FRMR_IS_STALE; | ||
134 | } | ||
135 | |||
136 | static int | ||
137 | frwr_op_init(struct rpcrdma_xprt *r_xprt) | ||
138 | { | ||
139 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
140 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
141 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
142 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
143 | int i; | ||
144 | |||
145 | INIT_LIST_HEAD(&buf->rb_mws); | ||
146 | INIT_LIST_HEAD(&buf->rb_all); | ||
147 | |||
148 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
149 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | ||
150 | |||
151 | while (i--) { | ||
152 | struct rpcrdma_mw *r; | ||
153 | int rc; | ||
154 | |||
155 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
156 | if (!r) | ||
157 | return -ENOMEM; | ||
158 | |||
159 | rc = __frwr_init(r, pd, device, depth); | ||
160 | if (rc) { | ||
161 | kfree(r); | ||
162 | return rc; | ||
163 | } | ||
164 | |||
165 | list_add(&r->mw_list, &buf->rb_mws); | ||
166 | list_add(&r->mw_all, &buf->rb_all); | ||
167 | r->mw_sendcompletion = frwr_sendcompletion; | ||
168 | } | ||
169 | |||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | /* Post a FAST_REG Work Request to register a memory region | ||
174 | * for remote access via RDMA READ or RDMA WRITE. | ||
175 | */ | ||
176 | static int | ||
177 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
178 | int nsegs, bool writing) | ||
179 | { | ||
180 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
181 | struct ib_device *device = ia->ri_id->device; | ||
182 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
183 | struct rpcrdma_mr_seg *seg1 = seg; | ||
184 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
185 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
186 | struct ib_mr *mr = frmr->fr_mr; | ||
187 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
188 | u8 key; | ||
189 | int len, pageoff; | ||
190 | int i, rc; | ||
191 | int seg_len; | ||
192 | u64 pa; | ||
193 | int page_no; | ||
194 | |||
195 | pageoff = offset_in_page(seg1->mr_offset); | ||
196 | seg1->mr_offset -= pageoff; /* start of page */ | ||
197 | seg1->mr_len += pageoff; | ||
198 | len = -pageoff; | ||
199 | if (nsegs > ia->ri_max_frmr_depth) | ||
200 | nsegs = ia->ri_max_frmr_depth; | ||
201 | for (page_no = i = 0; i < nsegs;) { | ||
202 | rpcrdma_map_one(device, seg, direction); | ||
203 | pa = seg->mr_dma; | ||
204 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
205 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
206 | pa += PAGE_SIZE; | ||
207 | } | ||
208 | len += seg->mr_len; | ||
209 | ++seg; | ||
210 | ++i; | ||
211 | /* Check for holes */ | ||
212 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
213 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
214 | break; | ||
215 | } | ||
216 | dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", | ||
217 | __func__, mw, i, len); | ||
218 | |||
219 | frmr->fr_state = FRMR_IS_VALID; | ||
220 | |||
221 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
222 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
223 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
224 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; | ||
225 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
226 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
227 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
228 | fastreg_wr.wr.fast_reg.length = len; | ||
229 | fastreg_wr.wr.fast_reg.access_flags = writing ? | ||
230 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
231 | IB_ACCESS_REMOTE_READ; | ||
232 | key = (u8)(mr->rkey & 0x000000FF); | ||
233 | ib_update_fast_reg_key(mr, ++key); | ||
234 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
235 | |||
236 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
237 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
238 | if (rc) | ||
239 | goto out_senderr; | ||
240 | |||
241 | seg1->mr_rkey = mr->rkey; | ||
242 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
243 | seg1->mr_nsegs = i; | ||
244 | seg1->mr_len = len; | ||
245 | return i; | ||
246 | |||
247 | out_senderr: | ||
248 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | ||
249 | ib_update_fast_reg_key(mr, --key); | ||
250 | frmr->fr_state = FRMR_IS_INVALID; | ||
251 | while (i--) | ||
252 | rpcrdma_unmap_one(device, --seg); | ||
253 | return rc; | ||
254 | } | ||
255 | |||
256 | /* Post a LOCAL_INV Work Request to prevent further remote access | ||
257 | * via RDMA READ or RDMA WRITE. | ||
258 | */ | ||
259 | static int | ||
260 | frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
261 | { | ||
262 | struct rpcrdma_mr_seg *seg1 = seg; | ||
263 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
264 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
265 | int rc, nsegs = seg->mr_nsegs; | ||
266 | struct ib_device *device; | ||
267 | |||
268 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
269 | |||
270 | memset(&invalidate_wr, 0, sizeof(invalidate_wr)); | ||
271 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
272 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
273 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
274 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
275 | |||
276 | read_lock(&ia->ri_qplock); | ||
277 | device = ia->ri_id->device; | ||
278 | while (seg1->mr_nsegs--) | ||
279 | rpcrdma_unmap_one(device, seg++); | ||
280 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
281 | read_unlock(&ia->ri_qplock); | ||
282 | if (rc) | ||
283 | goto out_err; | ||
284 | return nsegs; | ||
285 | |||
286 | out_err: | ||
287 | /* Force rpcrdma_buffer_get() to retry */ | ||
288 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
289 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | ||
290 | return nsegs; | ||
291 | } | ||
292 | |||
293 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
294 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
295 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
296 | * also torn down. | ||
297 | * | ||
298 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
299 | * | ||
300 | * This is invoked only in the transport connect worker in order | ||
301 | * to serialize with rpcrdma_register_frmr_external(). | ||
302 | */ | ||
303 | static void | ||
304 | frwr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
305 | { | ||
306 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
307 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
308 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
309 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
310 | struct rpcrdma_mw *r; | ||
311 | int rc; | ||
312 | |||
313 | list_for_each_entry(r, &buf->rb_all, mw_all) { | ||
314 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
315 | continue; | ||
316 | |||
317 | __frwr_release(r); | ||
318 | rc = __frwr_init(r, pd, device, depth); | ||
319 | if (rc) { | ||
320 | dprintk("RPC: %s: mw %p left %s\n", | ||
321 | __func__, r, | ||
322 | (r->r.frmr.fr_state == FRMR_IS_STALE ? | ||
323 | "stale" : "valid")); | ||
324 | continue; | ||
325 | } | ||
326 | |||
327 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void | ||
332 | frwr_op_destroy(struct rpcrdma_buffer *buf) | ||
333 | { | ||
334 | struct rpcrdma_mw *r; | ||
335 | |||
336 | while (!list_empty(&buf->rb_all)) { | ||
337 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
338 | list_del(&r->mw_all); | ||
339 | __frwr_release(r); | ||
340 | kfree(r); | ||
341 | } | ||
342 | } | ||
343 | |||
344 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | ||
345 | .ro_map = frwr_op_map, | ||
346 | .ro_unmap = frwr_op_unmap, | ||
347 | .ro_open = frwr_op_open, | ||
348 | .ro_maxpages = frwr_op_maxpages, | ||
349 | .ro_init = frwr_op_init, | ||
350 | .ro_reset = frwr_op_reset, | ||
351 | .ro_destroy = frwr_op_destroy, | ||
352 | .ro_displayname = "frwr", | ||
353 | }; | ||
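One detail of frwr_op_map() worth calling out is the rkey bump before posting the FAST_REG WR: the low 8 bits of an rkey are consumer-owned, so cycling them on every registration invalidates any rkey the peer may have cached from an earlier use of the same MR. What ib_update_fast_reg_key() amounts to, as a standalone demonstration:

    #include <stdint.h>
    #include <stdio.h>

    /* Replace the consumer-owned low byte of the rkey. */
    static uint32_t update_key(uint32_t rkey, uint8_t key)
    {
        return (rkey & 0xFFFFFF00u) | key;
    }

    int main(void)
    {
        uint32_t rkey = 0x1234ABCD;
        uint8_t key = (uint8_t)(rkey & 0xFF);

        rkey = update_key(rkey, ++key);
        printf("new rkey: 0x%08X\n", rkey);     /* 0x1234ABCE */
        return 0;
    }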
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c new file mode 100644 index 000000000000..ba518af16787 --- /dev/null +++ b/net/sunrpc/xprtrdma/physical_ops.c | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* No-op chunk preparation. All client memory is pre-registered. | ||
7 | * Sometimes referred to as ALLPHYSICAL mode. | ||
8 | * | ||
9 | * Physical registration is simple because all client memory is | ||
10 | * pre-registered and never deregistered. This mode is good for | ||
11 | * adapter bring up, but is considered not safe: the server is | ||
12 | * trusted not to abuse its access to client memory not involved | ||
13 | * in RDMA I/O. | ||
14 | */ | ||
15 | |||
16 | #include "xprt_rdma.h" | ||
17 | |||
18 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
19 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
20 | #endif | ||
21 | |||
22 | static int | ||
23 | physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
24 | struct rpcrdma_create_data_internal *cdata) | ||
25 | { | ||
26 | return 0; | ||
27 | } | ||
28 | |||
29 | /* PHYSICAL memory registration conveys one page per chunk segment. | ||
30 | */ | ||
31 | static size_t | ||
32 | physical_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
33 | { | ||
34 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
35 | rpcrdma_max_segments(r_xprt)); | ||
36 | } | ||
37 | |||
38 | static int | ||
39 | physical_op_init(struct rpcrdma_xprt *r_xprt) | ||
40 | { | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | /* The client's physical memory is already exposed for | ||
45 | * remote access via RDMA READ or RDMA WRITE. | ||
46 | */ | ||
47 | static int | ||
48 | physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
49 | int nsegs, bool writing) | ||
50 | { | ||
51 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
52 | |||
53 | rpcrdma_map_one(ia->ri_id->device, seg, | ||
54 | rpcrdma_data_dir(writing)); | ||
55 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
56 | seg->mr_base = seg->mr_dma; | ||
57 | seg->mr_nsegs = 1; | ||
58 | return 1; | ||
59 | } | ||
60 | |||
61 | /* Unmap a memory region, but leave it registered. | ||
62 | */ | ||
63 | static int | ||
64 | physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
65 | { | ||
66 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
67 | |||
68 | read_lock(&ia->ri_qplock); | ||
69 | rpcrdma_unmap_one(ia->ri_id->device, seg); | ||
70 | read_unlock(&ia->ri_qplock); | ||
71 | |||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | static void | ||
76 | physical_op_reset(struct rpcrdma_xprt *r_xprt) | ||
77 | { | ||
78 | } | ||
79 | |||
80 | static void | ||
81 | physical_op_destroy(struct rpcrdma_buffer *buf) | ||
82 | { | ||
83 | } | ||
84 | |||
85 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { | ||
86 | .ro_map = physical_op_map, | ||
87 | .ro_unmap = physical_op_unmap, | ||
88 | .ro_open = physical_op_open, | ||
89 | .ro_maxpages = physical_op_maxpages, | ||
90 | .ro_init = physical_op_init, | ||
91 | .ro_reset = physical_op_reset, | ||
92 | .ro_destroy = physical_op_destroy, | ||
93 | .ro_displayname = "physical", | ||
94 | }; | ||
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 91ffde82fa0c..2c53ea9e1b83 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -53,6 +53,14 @@ | |||
53 | # define RPCDBG_FACILITY RPCDBG_TRANS | 53 | # define RPCDBG_FACILITY RPCDBG_TRANS |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | enum rpcrdma_chunktype { | ||
57 | rpcrdma_noch = 0, | ||
58 | rpcrdma_readch, | ||
59 | rpcrdma_areadch, | ||
60 | rpcrdma_writech, | ||
61 | rpcrdma_replych | ||
62 | }; | ||
63 | |||
56 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 64 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
57 | static const char transfertypes[][12] = { | 65 | static const char transfertypes[][12] = { |
58 | "pure inline", /* no chunks */ | 66 | "pure inline", /* no chunks */ |
@@ -179,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
179 | struct rpcrdma_write_array *warray = NULL; | 187 | struct rpcrdma_write_array *warray = NULL; |
180 | struct rpcrdma_write_chunk *cur_wchunk = NULL; | 188 | struct rpcrdma_write_chunk *cur_wchunk = NULL; |
181 | __be32 *iptr = headerp->rm_body.rm_chunks; | 189 | __be32 *iptr = headerp->rm_body.rm_chunks; |
190 | int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool); | ||
182 | 191 | ||
183 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { | 192 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { |
184 | /* a read chunk - server will RDMA Read our memory */ | 193 | /* a read chunk - server will RDMA Read our memory */ |
@@ -201,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
201 | if (nsegs < 0) | 210 | if (nsegs < 0) |
202 | return nsegs; | 211 | return nsegs; |
203 | 212 | ||
213 | map = r_xprt->rx_ia.ri_ops->ro_map; | ||
204 | do { | 214 | do { |
205 | n = rpcrdma_register_external(seg, nsegs, | 215 | n = map(r_xprt, seg, nsegs, cur_wchunk != NULL); |
206 | cur_wchunk != NULL, r_xprt); | ||
207 | if (n <= 0) | 216 | if (n <= 0) |
208 | goto out; | 217 | goto out; |
209 | if (cur_rchunk) { /* read */ | 218 | if (cur_rchunk) { /* read */ |
@@ -275,34 +284,13 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
275 | return (unsigned char *)iptr - (unsigned char *)headerp; | 284 | return (unsigned char *)iptr - (unsigned char *)headerp; |
276 | 285 | ||
277 | out: | 286 | out: |
278 | if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) { | 287 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) |
279 | for (pos = 0; nchunks--;) | 288 | return n; |
280 | pos += rpcrdma_deregister_external( | ||
281 | &req->rl_segments[pos], r_xprt); | ||
282 | } | ||
283 | return n; | ||
284 | } | ||
285 | 289 | ||
286 | /* | 290 | for (pos = 0; nchunks--;) |
287 | * Marshal chunks. This routine returns the header length | 291 | pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
288 | * consumed by marshaling. | 292 | &req->rl_segments[pos]); |
289 | * | 293 | return n; |
290 | * Returns positive RPC/RDMA header size, or negative errno. | ||
291 | */ | ||
292 | |||
293 | ssize_t | ||
294 | rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) | ||
295 | { | ||
296 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
297 | struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf); | ||
298 | |||
299 | if (req->rl_rtype != rpcrdma_noch) | ||
300 | result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | ||
301 | headerp, req->rl_rtype); | ||
302 | else if (req->rl_wtype != rpcrdma_noch) | ||
303 | result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | ||
304 | headerp, req->rl_wtype); | ||
305 | return result; | ||
306 | } | 294 | } |
307 | 295 | ||
308 | /* | 296 | /* |
@@ -397,6 +385,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
397 | char *base; | 385 | char *base; |
398 | size_t rpclen, padlen; | 386 | size_t rpclen, padlen; |
399 | ssize_t hdrlen; | 387 | ssize_t hdrlen; |
388 | enum rpcrdma_chunktype rtype, wtype; | ||
400 | struct rpcrdma_msg *headerp; | 389 | struct rpcrdma_msg *headerp; |
401 | 390 | ||
402 | /* | 391 | /* |
@@ -433,13 +422,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
433 | * into pages; otherwise use reply chunks. | 422 | * into pages; otherwise use reply chunks. |
434 | */ | 423 | */ |
435 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) | 424 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) |
436 | req->rl_wtype = rpcrdma_noch; | 425 | wtype = rpcrdma_noch; |
437 | else if (rqst->rq_rcv_buf.page_len == 0) | 426 | else if (rqst->rq_rcv_buf.page_len == 0) |
438 | req->rl_wtype = rpcrdma_replych; | 427 | wtype = rpcrdma_replych; |
439 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | 428 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) |
440 | req->rl_wtype = rpcrdma_writech; | 429 | wtype = rpcrdma_writech; |
441 | else | 430 | else |
442 | req->rl_wtype = rpcrdma_replych; | 431 | wtype = rpcrdma_replych; |
443 | 432 | ||
444 | /* | 433 | /* |
445 | * Chunks needed for arguments? | 434 | * Chunks needed for arguments? |
@@ -456,16 +445,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
456 | * TBD check NFSv4 setacl | 445 | * TBD check NFSv4 setacl |
457 | */ | 446 | */ |
458 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) | 447 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) |
459 | req->rl_rtype = rpcrdma_noch; | 448 | rtype = rpcrdma_noch; |
460 | else if (rqst->rq_snd_buf.page_len == 0) | 449 | else if (rqst->rq_snd_buf.page_len == 0) |
461 | req->rl_rtype = rpcrdma_areadch; | 450 | rtype = rpcrdma_areadch; |
462 | else | 451 | else |
463 | req->rl_rtype = rpcrdma_readch; | 452 | rtype = rpcrdma_readch; |
464 | 453 | ||
465 | /* The following simplification is not true forever */ | 454 | /* The following simplification is not true forever */ |
466 | if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych) | 455 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) |
467 | req->rl_wtype = rpcrdma_noch; | 456 | wtype = rpcrdma_noch; |
468 | if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) { | 457 | if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { |
469 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", | 458 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", |
470 | __func__); | 459 | __func__); |
471 | return -EIO; | 460 | return -EIO; |
@@ -479,7 +468,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
479 | * When padding is in use and applies to the transfer, insert | 468 | * When padding is in use and applies to the transfer, insert |
480 | * it and change the message type. | 469 | * it and change the message type. |
481 | */ | 470 | */ |
482 | if (req->rl_rtype == rpcrdma_noch) { | 471 | if (rtype == rpcrdma_noch) { |
483 | 472 | ||
484 | padlen = rpcrdma_inline_pullup(rqst, | 473 | padlen = rpcrdma_inline_pullup(rqst, |
485 | RPCRDMA_INLINE_PAD_VALUE(rqst)); | 474 | RPCRDMA_INLINE_PAD_VALUE(rqst)); |
@@ -494,7 +483,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
494 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; | 483 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; |
495 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; | 484 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; |
496 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ | 485 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ |
497 | if (req->rl_wtype != rpcrdma_noch) { | 486 | if (wtype != rpcrdma_noch) { |
498 | dprintk("RPC: %s: invalid chunk list\n", | 487 | dprintk("RPC: %s: invalid chunk list\n", |
499 | __func__); | 488 | __func__); |
500 | return -EIO; | 489 | return -EIO; |
@@ -515,18 +504,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
515 | * on receive. Therefore, we request a reply chunk | 504 | * on receive. Therefore, we request a reply chunk |
516 | * for non-writes wherever feasible and efficient. | 505 | * for non-writes wherever feasible and efficient. |
517 | */ | 506 | */ |
518 | if (req->rl_wtype == rpcrdma_noch) | 507 | if (wtype == rpcrdma_noch) |
519 | req->rl_wtype = rpcrdma_replych; | 508 | wtype = rpcrdma_replych; |
520 | } | 509 | } |
521 | } | 510 | } |
522 | 511 | ||
523 | hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen); | 512 | if (rtype != rpcrdma_noch) { |
513 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | ||
514 | headerp, rtype); | ||
515 | wtype = rtype; /* simplify dprintk */ | ||
516 | |||
517 | } else if (wtype != rpcrdma_noch) { | ||
518 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | ||
519 | headerp, wtype); | ||
520 | } | ||
524 | if (hdrlen < 0) | 521 | if (hdrlen < 0) |
525 | return hdrlen; | 522 | return hdrlen; |
526 | 523 | ||
527 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" | 524 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
528 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 525 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
529 | __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, | 526 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, |
530 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); | 527 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); |
531 | 528 | ||
532 | /* | 529 | /* |
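With rl_rtype/rl_wtype demoted from per-request fields to locals, rpcrdma_marshal_req() makes the chunking decision in one place: small payloads stay inline, page-less buffers become a whole-message read chunk, paged ones a read chunk list (and symmetrically for the reply side). A toy restatement of the argument-side decision; pick_rtype() and the threshold value are illustrative, not the kernel's names:

    #include <stdio.h>

    enum chunktype { noch, readch, areadch, writech, replych };

    static enum chunktype pick_rtype(unsigned int len, unsigned int page_len,
                                     unsigned int inline_threshold)
    {
        if (len <= inline_threshold)
            return noch;
        return page_len == 0 ? areadch : readch;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               pick_rtype(100, 0, 1024),        /* inline: noch */
               pick_rtype(8192, 0, 1024),       /* whole buf: areadch */
               pick_rtype(8192, 4096, 1024));   /* paged: readch */
        return 0;
    }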
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2e192baa59f3..54f23b1be986 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = { | |||
157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | 157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ |
158 | 158 | ||
159 | static void | 159 | static void |
160 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) | ||
161 | { | ||
162 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
163 | char buf[20]; | ||
164 | |||
165 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | ||
166 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
167 | |||
168 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; | ||
169 | } | ||
170 | |||
171 | static void | ||
172 | xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) | ||
173 | { | ||
174 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
175 | char buf[40]; | ||
176 | |||
177 | snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); | ||
178 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
179 | |||
180 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; | ||
181 | } | ||
182 | |||
183 | static void | ||
160 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) | 184 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) |
161 | { | 185 | { |
162 | struct sockaddr *sap = (struct sockaddr *) | 186 | struct sockaddr *sap = (struct sockaddr *) |
163 | &rpcx_to_rdmad(xprt).addr; | 187 | &rpcx_to_rdmad(xprt).addr; |
164 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | 188 | char buf[128]; |
165 | char buf[64]; | 189 | |
190 | switch (sap->sa_family) { | ||
191 | case AF_INET: | ||
192 | xprt_rdma_format_addresses4(xprt, sap); | ||
193 | break; | ||
194 | case AF_INET6: | ||
195 | xprt_rdma_format_addresses6(xprt, sap); | ||
196 | break; | ||
197 | default: | ||
198 | pr_err("rpcrdma: Unrecognized address family\n"); | ||
199 | return; | ||
200 | } | ||
166 | 201 | ||
167 | (void)rpc_ntop(sap, buf, sizeof(buf)); | 202 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
168 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); | 203 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); |
@@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) | |||
170 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); | 205 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); |
171 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); | 206 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); |
172 | 207 | ||
173 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; | ||
174 | |||
175 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | ||
176 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
177 | |||
178 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); | 208 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); |
179 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); | 209 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); |
180 | 210 | ||
181 | /* netid */ | 211 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; |
182 | xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; | ||
183 | } | 212 | } |
184 | 213 | ||
185 | static void | 214 | static void |
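[Annotation] The hunk above splits address formatting into per-family helpers dispatched on sa_family. For readers unfamiliar with the pattern, here is a minimal user-space sketch of the same dispatch (standard sockets API, not the kernel code itself; the function name is invented for illustration):

    #include <stdio.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    /* Dispatch on sa_family the way xprt_rdma_format_addresses() now does. */
    static void format_address(const struct sockaddr *sap, char *buf, socklen_t len)
    {
            switch (sap->sa_family) {
            case AF_INET:
                    inet_ntop(AF_INET,
                              &((const struct sockaddr_in *)sap)->sin_addr,
                              buf, len);
                    break;
            case AF_INET6:
                    inet_ntop(AF_INET6,
                              &((const struct sockaddr_in6 *)sap)->sin6_addr,
                              buf, len);
                    break;
            default:
                    snprintf(buf, len, "unrecognized family %d", sap->sa_family);
            }
    }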
@@ -377,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args) | |||
377 | xprt_rdma_connect_worker); | 406 | xprt_rdma_connect_worker); |
378 | 407 | ||
379 | xprt_rdma_format_addresses(xprt); | 408 | xprt_rdma_format_addresses(xprt); |
380 | xprt->max_payload = rpcrdma_max_payload(new_xprt); | 409 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); |
410 | if (xprt->max_payload == 0) | ||
411 | goto out4; | ||
412 | xprt->max_payload <<= PAGE_SHIFT; | ||
381 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", | 413 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", |
382 | __func__, xprt->max_payload); | 414 | __func__, xprt->max_payload); |
383 | 415 | ||
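[Annotation] Note the new calculation: ro_maxpages() returns a page count, which is then shifted into bytes. With 4 KiB pages (PAGE_SHIFT = 12) and, say, a 256-page limit from the registration mode, xprt->max_payload becomes 256 << 12 = 1,048,576 bytes (1 MiB). The 256-page figure is illustrative, not taken from this patch; what the patch does guarantee is that a zero page count now fails transport setup instead of silently producing a zero payload limit.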
@@ -552,8 +584,8 @@ xprt_rdma_free(void *buffer) | |||
552 | 584 | ||
553 | for (i = 0; req->rl_nchunks;) { | 585 | for (i = 0; req->rl_nchunks;) { |
554 | --req->rl_nchunks; | 586 | --req->rl_nchunks; |
555 | i += rpcrdma_deregister_external( | 587 | i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
556 | &req->rl_segments[i], r_xprt); | 588 | &req->rl_segments[i]); |
557 | } | 589 | } |
558 | 590 | ||
559 | rpcrdma_buffer_put(req); | 591 | rpcrdma_buffer_put(req); |
@@ -579,10 +611,7 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
579 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 611 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
580 | int rc = 0; | 612 | int rc = 0; |
581 | 613 | ||
582 | if (req->rl_niovs == 0) | 614 | rc = rpcrdma_marshal_req(rqst); |
583 | rc = rpcrdma_marshal_req(rqst); | ||
584 | else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) | ||
585 | rc = rpcrdma_marshal_chunks(rqst, 0); | ||
586 | if (rc < 0) | 615 | if (rc < 0) |
587 | goto failed_marshal; | 616 | goto failed_marshal; |
588 | 617 | ||
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e28909fddd30..4870d272e006 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/interrupt.h> | 50 | #include <linux/interrupt.h> |
51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | #include <linux/sunrpc/addr.h> | ||
53 | #include <asm/bitops.h> | 54 | #include <asm/bitops.h> |
54 | 55 | ||
55 | #include "xprt_rdma.h" | 56 | #include "xprt_rdma.h" |
@@ -62,9 +63,6 @@ | |||
62 | # define RPCDBG_FACILITY RPCDBG_TRANS | 63 | # define RPCDBG_FACILITY RPCDBG_TRANS |
63 | #endif | 64 | #endif |
64 | 65 | ||
65 | static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); | ||
66 | static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); | ||
67 | |||
68 | /* | 66 | /* |
69 | * internal functions | 67 | * internal functions |
70 | */ | 68 | */ |
@@ -188,7 +186,7 @@ static const char * const wc_status[] = { | |||
188 | "remote access error", | 186 | "remote access error", |
189 | "remote operation error", | 187 | "remote operation error", |
190 | "transport retry counter exceeded", | 188 | "transport retry counter exceeded", |
191 | "RNR retrycounter exceeded", | 189 | "RNR retry counter exceeded", |
192 | "local RDD violation error", | 190 | "local RDD violation error", |
193 | "remove invalid RD request", | 191 | "remove invalid RD request", |
194 | "operation aborted", | 192 | "operation aborted", |
@@ -206,21 +204,17 @@ static const char * const wc_status[] = { | |||
206 | static void | 204 | static void |
207 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) | 205 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) |
208 | { | 206 | { |
209 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
210 | return; | ||
211 | |||
212 | /* WARNING: Only wr_id and status are reliable at this point */ | 207 | /* WARNING: Only wr_id and status are reliable at this point */ |
213 | if (wc->wr_id == 0ULL) { | 208 | if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) { |
214 | if (wc->status != IB_WC_WR_FLUSH_ERR) | 209 | if (wc->status != IB_WC_SUCCESS && |
210 | wc->status != IB_WC_WR_FLUSH_ERR) | ||
215 | pr_err("RPC: %s: SEND: %s\n", | 211 | pr_err("RPC: %s: SEND: %s\n", |
216 | __func__, COMPLETION_MSG(wc->status)); | 212 | __func__, COMPLETION_MSG(wc->status)); |
217 | } else { | 213 | } else { |
218 | struct rpcrdma_mw *r; | 214 | struct rpcrdma_mw *r; |
219 | 215 | ||
220 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | 216 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
221 | r->r.frmr.fr_state = FRMR_IS_STALE; | 217 | r->mw_sendcompletion(wc); |
222 | pr_err("RPC: %s: frmr %p (stale): %s\n", | ||
223 | __func__, r, COMPLETION_MSG(wc->status)); | ||
224 | } | 218 | } |
225 | } | 219 | } |
226 | 220 | ||
@@ -424,7 +418,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
424 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 418 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
425 | struct rpcrdma_ep *ep = &xprt->rx_ep; | 419 | struct rpcrdma_ep *ep = &xprt->rx_ep; |
426 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 420 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
427 | struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; | 421 | struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; |
428 | #endif | 422 | #endif |
429 | struct ib_qp_attr *attr = &ia->ri_qp_attr; | 423 | struct ib_qp_attr *attr = &ia->ri_qp_attr; |
430 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; | 424 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; |
@@ -480,9 +474,8 @@ connected: | |||
480 | wake_up_all(&ep->rep_connect_wait); | 474 | wake_up_all(&ep->rep_connect_wait); |
481 | /*FALLTHROUGH*/ | 475 | /*FALLTHROUGH*/ |
482 | default: | 476 | default: |
483 | dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n", | 477 | dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", |
484 | __func__, &addr->sin_addr.s_addr, | 478 | __func__, sap, rpc_get_port(sap), ep, |
485 | ntohs(addr->sin_port), ep, | ||
486 | CONNECTION_MSG(event->event)); | 479 | CONNECTION_MSG(event->event)); |
487 | break; | 480 | break; |
488 | } | 481 | } |
@@ -491,19 +484,16 @@ connected: | |||
491 | if (connstate == 1) { | 484 | if (connstate == 1) { |
492 | int ird = attr->max_dest_rd_atomic; | 485 | int ird = attr->max_dest_rd_atomic; |
493 | int tird = ep->rep_remote_cma.responder_resources; | 486 | int tird = ep->rep_remote_cma.responder_resources; |
494 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u " | 487 | |
495 | "on %s, memreg %d slots %d ird %d%s\n", | 488 | pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", |
496 | &addr->sin_addr.s_addr, | 489 | sap, rpc_get_port(sap), |
497 | ntohs(addr->sin_port), | ||
498 | ia->ri_id->device->name, | 490 | ia->ri_id->device->name, |
499 | ia->ri_memreg_strategy, | 491 | ia->ri_ops->ro_displayname, |
500 | xprt->rx_buf.rb_max_requests, | 492 | xprt->rx_buf.rb_max_requests, |
501 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | 493 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); |
502 | } else if (connstate < 0) { | 494 | } else if (connstate < 0) { |
503 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n", | 495 | pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n", |
504 | &addr->sin_addr.s_addr, | 496 | sap, rpc_get_port(sap), connstate); |
505 | ntohs(addr->sin_port), | ||
506 | connstate); | ||
507 | } | 497 | } |
508 | #endif | 498 | #endif |
509 | 499 | ||
@@ -621,17 +611,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
621 | 611 | ||
622 | if (memreg == RPCRDMA_FRMR) { | 612 | if (memreg == RPCRDMA_FRMR) { |
623 | /* Requires both frmr reg and local dma lkey */ | 613 | /* Requires both frmr reg and local dma lkey */ |
624 | if ((devattr->device_cap_flags & | 614 | if (((devattr->device_cap_flags & |
625 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | 615 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != |
626 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | 616 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) || |
617 | (devattr->max_fast_reg_page_list_len == 0)) { | ||
627 | dprintk("RPC: %s: FRMR registration " | 618 | dprintk("RPC: %s: FRMR registration " |
628 | "not supported by HCA\n", __func__); | 619 | "not supported by HCA\n", __func__); |
629 | memreg = RPCRDMA_MTHCAFMR; | 620 | memreg = RPCRDMA_MTHCAFMR; |
630 | } else { | ||
631 | /* Mind the ia limit on FRMR page list depth */ | ||
632 | ia->ri_max_frmr_depth = min_t(unsigned int, | ||
633 | RPCRDMA_MAX_DATA_SEGS, | ||
634 | devattr->max_fast_reg_page_list_len); | ||
635 | } | 621 | } |
636 | } | 622 | } |
637 | if (memreg == RPCRDMA_MTHCAFMR) { | 623 | if (memreg == RPCRDMA_MTHCAFMR) { |
@@ -652,13 +638,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
652 | */ | 638 | */ |
653 | switch (memreg) { | 639 | switch (memreg) { |
654 | case RPCRDMA_FRMR: | 640 | case RPCRDMA_FRMR: |
641 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | ||
655 | break; | 642 | break; |
656 | case RPCRDMA_ALLPHYSICAL: | 643 | case RPCRDMA_ALLPHYSICAL: |
644 | ia->ri_ops = &rpcrdma_physical_memreg_ops; | ||
657 | mem_priv = IB_ACCESS_LOCAL_WRITE | | 645 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
658 | IB_ACCESS_REMOTE_WRITE | | 646 | IB_ACCESS_REMOTE_WRITE | |
659 | IB_ACCESS_REMOTE_READ; | 647 | IB_ACCESS_REMOTE_READ; |
660 | goto register_setup; | 648 | goto register_setup; |
661 | case RPCRDMA_MTHCAFMR: | 649 | case RPCRDMA_MTHCAFMR: |
650 | ia->ri_ops = &rpcrdma_fmr_memreg_ops; | ||
662 | if (ia->ri_have_dma_lkey) | 651 | if (ia->ri_have_dma_lkey) |
663 | break; | 652 | break; |
664 | mem_priv = IB_ACCESS_LOCAL_WRITE; | 653 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
@@ -678,8 +667,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
678 | rc = -ENOMEM; | 667 | rc = -ENOMEM; |
679 | goto out3; | 668 | goto out3; |
680 | } | 669 | } |
681 | dprintk("RPC: %s: memory registration strategy is %d\n", | 670 | dprintk("RPC: %s: memory registration strategy is '%s'\n", |
682 | __func__, memreg); | 671 | __func__, ia->ri_ops->ro_displayname); |
683 | 672 | ||
684 | /* Else will do memory reg/dereg for each chunk */ | 673 | /* Else will do memory reg/dereg for each chunk */ |
685 | ia->ri_memreg_strategy = memreg; | 674 | ia->ri_memreg_strategy = memreg; |
@@ -743,49 +732,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
743 | 732 | ||
744 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; | 733 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; |
745 | ep->rep_attr.qp_context = ep; | 734 | ep->rep_attr.qp_context = ep; |
746 | /* send_cq and recv_cq initialized below */ | ||
747 | ep->rep_attr.srq = NULL; | 735 | ep->rep_attr.srq = NULL; |
748 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 736 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
749 | switch (ia->ri_memreg_strategy) { | 737 | rc = ia->ri_ops->ro_open(ia, ep, cdata); |
750 | case RPCRDMA_FRMR: { | 738 | if (rc) |
751 | int depth = 7; | 739 | return rc; |
752 | |||
753 | /* Add room for frmr register and invalidate WRs. | ||
754 | * 1. FRMR reg WR for head | ||
755 | * 2. FRMR invalidate WR for head | ||
756 | * 3. N FRMR reg WRs for pagelist | ||
757 | * 4. N FRMR invalidate WRs for pagelist | ||
758 | * 5. FRMR reg WR for tail | ||
759 | * 6. FRMR invalidate WR for tail | ||
760 | * 7. The RDMA_SEND WR | ||
761 | */ | ||
762 | |||
763 | /* Calculate N if the device max FRMR depth is smaller than | ||
764 | * RPCRDMA_MAX_DATA_SEGS. | ||
765 | */ | ||
766 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
767 | int delta = RPCRDMA_MAX_DATA_SEGS - | ||
768 | ia->ri_max_frmr_depth; | ||
769 | |||
770 | do { | ||
771 | depth += 2; /* FRMR reg + invalidate */ | ||
772 | delta -= ia->ri_max_frmr_depth; | ||
773 | } while (delta > 0); | ||
774 | |||
775 | } | ||
776 | ep->rep_attr.cap.max_send_wr *= depth; | ||
777 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
778 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
779 | if (!cdata->max_requests) | ||
780 | return -EINVAL; | ||
781 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
782 | depth; | ||
783 | } | ||
784 | break; | ||
785 | } | ||
786 | default: | ||
787 | break; | ||
788 | } | ||
789 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; | 740 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; |
790 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); | 741 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); |
791 | ep->rep_attr.cap.max_recv_sge = 1; | 742 | ep->rep_attr.cap.max_recv_sge = 1; |
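[Annotation] The work-request budget deleted here moves into the FRWR ro_open callback. For reference, the arithmetic it performs, shown with hypothetical limits (RPCRDMA_MAX_DATA_SEGS = 64 segments per RPC, device ri_max_frmr_depth = 16 pages per FRMR):

    int depth = 7;          /* head reg+inv, one pagelist reg+inv pair,
                             * tail reg+inv, plus the RDMA_SEND WR */
    int delta = 64 - 16;    /* 48 segments spill past the first FRMR */

    do {
            depth += 2;     /* one extra reg + invalidate pair */
            delta -= 16;
    } while (delta > 0);    /* three iterations: depth ends at 13 */

    /* max_send_wr = cdata->max_requests * 13, capped by devattr->max_qp_wr */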
@@ -944,21 +895,9 @@ retry: | |||
944 | rpcrdma_ep_disconnect(ep, ia); | 895 | rpcrdma_ep_disconnect(ep, ia); |
945 | rpcrdma_flush_cqs(ep); | 896 | rpcrdma_flush_cqs(ep); |
946 | 897 | ||
947 | switch (ia->ri_memreg_strategy) { | ||
948 | case RPCRDMA_FRMR: | ||
949 | rpcrdma_reset_frmrs(ia); | ||
950 | break; | ||
951 | case RPCRDMA_MTHCAFMR: | ||
952 | rpcrdma_reset_fmrs(ia); | ||
953 | break; | ||
954 | case RPCRDMA_ALLPHYSICAL: | ||
955 | break; | ||
956 | default: | ||
957 | rc = -EIO; | ||
958 | goto out; | ||
959 | } | ||
960 | |||
961 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 898 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
899 | ia->ri_ops->ro_reset(xprt); | ||
900 | |||
962 | id = rpcrdma_create_id(xprt, ia, | 901 | id = rpcrdma_create_id(xprt, ia, |
963 | (struct sockaddr *)&xprt->rx_data.addr); | 902 | (struct sockaddr *)&xprt->rx_data.addr); |
964 | if (IS_ERR(id)) { | 903 | if (IS_ERR(id)) { |
@@ -1123,91 +1062,6 @@ out: | |||
1123 | return ERR_PTR(rc); | 1062 | return ERR_PTR(rc); |
1124 | } | 1063 | } |
1125 | 1064 | ||
1126 | static int | ||
1127 | rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1128 | { | ||
1129 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
1130 | struct ib_fmr_attr fmr_attr = { | ||
1131 | .max_pages = RPCRDMA_MAX_DATA_SEGS, | ||
1132 | .max_maps = 1, | ||
1133 | .page_shift = PAGE_SHIFT | ||
1134 | }; | ||
1135 | struct rpcrdma_mw *r; | ||
1136 | int i, rc; | ||
1137 | |||
1138 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1139 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | ||
1140 | |||
1141 | while (i--) { | ||
1142 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1143 | if (r == NULL) | ||
1144 | return -ENOMEM; | ||
1145 | |||
1146 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr); | ||
1147 | if (IS_ERR(r->r.fmr)) { | ||
1148 | rc = PTR_ERR(r->r.fmr); | ||
1149 | dprintk("RPC: %s: ib_alloc_fmr failed %i\n", | ||
1150 | __func__, rc); | ||
1151 | goto out_free; | ||
1152 | } | ||
1153 | |||
1154 | list_add(&r->mw_list, &buf->rb_mws); | ||
1155 | list_add(&r->mw_all, &buf->rb_all); | ||
1156 | } | ||
1157 | return 0; | ||
1158 | |||
1159 | out_free: | ||
1160 | kfree(r); | ||
1161 | return rc; | ||
1162 | } | ||
1163 | |||
1164 | static int | ||
1165 | rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1166 | { | ||
1167 | struct rpcrdma_frmr *f; | ||
1168 | struct rpcrdma_mw *r; | ||
1169 | int i, rc; | ||
1170 | |||
1171 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1172 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | ||
1173 | |||
1174 | while (i--) { | ||
1175 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1176 | if (r == NULL) | ||
1177 | return -ENOMEM; | ||
1178 | f = &r->r.frmr; | ||
1179 | |||
1180 | f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1181 | ia->ri_max_frmr_depth); | ||
1182 | if (IS_ERR(f->fr_mr)) { | ||
1183 | rc = PTR_ERR(f->fr_mr); | ||
1184 | dprintk("RPC: %s: ib_alloc_fast_reg_mr " | ||
1185 | "failed %i\n", __func__, rc); | ||
1186 | goto out_free; | ||
1187 | } | ||
1188 | |||
1189 | f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1190 | ia->ri_max_frmr_depth); | ||
1191 | if (IS_ERR(f->fr_pgl)) { | ||
1192 | rc = PTR_ERR(f->fr_pgl); | ||
1193 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list " | ||
1194 | "failed %i\n", __func__, rc); | ||
1195 | |||
1196 | ib_dereg_mr(f->fr_mr); | ||
1197 | goto out_free; | ||
1198 | } | ||
1199 | |||
1200 | list_add(&r->mw_list, &buf->rb_mws); | ||
1201 | list_add(&r->mw_all, &buf->rb_all); | ||
1202 | } | ||
1203 | |||
1204 | return 0; | ||
1205 | |||
1206 | out_free: | ||
1207 | kfree(r); | ||
1208 | return rc; | ||
1209 | } | ||
1210 | |||
1211 | int | 1065 | int |
1212 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | 1066 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
1213 | { | 1067 | { |
@@ -1244,22 +1098,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1244 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; | 1098 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; |
1245 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; | 1099 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; |
1246 | 1100 | ||
1247 | INIT_LIST_HEAD(&buf->rb_mws); | 1101 | rc = ia->ri_ops->ro_init(r_xprt); |
1248 | INIT_LIST_HEAD(&buf->rb_all); | 1102 | if (rc) |
1249 | switch (ia->ri_memreg_strategy) { | 1103 | goto out; |
1250 | case RPCRDMA_FRMR: | ||
1251 | rc = rpcrdma_init_frmrs(ia, buf); | ||
1252 | if (rc) | ||
1253 | goto out; | ||
1254 | break; | ||
1255 | case RPCRDMA_MTHCAFMR: | ||
1256 | rc = rpcrdma_init_fmrs(ia, buf); | ||
1257 | if (rc) | ||
1258 | goto out; | ||
1259 | break; | ||
1260 | default: | ||
1261 | break; | ||
1262 | } | ||
1263 | 1104 | ||
1264 | for (i = 0; i < buf->rb_max_requests; i++) { | 1105 | for (i = 0; i < buf->rb_max_requests; i++) { |
1265 | struct rpcrdma_req *req; | 1106 | struct rpcrdma_req *req; |
@@ -1311,47 +1152,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | |||
1311 | kfree(req); | 1152 | kfree(req); |
1312 | } | 1153 | } |
1313 | 1154 | ||
1314 | static void | ||
1315 | rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) | ||
1316 | { | ||
1317 | struct rpcrdma_mw *r; | ||
1318 | int rc; | ||
1319 | |||
1320 | while (!list_empty(&buf->rb_all)) { | ||
1321 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1322 | list_del(&r->mw_all); | ||
1323 | list_del(&r->mw_list); | ||
1324 | |||
1325 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1326 | if (rc) | ||
1327 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
1328 | __func__, rc); | ||
1329 | |||
1330 | kfree(r); | ||
1331 | } | ||
1332 | } | ||
1333 | |||
1334 | static void | ||
1335 | rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf) | ||
1336 | { | ||
1337 | struct rpcrdma_mw *r; | ||
1338 | int rc; | ||
1339 | |||
1340 | while (!list_empty(&buf->rb_all)) { | ||
1341 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1342 | list_del(&r->mw_all); | ||
1343 | list_del(&r->mw_list); | ||
1344 | |||
1345 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1346 | if (rc) | ||
1347 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1348 | __func__, rc); | ||
1349 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1350 | |||
1351 | kfree(r); | ||
1352 | } | ||
1353 | } | ||
1354 | |||
1355 | void | 1155 | void |
1356 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1156 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
1357 | { | 1157 | { |
@@ -1372,104 +1172,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1372 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); | 1172 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); |
1373 | } | 1173 | } |
1374 | 1174 | ||
1375 | switch (ia->ri_memreg_strategy) { | 1175 | ia->ri_ops->ro_destroy(buf); |
1376 | case RPCRDMA_FRMR: | ||
1377 | rpcrdma_destroy_frmrs(buf); | ||
1378 | break; | ||
1379 | case RPCRDMA_MTHCAFMR: | ||
1380 | rpcrdma_destroy_fmrs(buf); | ||
1381 | break; | ||
1382 | default: | ||
1383 | break; | ||
1384 | } | ||
1385 | 1176 | ||
1386 | kfree(buf->rb_pool); | 1177 | kfree(buf->rb_pool); |
1387 | } | 1178 | } |
1388 | 1179 | ||
1389 | /* After a disconnect, unmap all FMRs. | ||
1390 | * | ||
1391 | * This is invoked only in the transport connect worker in order | ||
1392 | * to serialize with rpcrdma_register_fmr_external(). | ||
1393 | */ | ||
1394 | static void | ||
1395 | rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) | ||
1396 | { | ||
1397 | struct rpcrdma_xprt *r_xprt = | ||
1398 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1399 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1400 | struct list_head *pos; | ||
1401 | struct rpcrdma_mw *r; | ||
1402 | LIST_HEAD(l); | ||
1403 | int rc; | ||
1404 | |||
1405 | list_for_each(pos, &buf->rb_all) { | ||
1406 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
1407 | |||
1408 | INIT_LIST_HEAD(&l); | ||
1409 | list_add(&r->r.fmr->list, &l); | ||
1410 | rc = ib_unmap_fmr(&l); | ||
1411 | if (rc) | ||
1412 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
1413 | __func__, rc); | ||
1414 | } | ||
1415 | } | ||
1416 | |||
1417 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
1418 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
1419 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
1420 | * also torn down. | ||
1421 | * | ||
1422 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
1423 | * | ||
1424 | * This is invoked only in the transport connect worker in order | ||
1425 | * to serialize with rpcrdma_register_frmr_external(). | ||
1426 | */ | ||
1427 | static void | ||
1428 | rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) | ||
1429 | { | ||
1430 | struct rpcrdma_xprt *r_xprt = | ||
1431 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1432 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1433 | struct list_head *pos; | ||
1434 | struct rpcrdma_mw *r; | ||
1435 | int rc; | ||
1436 | |||
1437 | list_for_each(pos, &buf->rb_all) { | ||
1438 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
1439 | |||
1440 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
1441 | continue; | ||
1442 | |||
1443 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1444 | if (rc) | ||
1445 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1446 | __func__, rc); | ||
1447 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1448 | |||
1449 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1450 | ia->ri_max_frmr_depth); | ||
1451 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1452 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1453 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1454 | " failed %i\n", __func__, rc); | ||
1455 | continue; | ||
1456 | } | ||
1457 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( | ||
1458 | ia->ri_id->device, | ||
1459 | ia->ri_max_frmr_depth); | ||
1460 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1461 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1462 | dprintk("RPC: %s: " | ||
1463 | "ib_alloc_fast_reg_page_list " | ||
1464 | "failed %i\n", __func__, rc); | ||
1465 | |||
1466 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
1467 | continue; | ||
1468 | } | ||
1469 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1470 | } | ||
1471 | } | ||
1472 | |||
1473 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving | 1180 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving |
1474 | * some req segments uninitialized. | 1181 | * some req segments uninitialized. |
1475 | */ | 1182 | */ |
@@ -1509,7 +1216,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | |||
1509 | } | 1216 | } |
1510 | } | 1217 | } |
1511 | 1218 | ||
1512 | /* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external(). | 1219 | /* rpcrdma_unmap_one() was already done during deregistration. |
1513 | * Redo only the ib_post_send(). | 1220 | * Redo only the ib_post_send(). |
1514 | */ | 1221 | */ |
1515 | static void | 1222 | static void |
@@ -1729,6 +1436,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | |||
1729 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. | 1436 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. |
1730 | */ | 1437 | */ |
1731 | 1438 | ||
1439 | void | ||
1440 | rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg) | ||
1441 | { | ||
1442 | dprintk("RPC: map_one: offset %p iova %llx len %zu\n", | ||
1443 | seg->mr_offset, | ||
1444 | (unsigned long long)seg->mr_dma, seg->mr_dmalen); | ||
1445 | } | ||
1446 | |||
1732 | static int | 1447 | static int |
1733 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | 1448 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, |
1734 | struct ib_mr **mrp, struct ib_sge *iov) | 1449 | struct ib_mr **mrp, struct ib_sge *iov) |
@@ -1854,287 +1569,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |||
1854 | } | 1569 | } |
1855 | 1570 | ||
1856 | /* | 1571 | /* |
1857 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
1858 | */ | ||
1859 | |||
1860 | static void | ||
1861 | rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) | ||
1862 | { | ||
1863 | seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
1864 | seg->mr_dmalen = seg->mr_len; | ||
1865 | if (seg->mr_page) | ||
1866 | seg->mr_dma = ib_dma_map_page(ia->ri_id->device, | ||
1867 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
1868 | seg->mr_dmalen, seg->mr_dir); | ||
1869 | else | ||
1870 | seg->mr_dma = ib_dma_map_single(ia->ri_id->device, | ||
1871 | seg->mr_offset, | ||
1872 | seg->mr_dmalen, seg->mr_dir); | ||
1873 | if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) { | ||
1874 | dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n", | ||
1875 | __func__, | ||
1876 | (unsigned long long)seg->mr_dma, | ||
1877 | seg->mr_offset, seg->mr_dmalen); | ||
1878 | } | ||
1879 | } | ||
1880 | |||
1881 | static void | ||
1882 | rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | ||
1883 | { | ||
1884 | if (seg->mr_page) | ||
1885 | ib_dma_unmap_page(ia->ri_id->device, | ||
1886 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1887 | else | ||
1888 | ib_dma_unmap_single(ia->ri_id->device, | ||
1889 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1890 | } | ||
1891 | |||
1892 | static int | ||
1893 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1894 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1895 | struct rpcrdma_xprt *r_xprt) | ||
1896 | { | ||
1897 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1898 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
1899 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
1900 | struct ib_mr *mr = frmr->fr_mr; | ||
1901 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
1902 | u8 key; | ||
1903 | int len, pageoff; | ||
1904 | int i, rc; | ||
1905 | int seg_len; | ||
1906 | u64 pa; | ||
1907 | int page_no; | ||
1908 | |||
1909 | pageoff = offset_in_page(seg1->mr_offset); | ||
1910 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1911 | seg1->mr_len += pageoff; | ||
1912 | len = -pageoff; | ||
1913 | if (*nsegs > ia->ri_max_frmr_depth) | ||
1914 | *nsegs = ia->ri_max_frmr_depth; | ||
1915 | for (page_no = i = 0; i < *nsegs;) { | ||
1916 | rpcrdma_map_one(ia, seg, writing); | ||
1917 | pa = seg->mr_dma; | ||
1918 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
1919 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
1920 | pa += PAGE_SIZE; | ||
1921 | } | ||
1922 | len += seg->mr_len; | ||
1923 | ++seg; | ||
1924 | ++i; | ||
1925 | /* Check for holes */ | ||
1926 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1927 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1928 | break; | ||
1929 | } | ||
1930 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
1931 | __func__, mw, i); | ||
1932 | |||
1933 | frmr->fr_state = FRMR_IS_VALID; | ||
1934 | |||
1935 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
1936 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
1937 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
1938 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma; | ||
1939 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
1940 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
1941 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1942 | fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; | ||
1943 | if (fastreg_wr.wr.fast_reg.length < len) { | ||
1944 | rc = -EIO; | ||
1945 | goto out_err; | ||
1946 | } | ||
1947 | |||
1948 | /* Bump the key */ | ||
1949 | key = (u8)(mr->rkey & 0x000000FF); | ||
1950 | ib_update_fast_reg_key(mr, ++key); | ||
1951 | |||
1952 | fastreg_wr.wr.fast_reg.access_flags = (writing ? | ||
1953 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
1954 | IB_ACCESS_REMOTE_READ); | ||
1955 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
1956 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1957 | |||
1958 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
1959 | if (rc) { | ||
1960 | dprintk("RPC: %s: failed ib_post_send for register," | ||
1961 | " status %i\n", __func__, rc); | ||
1962 | ib_update_fast_reg_key(mr, --key); | ||
1963 | goto out_err; | ||
1964 | } else { | ||
1965 | seg1->mr_rkey = mr->rkey; | ||
1966 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1967 | seg1->mr_nsegs = i; | ||
1968 | seg1->mr_len = len; | ||
1969 | } | ||
1970 | *nsegs = i; | ||
1971 | return 0; | ||
1972 | out_err: | ||
1973 | frmr->fr_state = FRMR_IS_INVALID; | ||
1974 | while (i--) | ||
1975 | rpcrdma_unmap_one(ia, --seg); | ||
1976 | return rc; | ||
1977 | } | ||
1978 | |||
1979 | static int | ||
1980 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1981 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
1982 | { | ||
1983 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1984 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1985 | int rc; | ||
1986 | |||
1987 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1988 | |||
1989 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
1990 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
1991 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1992 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
1993 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1994 | |||
1995 | read_lock(&ia->ri_qplock); | ||
1996 | while (seg1->mr_nsegs--) | ||
1997 | rpcrdma_unmap_one(ia, seg++); | ||
1998 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1999 | read_unlock(&ia->ri_qplock); | ||
2000 | if (rc) { | ||
2001 | /* Force rpcrdma_buffer_get() to retry */ | ||
2002 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
2003 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
2004 | " status %i\n", __func__, rc); | ||
2005 | } | ||
2006 | return rc; | ||
2007 | } | ||
2008 | |||
2009 | static int | ||
2010 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
2011 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
2012 | { | ||
2013 | struct rpcrdma_mr_seg *seg1 = seg; | ||
2014 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
2015 | int len, pageoff, i, rc; | ||
2016 | |||
2017 | pageoff = offset_in_page(seg1->mr_offset); | ||
2018 | seg1->mr_offset -= pageoff; /* start of page */ | ||
2019 | seg1->mr_len += pageoff; | ||
2020 | len = -pageoff; | ||
2021 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
2022 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
2023 | for (i = 0; i < *nsegs;) { | ||
2024 | rpcrdma_map_one(ia, seg, writing); | ||
2025 | physaddrs[i] = seg->mr_dma; | ||
2026 | len += seg->mr_len; | ||
2027 | ++seg; | ||
2028 | ++i; | ||
2029 | /* Check for holes */ | ||
2030 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
2031 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
2032 | break; | ||
2033 | } | ||
2034 | rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
2035 | if (rc) { | ||
2036 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
2037 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
2038 | len, (unsigned long long)seg1->mr_dma, | ||
2039 | pageoff, i, rc); | ||
2040 | while (i--) | ||
2041 | rpcrdma_unmap_one(ia, --seg); | ||
2042 | } else { | ||
2043 | seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; | ||
2044 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
2045 | seg1->mr_nsegs = i; | ||
2046 | seg1->mr_len = len; | ||
2047 | } | ||
2048 | *nsegs = i; | ||
2049 | return rc; | ||
2050 | } | ||
2051 | |||
2052 | static int | ||
2053 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
2054 | struct rpcrdma_ia *ia) | ||
2055 | { | ||
2056 | struct rpcrdma_mr_seg *seg1 = seg; | ||
2057 | LIST_HEAD(l); | ||
2058 | int rc; | ||
2059 | |||
2060 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
2061 | rc = ib_unmap_fmr(&l); | ||
2062 | read_lock(&ia->ri_qplock); | ||
2063 | while (seg1->mr_nsegs--) | ||
2064 | rpcrdma_unmap_one(ia, seg++); | ||
2065 | read_unlock(&ia->ri_qplock); | ||
2066 | if (rc) | ||
2067 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
2068 | " status %i\n", __func__, rc); | ||
2069 | return rc; | ||
2070 | } | ||
2071 | |||
2072 | int | ||
2073 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | ||
2074 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | ||
2075 | { | ||
2076 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
2077 | int rc = 0; | ||
2078 | |||
2079 | switch (ia->ri_memreg_strategy) { | ||
2080 | |||
2081 | case RPCRDMA_ALLPHYSICAL: | ||
2082 | rpcrdma_map_one(ia, seg, writing); | ||
2083 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
2084 | seg->mr_base = seg->mr_dma; | ||
2085 | seg->mr_nsegs = 1; | ||
2086 | nsegs = 1; | ||
2087 | break; | ||
2088 | |||
2089 | /* Registration using frmr registration */ | ||
2090 | case RPCRDMA_FRMR: | ||
2091 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
2092 | break; | ||
2093 | |||
2094 | /* Registration using fmr memory registration */ | ||
2095 | case RPCRDMA_MTHCAFMR: | ||
2096 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | ||
2097 | break; | ||
2098 | |||
2099 | default: | ||
2100 | return -EIO; | ||
2101 | } | ||
2102 | if (rc) | ||
2103 | return rc; | ||
2104 | |||
2105 | return nsegs; | ||
2106 | } | ||
2107 | |||
2108 | int | ||
2109 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | ||
2110 | struct rpcrdma_xprt *r_xprt) | ||
2111 | { | ||
2112 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
2113 | int nsegs = seg->mr_nsegs, rc; | ||
2114 | |||
2115 | switch (ia->ri_memreg_strategy) { | ||
2116 | |||
2117 | case RPCRDMA_ALLPHYSICAL: | ||
2118 | read_lock(&ia->ri_qplock); | ||
2119 | rpcrdma_unmap_one(ia, seg); | ||
2120 | read_unlock(&ia->ri_qplock); | ||
2121 | break; | ||
2122 | |||
2123 | case RPCRDMA_FRMR: | ||
2124 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
2125 | break; | ||
2126 | |||
2127 | case RPCRDMA_MTHCAFMR: | ||
2128 | rc = rpcrdma_deregister_fmr_external(seg, ia); | ||
2129 | break; | ||
2130 | |||
2131 | default: | ||
2132 | break; | ||
2133 | } | ||
2134 | return nsegs; | ||
2135 | } | ||
2136 | |||
2137 | /* | ||
2138 | * Prepost any receive buffer, then post send. | 1572 | * Prepost any receive buffer, then post send. |
2139 | * | 1573 | * |
2140 | * Receive buffer is donated to hardware, reclaimed upon recv completion. | 1574 | * Receive buffer is donated to hardware, reclaimed upon recv completion. |
@@ -2156,7 +1590,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
2156 | } | 1590 | } |
2157 | 1591 | ||
2158 | send_wr.next = NULL; | 1592 | send_wr.next = NULL; |
2159 | send_wr.wr_id = 0ULL; /* no send cookie */ | 1593 | send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION; |
2160 | send_wr.sg_list = req->rl_send_iov; | 1594 | send_wr.sg_list = req->rl_send_iov; |
2161 | send_wr.num_sge = req->rl_niovs; | 1595 | send_wr.num_sge = req->rl_niovs; |
2162 | send_wr.opcode = IB_WR_SEND; | 1596 | send_wr.opcode = IB_WR_SEND; |
@@ -2215,43 +1649,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
2215 | return rc; | 1649 | return rc; |
2216 | } | 1650 | } |
2217 | 1651 | ||
2218 | /* Physical mapping means one Read/Write list entry per-page. | 1652 | /* How many chunk list items fit within our inline buffers? |
2219 | * All list entries must fit within an inline buffer | ||
2220 | * | ||
2221 | * NB: The server must return a Write list for NFS READ, | ||
2222 | * which has the same constraint. Factor in the inline | ||
2223 | * rsize as well. | ||
2224 | */ | 1653 | */ |
2225 | static size_t | 1654 | unsigned int |
2226 | rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt) | 1655 | rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt) |
2227 | { | 1656 | { |
2228 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | 1657 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
2229 | unsigned int inline_size, pages; | 1658 | int bytes, segments; |
2230 | 1659 | ||
2231 | inline_size = min_t(unsigned int, | 1660 | bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize); |
2232 | cdata->inline_wsize, cdata->inline_rsize); | 1661 | bytes -= RPCRDMA_HDRLEN_MIN; |
2233 | inline_size -= RPCRDMA_HDRLEN_MIN; | 1662 | if (bytes < sizeof(struct rpcrdma_segment) * 2) { |
2234 | pages = inline_size / sizeof(struct rpcrdma_segment); | 1663 | pr_warn("RPC: %s: inline threshold too small\n", |
2235 | return pages << PAGE_SHIFT; | 1664 | __func__); |
2236 | } | 1665 | return 0; |
2237 | |||
2238 | static size_t | ||
2239 | rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt) | ||
2240 | { | ||
2241 | return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; | ||
2242 | } | ||
2243 | |||
2244 | size_t | ||
2245 | rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt) | ||
2246 | { | ||
2247 | size_t result; | ||
2248 | |||
2249 | switch (r_xprt->rx_ia.ri_memreg_strategy) { | ||
2250 | case RPCRDMA_ALLPHYSICAL: | ||
2251 | result = rpcrdma_physical_max_payload(r_xprt); | ||
2252 | break; | ||
2253 | default: | ||
2254 | result = rpcrdma_mr_max_payload(r_xprt); | ||
2255 | } | 1666 | } |
2256 | return result; | 1667 | |
1668 | segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1); | ||
1669 | dprintk("RPC: %s: max chunk list size = %d segments\n", | ||
1670 | __func__, segments); | ||
1671 | return segments; | ||
2257 | } | 1672 | } |
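[Annotation] The rewritten rpcrdma_max_segments() rounds the segment count down to a power of two with fls(), the kernel's find-last-set helper (a user-space analogue is 32 - __builtin_clz(x)). A worked example, assuming the 28-byte minimum RPC-over-RDMA header and a 16-byte on-the-wire struct rpcrdma_segment (handle + length + offset); both sizes are assumptions, not stated in this hunk:

    /* Hypothetical 1024-byte inline threshold: */
    int bytes    = 1024 - 28;             /* 996 bytes left for the chunk list */
    int fit      = bytes / 16;            /* 62 segments would fit exactly */
    int segments = 1 << (fls(fit) - 1);   /* fls(62) = 6, so 1 << 5 = 32 */

The transport therefore advertises 32 segments, the largest power of two that fits.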
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fb6f0885..78e0b8beaa36 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -60,6 +60,7 @@ | |||
60 | * Interface Adapter -- one per transport instance | 60 | * Interface Adapter -- one per transport instance |
61 | */ | 61 | */ |
62 | struct rpcrdma_ia { | 62 | struct rpcrdma_ia { |
63 | const struct rpcrdma_memreg_ops *ri_ops; | ||
63 | rwlock_t ri_qplock; | 64 | rwlock_t ri_qplock; |
64 | struct rdma_cm_id *ri_id; | 65 | struct rdma_cm_id *ri_id; |
65 | struct ib_pd *ri_pd; | 66 | struct ib_pd *ri_pd; |
@@ -105,6 +106,10 @@ struct rpcrdma_ep { | |||
105 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 106 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
106 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) | 107 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) |
107 | 108 | ||
109 | /* Force completion handler to ignore the signal | ||
110 | */ | ||
111 | #define RPCRDMA_IGNORE_COMPLETION (0ULL) | ||
112 | |||
108 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV | 113 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
109 | * | 114 | * |
110 | * The below structure appears at the front of a large region of kmalloc'd | 115 | * The below structure appears at the front of a large region of kmalloc'd |
@@ -143,14 +148,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
143 | return (struct rpcrdma_msg *)rb->rg_base; | 148 | return (struct rpcrdma_msg *)rb->rg_base; |
144 | } | 149 | } |
145 | 150 | ||
146 | enum rpcrdma_chunktype { | ||
147 | rpcrdma_noch = 0, | ||
148 | rpcrdma_readch, | ||
149 | rpcrdma_areadch, | ||
150 | rpcrdma_writech, | ||
151 | rpcrdma_replych | ||
152 | }; | ||
153 | |||
154 | /* | 151 | /* |
155 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | 152 | * struct rpcrdma_rep -- this structure encapsulates state required to recv |
156 | * and complete a reply, asynchronously. It needs several pieces of | 153 | * and complete a reply, asynchronously. It needs several pieces of |
@@ -213,6 +210,7 @@ struct rpcrdma_mw { | |||
213 | struct ib_fmr *fmr; | 210 | struct ib_fmr *fmr; |
214 | struct rpcrdma_frmr frmr; | 211 | struct rpcrdma_frmr frmr; |
215 | } r; | 212 | } r; |
213 | void (*mw_sendcompletion)(struct ib_wc *); | ||
216 | struct list_head mw_list; | 214 | struct list_head mw_list; |
217 | struct list_head mw_all; | 215 | struct list_head mw_all; |
218 | }; | 216 | }; |
@@ -258,7 +256,6 @@ struct rpcrdma_req { | |||
258 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 256 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
259 | unsigned int rl_nchunks; /* non-zero if chunks */ | 257 | unsigned int rl_nchunks; /* non-zero if chunks */ |
260 | unsigned int rl_connect_cookie; /* retry detection */ | 258 | unsigned int rl_connect_cookie; /* retry detection */ |
261 | enum rpcrdma_chunktype rl_rtype, rl_wtype; | ||
262 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 259 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
263 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 260 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
264 | struct ib_sge rl_send_iov[4]; /* for active requests */ | 261 | struct ib_sge rl_send_iov[4]; /* for active requests */ |
@@ -340,6 +337,29 @@ struct rpcrdma_stats { | |||
340 | }; | 337 | }; |
341 | 338 | ||
342 | /* | 339 | /* |
340 | * Per-registration mode operations | ||
341 | */ | ||
342 | struct rpcrdma_xprt; | ||
343 | struct rpcrdma_memreg_ops { | ||
344 | int (*ro_map)(struct rpcrdma_xprt *, | ||
345 | struct rpcrdma_mr_seg *, int, bool); | ||
346 | int (*ro_unmap)(struct rpcrdma_xprt *, | ||
347 | struct rpcrdma_mr_seg *); | ||
348 | int (*ro_open)(struct rpcrdma_ia *, | ||
349 | struct rpcrdma_ep *, | ||
350 | struct rpcrdma_create_data_internal *); | ||
351 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | ||
352 | int (*ro_init)(struct rpcrdma_xprt *); | ||
353 | void (*ro_reset)(struct rpcrdma_xprt *); | ||
354 | void (*ro_destroy)(struct rpcrdma_buffer *); | ||
355 | const char *ro_displayname; | ||
356 | }; | ||
357 | |||
358 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | ||
359 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | ||
360 | extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops; | ||
361 | |||
362 | /* | ||
343 | * RPCRDMA transport -- encapsulates the structures above for | 363 | * RPCRDMA transport -- encapsulates the structures above for |
344 | * integration with RPC. | 364 | * integration with RPC. |
345 | * | 365 | * |
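[Annotation] This ri_ops vtable is what replaces the ri_memreg_strategy switches removed throughout verbs.c. As a sketch of how one of the three tables declared above might be filled in (the handler names here are hypothetical; the real definitions live in the per-mode source files this series adds):

    const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
            .ro_map         = physical_op_map,
            .ro_unmap       = physical_op_unmap,
            .ro_open        = physical_op_open,
            .ro_maxpages    = physical_op_maxpages,
            .ro_init        = physical_op_init,
            .ro_reset       = physical_op_reset,
            .ro_destroy     = physical_op_destroy,
            .ro_displayname = "physical",
    };

    /* Callers dispatch through the table instead of switching on
     * ia->ri_memreg_strategy, e.g. in xprt_rdma_free() above: */
    i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, &req->rl_segments[i]);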
@@ -398,16 +418,56 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); | |||
398 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 418 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
399 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | 419 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
400 | 420 | ||
401 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | ||
402 | int, int, struct rpcrdma_xprt *); | ||
403 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | ||
404 | struct rpcrdma_xprt *); | ||
405 | |||
406 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, | 421 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, |
407 | size_t, gfp_t); | 422 | size_t, gfp_t); |
408 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, | 423 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, |
409 | struct rpcrdma_regbuf *); | 424 | struct rpcrdma_regbuf *); |
410 | 425 | ||
426 | unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); | ||
427 | |||
428 | /* | ||
429 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
430 | */ | ||
431 | |||
432 | void rpcrdma_mapping_error(struct rpcrdma_mr_seg *); | ||
433 | |||
434 | static inline enum dma_data_direction | ||
435 | rpcrdma_data_dir(bool writing) | ||
436 | { | ||
437 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
438 | } | ||
439 | |||
440 | static inline void | ||
441 | rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg, | ||
442 | enum dma_data_direction direction) | ||
443 | { | ||
444 | seg->mr_dir = direction; | ||
445 | seg->mr_dmalen = seg->mr_len; | ||
446 | |||
447 | if (seg->mr_page) | ||
448 | seg->mr_dma = ib_dma_map_page(device, | ||
449 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
450 | seg->mr_dmalen, seg->mr_dir); | ||
451 | else | ||
452 | seg->mr_dma = ib_dma_map_single(device, | ||
453 | seg->mr_offset, | ||
454 | seg->mr_dmalen, seg->mr_dir); | ||
455 | |||
456 | if (ib_dma_mapping_error(device, seg->mr_dma)) | ||
457 | rpcrdma_mapping_error(seg); | ||
458 | } | ||
459 | |||
460 | static inline void | ||
461 | rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg) | ||
462 | { | ||
463 | if (seg->mr_page) | ||
464 | ib_dma_unmap_page(device, | ||
465 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
466 | else | ||
467 | ib_dma_unmap_single(device, | ||
468 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
469 | } | ||
470 | |||
411 | /* | 471 | /* |
412 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 472 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
413 | */ | 473 | */ |
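[Annotation] The DMA-mapping helpers are now inlines shared by every registration mode, with rpcrdma_data_dir() translating the read/write flag into a DMA direction. A sketch of the expected calling pattern (the caller code is not in this hunk):

    /* Map before posting the registration work request ... */
    rpcrdma_map_one(ia->ri_id->device, seg, rpcrdma_data_dir(writing));

    /* ... and unmap once the MR has been invalidated. */
    rpcrdma_unmap_one(ia->ri_id->device, seg);

A mapping failure is reported only through rpcrdma_mapping_error(), which logs the offending segment; the caller is expected to treat that segment as unusable.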
@@ -418,9 +478,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | |||
418 | /* | 478 | /* |
419 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | 479 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |
420 | */ | 480 | */ |
421 | ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t); | ||
422 | int rpcrdma_marshal_req(struct rpc_rqst *); | 481 | int rpcrdma_marshal_req(struct rpc_rqst *); |
423 | size_t rpcrdma_max_payload(struct rpcrdma_xprt *); | ||
424 | 482 | ||
425 | /* Temporary NFS request map cache. Created in svc_rdma.c */ | 483 | /* Temporary NFS request map cache. Created in svc_rdma.c */ |
426 | extern struct kmem_cache *svc_rdma_map_cachep; | 484 | extern struct kmem_cache *svc_rdma_map_cachep; |
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3613e72e858e..70e3dacbf84a 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c | |||
@@ -591,14 +591,14 @@ void tipc_bearer_stop(struct net *net) | |||
591 | 591 | ||
592 | /* Caller should hold rtnl_lock to protect the bearer */ | 592 | /* Caller should hold rtnl_lock to protect the bearer */ |
593 | static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, | 593 | static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, |
594 | struct tipc_bearer *bearer) | 594 | struct tipc_bearer *bearer, int nlflags) |
595 | { | 595 | { |
596 | void *hdr; | 596 | void *hdr; |
597 | struct nlattr *attrs; | 597 | struct nlattr *attrs; |
598 | struct nlattr *prop; | 598 | struct nlattr *prop; |
599 | 599 | ||
600 | hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, | 600 | hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, |
601 | NLM_F_MULTI, TIPC_NL_BEARER_GET); | 601 | nlflags, TIPC_NL_BEARER_GET); |
602 | if (!hdr) | 602 | if (!hdr) |
603 | return -EMSGSIZE; | 603 | return -EMSGSIZE; |
604 | 604 | ||
@@ -657,7 +657,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
657 | if (!bearer) | 657 | if (!bearer) |
658 | continue; | 658 | continue; |
659 | 659 | ||
660 | err = __tipc_nl_add_bearer(&msg, bearer); | 660 | err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); |
661 | if (err) | 661 | if (err) |
662 | break; | 662 | break; |
663 | } | 663 | } |
@@ -705,7 +705,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) | |||
705 | goto err_out; | 705 | goto err_out; |
706 | } | 706 | } |
707 | 707 | ||
708 | err = __tipc_nl_add_bearer(&msg, bearer); | 708 | err = __tipc_nl_add_bearer(&msg, bearer, 0); |
709 | if (err) | 709 | if (err) |
710 | goto err_out; | 710 | goto err_out; |
711 | rtnl_unlock(); | 711 | rtnl_unlock(); |
@@ -857,14 +857,14 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) | |||
857 | } | 857 | } |
858 | 858 | ||
859 | static int __tipc_nl_add_media(struct tipc_nl_msg *msg, | 859 | static int __tipc_nl_add_media(struct tipc_nl_msg *msg, |
860 | struct tipc_media *media) | 860 | struct tipc_media *media, int nlflags) |
861 | { | 861 | { |
862 | void *hdr; | 862 | void *hdr; |
863 | struct nlattr *attrs; | 863 | struct nlattr *attrs; |
864 | struct nlattr *prop; | 864 | struct nlattr *prop; |
865 | 865 | ||
866 | hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, | 866 | hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, |
867 | NLM_F_MULTI, TIPC_NL_MEDIA_GET); | 867 | nlflags, TIPC_NL_MEDIA_GET); |
868 | if (!hdr) | 868 | if (!hdr) |
869 | return -EMSGSIZE; | 869 | return -EMSGSIZE; |
870 | 870 | ||
@@ -916,7 +916,8 @@ int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
916 | 916 | ||
917 | rtnl_lock(); | 917 | rtnl_lock(); |
918 | for (; media_info_array[i] != NULL; i++) { | 918 | for (; media_info_array[i] != NULL; i++) { |
919 | err = __tipc_nl_add_media(&msg, media_info_array[i]); | 919 | err = __tipc_nl_add_media(&msg, media_info_array[i], |
920 | NLM_F_MULTI); | ||
920 | if (err) | 921 | if (err) |
921 | break; | 922 | break; |
922 | } | 923 | } |
@@ -963,7 +964,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) | |||
963 | goto err_out; | 964 | goto err_out; |
964 | } | 965 | } |
965 | 966 | ||
966 | err = __tipc_nl_add_media(&msg, media); | 967 | err = __tipc_nl_add_media(&msg, media, 0); |
967 | if (err) | 968 | if (err) |
968 | goto err_out; | 969 | goto err_out; |
969 | rtnl_unlock(); | 970 | rtnl_unlock(); |
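[Annotation] The recurring change in this file threads an nlflags argument down to genlmsg_put(), so that replies built for a netlink dump carry NLM_F_MULTI (marking them as a multipart series terminated by NLMSG_DONE) while a one-shot get reply carries no flags:

    /* Dump path: one message per bearer, flagged as part of a series */
    err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI);

    /* Get path: exactly one reply message, no multipart flag */
    err = __tipc_nl_add_bearer(&msg, bearer, 0);

The same treatment is applied to the media paths above and to the link dump/get paths in net/tipc/link.c below.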
diff --git a/net/tipc/link.c b/net/tipc/link.c index a6b30df6ec02..43a515dc97b0 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c | |||
@@ -1145,11 +1145,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) | |||
1145 | } | 1145 | } |
1146 | /* Synchronize with parallel link if applicable */ | 1146 | /* Synchronize with parallel link if applicable */ |
1147 | if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { | 1147 | if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { |
1148 | link_handle_out_of_seq_msg(l_ptr, skb); | 1148 | if (!link_synch(l_ptr)) |
1149 | if (link_synch(l_ptr)) | 1149 | goto unlock; |
1150 | link_retrieve_defq(l_ptr, &head); | ||
1151 | skb = NULL; | ||
1152 | goto unlock; | ||
1153 | } | 1150 | } |
1154 | l_ptr->next_in_no++; | 1151 | l_ptr->next_in_no++; |
1155 | if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) | 1152 | if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) |
@@ -2013,7 +2010,7 @@ msg_full: | |||
2013 | 2010 | ||
2014 | /* Caller should hold appropriate locks to protect the link */ | 2011 | /* Caller should hold appropriate locks to protect the link */ |
2015 | static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, | 2012 | static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, |
2016 | struct tipc_link *link) | 2013 | struct tipc_link *link, int nlflags) |
2017 | { | 2014 | { |
2018 | int err; | 2015 | int err; |
2019 | void *hdr; | 2016 | void *hdr; |
@@ -2022,7 +2019,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, | |||
2022 | struct tipc_net *tn = net_generic(net, tipc_net_id); | 2019 | struct tipc_net *tn = net_generic(net, tipc_net_id); |
2023 | 2020 | ||
2024 | hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, | 2021 | hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, |
2025 | NLM_F_MULTI, TIPC_NL_LINK_GET); | 2022 | nlflags, TIPC_NL_LINK_GET); |
2026 | if (!hdr) | 2023 | if (!hdr) |
2027 | return -EMSGSIZE; | 2024 | return -EMSGSIZE; |
2028 | 2025 | ||
@@ -2095,7 +2092,7 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, | |||
2095 | if (!node->links[i]) | 2092 | if (!node->links[i]) |
2096 | continue; | 2093 | continue; |
2097 | 2094 | ||
2098 | err = __tipc_nl_add_link(net, msg, node->links[i]); | 2095 | err = __tipc_nl_add_link(net, msg, node->links[i], NLM_F_MULTI); |
2099 | if (err) | 2096 | if (err) |
2100 | return err; | 2097 | return err; |
2101 | } | 2098 | } |
@@ -2143,7 +2140,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2143 | err = __tipc_nl_add_node_links(net, &msg, node, | 2140 | err = __tipc_nl_add_node_links(net, &msg, node, |
2144 | &prev_link); | 2141 | &prev_link); |
2145 | tipc_node_unlock(node); | 2142 | tipc_node_unlock(node); |
2146 | tipc_node_put(node); | ||
2147 | if (err) | 2143 | if (err) |
2148 | goto out; | 2144 | goto out; |
2149 | 2145 | ||
@@ -2210,7 +2206,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) | |||
2210 | goto err_out; | 2206 | goto err_out; |
2211 | } | 2207 | } |
2212 | 2208 | ||
2213 | err = __tipc_nl_add_link(net, &msg, link); | 2209 | err = __tipc_nl_add_link(net, &msg, link, 0); |
2214 | if (err) | 2210 | if (err) |
2215 | goto err_out; | 2211 | goto err_out; |
2216 | 2212 | ||
diff --git a/net/tipc/server.c b/net/tipc/server.c index ab6183cdb121..77ff03ed1e18 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c | |||
@@ -102,7 +102,7 @@ static void tipc_conn_kref_release(struct kref *kref) | |||
102 | } | 102 | } |
103 | saddr->scope = -TIPC_NODE_SCOPE; | 103 | saddr->scope = -TIPC_NODE_SCOPE; |
104 | kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); | 104 | kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); |
105 | sk_release_kernel(sk); | 105 | sock_release(sock); |
106 | con->sock = NULL; | 106 | con->sock = NULL; |
107 | } | 107 | } |
108 | 108 | ||
@@ -321,12 +321,9 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) | |||
321 | struct socket *sock = NULL; | 321 | struct socket *sock = NULL; |
322 | int ret; | 322 | int ret; |
323 | 323 | ||
324 | ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock); | 324 | ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1); |
325 | if (ret < 0) | 325 | if (ret < 0) |
326 | return NULL; | 326 | return NULL; |
327 | |||
328 | sk_change_net(sock->sk, s->net); | ||
329 | |||
330 | ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, | 327 | ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, |
331 | (char *)&s->imp, sizeof(s->imp)); | 328 | (char *)&s->imp, sizeof(s->imp)); |
332 | if (ret < 0) | 329 | if (ret < 0) |
@@ -376,7 +373,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) | |||
376 | 373 | ||
377 | create_err: | 374 | create_err: |
378 | kernel_sock_shutdown(sock, SHUT_RDWR); | 375 | kernel_sock_shutdown(sock, SHUT_RDWR); |
379 | sk_release_kernel(sock->sk); | 376 | sock_release(sock); |
380 | return NULL; | 377 | return NULL; |
381 | } | 378 | } |
382 | 379 | ||
diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ee90d74d7516..9074b5cede38 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c | |||
@@ -1764,13 +1764,14 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, | |||
1764 | int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) | 1764 | int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) |
1765 | { | 1765 | { |
1766 | u32 dnode, dport = 0; | 1766 | u32 dnode, dport = 0; |
1767 | int err = -TIPC_ERR_NO_PORT; | 1767 | int err; |
1768 | struct sk_buff *skb; | 1768 | struct sk_buff *skb; |
1769 | struct tipc_sock *tsk; | 1769 | struct tipc_sock *tsk; |
1770 | struct tipc_net *tn; | 1770 | struct tipc_net *tn; |
1771 | struct sock *sk; | 1771 | struct sock *sk; |
1772 | 1772 | ||
1773 | while (skb_queue_len(inputq)) { | 1773 | while (skb_queue_len(inputq)) { |
1774 | err = -TIPC_ERR_NO_PORT; | ||
1774 | skb = NULL; | 1775 | skb = NULL; |
1775 | dport = tipc_skb_peek_port(inputq, dport); | 1776 | dport = tipc_skb_peek_port(inputq, dport); |
1776 | tsk = tipc_sk_lookup(net, dport); | 1777 | tsk = tipc_sk_lookup(net, dport); |
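
The socket.c hunk is a genuine bug fix rather than cleanup: err was initialized to -TIPC_ERR_NO_PORT once, outside the receive loop, so from the second iteration onward a value left over from the previous packet could be acted upon for the wrong one. Moving the assignment inside the while loop restores the intended per-packet default. The bug class in miniature (illustrative user-space C, not TIPC's code):

#include <errno.h>

struct item { int key; };

static int lookup(const struct item *it) { return it->key != 0; }
static int handle(const struct item *it) { (void)it; return 0; }

static void process_all(struct item *items, int n)
{
	int err;

	for (int i = 0; i < n; i++) {
		err = -ENOENT;	/* must be reset every pass; hoisting
				 * this above the loop lets item i see
				 * the status left behind by item i-1 */
		if (lookup(&items[i]))
			err = handle(&items[i]);
		if (err)
			continue;	/* reject only this item */
	}
}
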
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 433f287ee548..5266ea7b922b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -305,7 +305,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) | |||
305 | &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { | 305 | &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { |
306 | struct dentry *dentry = unix_sk(s)->path.dentry; | 306 | struct dentry *dentry = unix_sk(s)->path.dentry; |
307 | 307 | ||
308 | if (dentry && dentry->d_inode == i) { | 308 | if (dentry && d_backing_inode(dentry) == i) { |
309 | sock_hold(s); | 309 | sock_hold(s); |
310 | goto found; | 310 | goto found; |
311 | } | 311 | } |
@@ -778,7 +778,7 @@ static struct sock *unix_find_other(struct net *net, | |||
778 | err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); | 778 | err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); |
779 | if (err) | 779 | if (err) |
780 | goto fail; | 780 | goto fail; |
781 | inode = path.dentry->d_inode; | 781 | inode = d_backing_inode(path.dentry); |
782 | err = inode_permission(inode, MAY_WRITE); | 782 | err = inode_permission(inode, MAY_WRITE); |
783 | if (err) | 783 | if (err) |
784 | goto put_fail; | 784 | goto put_fail; |
@@ -839,7 +839,7 @@ static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) | |||
839 | */ | 839 | */ |
840 | err = security_path_mknod(&path, dentry, mode, 0); | 840 | err = security_path_mknod(&path, dentry, mode, 0); |
841 | if (!err) { | 841 | if (!err) { |
842 | err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); | 842 | err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); |
843 | if (!err) { | 843 | if (!err) { |
844 | res->mnt = mntget(path.mnt); | 844 | res->mnt = mntget(path.mnt); |
845 | res->dentry = dget(dentry); | 845 | res->dentry = dget(dentry); |
@@ -905,7 +905,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
905 | goto out_up; | 905 | goto out_up; |
906 | } | 906 | } |
907 | addr->hash = UNIX_HASH_SIZE; | 907 | addr->hash = UNIX_HASH_SIZE; |
908 | hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1); | 908 | hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1); |
909 | spin_lock(&unix_table_lock); | 909 | spin_lock(&unix_table_lock); |
910 | u->path = path; | 910 | u->path = path; |
911 | list = &unix_socket_table[hash]; | 911 | list = &unix_socket_table[hash]; |
diff --git a/net/unix/diag.c b/net/unix/diag.c index ef542fbca9fe..c512f64d5287 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c | |||
@@ -25,7 +25,7 @@ static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb) | |||
25 | 25 | ||
26 | if (dentry) { | 26 | if (dentry) { |
27 | struct unix_diag_vfs uv = { | 27 | struct unix_diag_vfs uv = { |
28 | .udiag_vfs_ino = dentry->d_inode->i_ino, | 28 | .udiag_vfs_ino = d_backing_inode(dentry)->i_ino, |
29 | .udiag_vfs_dev = dentry->d_sb->s_dev, | 29 | .udiag_vfs_dev = dentry->d_sb->s_dev, |
30 | }; | 30 | }; |
31 | 31 | ||
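
The af_unix.c and diag.c hunks are part of a tree-wide conversion away from raw dentry->d_inode dereferences to the d_inode()/d_backing_inode() accessors. The distinction matters on union and overlay mounts: d_backing_inode() names the inode that actually backs the object's data, which is what you want when comparing against an inode found by path lookup or when exporting i_ino, while d_inode() is the plain accessor kept where the dentry's own inode is meant, as in the vfs_mknod() call on the parent directory. A minimal sketch of the comparison the byinode lookup performs:

#include <linux/dcache.h>
#include <linux/fs.h>

/* Compare a socket's bound dentry against a target inode through the
 * accessor; on ordinary filesystems d_backing_inode(dentry) is just
 * dentry->d_inode, on layered ones it is the real backing layer. */
static bool demo_same_object(struct dentry *dentry, struct inode *target)
{
	return dentry && d_backing_inode(dentry) == target;
}
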
diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 99f7012b23b9..a73a226f2d33 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c | |||
@@ -95,39 +95,36 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); | |||
95 | 95 | ||
96 | unsigned int unix_tot_inflight; | 96 | unsigned int unix_tot_inflight; |
97 | 97 | ||
98 | |||
99 | struct sock *unix_get_socket(struct file *filp) | 98 | struct sock *unix_get_socket(struct file *filp) |
100 | { | 99 | { |
101 | struct sock *u_sock = NULL; | 100 | struct sock *u_sock = NULL; |
102 | struct inode *inode = file_inode(filp); | 101 | struct inode *inode = file_inode(filp); |
103 | 102 | ||
104 | /* | 103 | /* Socket ? */ |
105 | * Socket ? | ||
106 | */ | ||
107 | if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { | 104 | if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { |
108 | struct socket *sock = SOCKET_I(inode); | 105 | struct socket *sock = SOCKET_I(inode); |
109 | struct sock *s = sock->sk; | 106 | struct sock *s = sock->sk; |
110 | 107 | ||
111 | /* | 108 | /* PF_UNIX ? */ |
112 | * PF_UNIX ? | ||
113 | */ | ||
114 | if (s && sock->ops && sock->ops->family == PF_UNIX) | 109 | if (s && sock->ops && sock->ops->family == PF_UNIX) |
115 | u_sock = s; | 110 | u_sock = s; |
116 | } | 111 | } |
117 | return u_sock; | 112 | return u_sock; |
118 | } | 113 | } |
119 | 114 | ||
120 | /* | 115 | /* Keep the number of times in flight count for the file |
121 | * Keep the number of times in flight count for the file | 116 | * descriptor if it is for an AF_UNIX socket. |
122 | * descriptor if it is for an AF_UNIX socket. | ||
123 | */ | 117 | */ |
124 | 118 | ||
125 | void unix_inflight(struct file *fp) | 119 | void unix_inflight(struct file *fp) |
126 | { | 120 | { |
127 | struct sock *s = unix_get_socket(fp); | 121 | struct sock *s = unix_get_socket(fp); |
122 | |||
128 | if (s) { | 123 | if (s) { |
129 | struct unix_sock *u = unix_sk(s); | 124 | struct unix_sock *u = unix_sk(s); |
125 | |||
130 | spin_lock(&unix_gc_lock); | 126 | spin_lock(&unix_gc_lock); |
127 | |||
131 | if (atomic_long_inc_return(&u->inflight) == 1) { | 128 | if (atomic_long_inc_return(&u->inflight) == 1) { |
132 | BUG_ON(!list_empty(&u->link)); | 129 | BUG_ON(!list_empty(&u->link)); |
133 | list_add_tail(&u->link, &gc_inflight_list); | 130 | list_add_tail(&u->link, &gc_inflight_list); |
@@ -142,10 +139,13 @@ void unix_inflight(struct file *fp) | |||
142 | void unix_notinflight(struct file *fp) | 139 | void unix_notinflight(struct file *fp) |
143 | { | 140 | { |
144 | struct sock *s = unix_get_socket(fp); | 141 | struct sock *s = unix_get_socket(fp); |
142 | |||
145 | if (s) { | 143 | if (s) { |
146 | struct unix_sock *u = unix_sk(s); | 144 | struct unix_sock *u = unix_sk(s); |
145 | |||
147 | spin_lock(&unix_gc_lock); | 146 | spin_lock(&unix_gc_lock); |
148 | BUG_ON(list_empty(&u->link)); | 147 | BUG_ON(list_empty(&u->link)); |
148 | |||
149 | if (atomic_long_dec_and_test(&u->inflight)) | 149 | if (atomic_long_dec_and_test(&u->inflight)) |
150 | list_del_init(&u->link); | 150 | list_del_init(&u->link); |
151 | unix_tot_inflight--; | 151 | unix_tot_inflight--; |
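
The unix_inflight()/unix_notinflight() hunks only adjust comment style and blank lines, but they are a good place to state the invariant the pair maintains: a socket's u->inflight counts how many copies of its file descriptor currently sit inside queued SCM_RIGHTS messages; the 0->1 transition links the socket onto gc_inflight_list so the garbage collector can consider it, and the 1->0 transition unlinks it. A simplified restatement of that invariant, with the unix_gc_lock locking and BUG_ON checks from the real code elided:

static void inflight_get(struct unix_sock *u)
{
	/* first in-flight reference makes the socket visible to GC */
	if (atomic_long_inc_return(&u->inflight) == 1)
		list_add_tail(&u->link, &gc_inflight_list);
	unix_tot_inflight++;
}

static void inflight_put(struct unix_sock *u)
{
	/* last in-flight reference gone: hide it from GC again */
	if (atomic_long_dec_and_test(&u->inflight))
		list_del_init(&u->link);
	unix_tot_inflight--;
}
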
@@ -161,32 +161,27 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), | |||
161 | 161 | ||
162 | spin_lock(&x->sk_receive_queue.lock); | 162 | spin_lock(&x->sk_receive_queue.lock); |
163 | skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { | 163 | skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { |
164 | /* | 164 | /* Do we have file descriptors ? */ |
165 | * Do we have file descriptors ? | ||
166 | */ | ||
167 | if (UNIXCB(skb).fp) { | 165 | if (UNIXCB(skb).fp) { |
168 | bool hit = false; | 166 | bool hit = false; |
169 | /* | 167 | /* Process the descriptors of this socket */ |
170 | * Process the descriptors of this socket | ||
171 | */ | ||
172 | int nfd = UNIXCB(skb).fp->count; | 168 | int nfd = UNIXCB(skb).fp->count; |
173 | struct file **fp = UNIXCB(skb).fp->fp; | 169 | struct file **fp = UNIXCB(skb).fp->fp; |
170 | |||
174 | while (nfd--) { | 171 | while (nfd--) { |
175 | /* | 172 | /* Get the socket the fd matches if it indeed does so */ |
176 | * Get the socket the fd matches | ||
177 | * if it indeed does so | ||
178 | */ | ||
179 | struct sock *sk = unix_get_socket(*fp++); | 173 | struct sock *sk = unix_get_socket(*fp++); |
174 | |||
180 | if (sk) { | 175 | if (sk) { |
181 | struct unix_sock *u = unix_sk(sk); | 176 | struct unix_sock *u = unix_sk(sk); |
182 | 177 | ||
183 | /* | 178 | /* Ignore non-candidates, they could |
184 | * Ignore non-candidates, they could | ||
185 | * have been added to the queues after | 179 | * have been added to the queues after |
186 | * starting the garbage collection | 180 | * starting the garbage collection |
187 | */ | 181 | */ |
188 | if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { | 182 | if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { |
189 | hit = true; | 183 | hit = true; |
184 | |||
190 | func(u); | 185 | func(u); |
191 | } | 186 | } |
192 | } | 187 | } |
@@ -203,24 +198,22 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), | |||
203 | static void scan_children(struct sock *x, void (*func)(struct unix_sock *), | 198 | static void scan_children(struct sock *x, void (*func)(struct unix_sock *), |
204 | struct sk_buff_head *hitlist) | 199 | struct sk_buff_head *hitlist) |
205 | { | 200 | { |
206 | if (x->sk_state != TCP_LISTEN) | 201 | if (x->sk_state != TCP_LISTEN) { |
207 | scan_inflight(x, func, hitlist); | 202 | scan_inflight(x, func, hitlist); |
208 | else { | 203 | } else { |
209 | struct sk_buff *skb; | 204 | struct sk_buff *skb; |
210 | struct sk_buff *next; | 205 | struct sk_buff *next; |
211 | struct unix_sock *u; | 206 | struct unix_sock *u; |
212 | LIST_HEAD(embryos); | 207 | LIST_HEAD(embryos); |
213 | 208 | ||
214 | /* | 209 | /* For a listening socket collect the queued embryos |
215 | * For a listening socket collect the queued embryos | ||
216 | * and perform a scan on them as well. | 210 | * and perform a scan on them as well. |
217 | */ | 211 | */ |
218 | spin_lock(&x->sk_receive_queue.lock); | 212 | spin_lock(&x->sk_receive_queue.lock); |
219 | skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { | 213 | skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { |
220 | u = unix_sk(skb->sk); | 214 | u = unix_sk(skb->sk); |
221 | 215 | ||
222 | /* | 216 | /* An embryo cannot be in-flight, so it's safe |
223 | * An embryo cannot be in-flight, so it's safe | ||
224 | * to use the list link. | 217 | * to use the list link. |
225 | */ | 218 | */ |
226 | BUG_ON(!list_empty(&u->link)); | 219 | BUG_ON(!list_empty(&u->link)); |
@@ -249,8 +242,7 @@ static void inc_inflight(struct unix_sock *usk) | |||
249 | static void inc_inflight_move_tail(struct unix_sock *u) | 242 | static void inc_inflight_move_tail(struct unix_sock *u) |
250 | { | 243 | { |
251 | atomic_long_inc(&u->inflight); | 244 | atomic_long_inc(&u->inflight); |
252 | /* | 245 | /* If this still might be part of a cycle, move it to the end |
253 | * If this still might be part of a cycle, move it to the end | ||
254 | * of the list, so that it's checked even if it was already | 246 | * of the list, so that it's checked even if it was already |
255 | * passed over | 247 | * passed over |
256 | */ | 248 | */ |
@@ -263,8 +255,7 @@ static bool gc_in_progress; | |||
263 | 255 | ||
264 | void wait_for_unix_gc(void) | 256 | void wait_for_unix_gc(void) |
265 | { | 257 | { |
266 | /* | 258 | /* If number of inflight sockets is insane, |
267 | * If number of inflight sockets is insane, | ||
268 | * force a garbage collect right now. | 259 | * force a garbage collect right now. |
269 | */ | 260 | */ |
270 | if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) | 261 | if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) |
@@ -288,8 +279,7 @@ void unix_gc(void) | |||
288 | goto out; | 279 | goto out; |
289 | 280 | ||
290 | gc_in_progress = true; | 281 | gc_in_progress = true; |
291 | /* | 282 | /* First, select candidates for garbage collection. Only |
292 | * First, select candidates for garbage collection. Only | ||
293 | * in-flight sockets are considered, and from those only ones | 283 | * in-flight sockets are considered, and from those only ones |
294 | * which don't have any external reference. | 284 | * which don't have any external reference. |
295 | * | 285 | * |
@@ -320,15 +310,13 @@ void unix_gc(void) | |||
320 | } | 310 | } |
321 | } | 311 | } |
322 | 312 | ||
323 | /* | 313 | /* Now remove all internal in-flight reference to children of |
324 | * Now remove all internal in-flight reference to children of | ||
325 | * the candidates. | 314 | * the candidates. |
326 | */ | 315 | */ |
327 | list_for_each_entry(u, &gc_candidates, link) | 316 | list_for_each_entry(u, &gc_candidates, link) |
328 | scan_children(&u->sk, dec_inflight, NULL); | 317 | scan_children(&u->sk, dec_inflight, NULL); |
329 | 318 | ||
330 | /* | 319 | /* Restore the references for children of all candidates, |
331 | * Restore the references for children of all candidates, | ||
332 | * which have remaining references. Do this recursively, so | 320 | * which have remaining references. Do this recursively, so |
333 | * only those remain, which form cyclic references. | 321 | * only those remain, which form cyclic references. |
334 | * | 322 | * |
@@ -350,8 +338,7 @@ void unix_gc(void) | |||
350 | } | 338 | } |
351 | list_del(&cursor); | 339 | list_del(&cursor); |
352 | 340 | ||
353 | /* | 341 | /* not_cycle_list contains those sockets which do not make up a |
354 | * not_cycle_list contains those sockets which do not make up a | ||
355 | * cycle. Restore these to the inflight list. | 342 | * cycle. Restore these to the inflight list. |
356 | */ | 343 | */ |
357 | while (!list_empty(¬_cycle_list)) { | 344 | while (!list_empty(¬_cycle_list)) { |
@@ -360,8 +347,7 @@ void unix_gc(void) | |||
360 | list_move_tail(&u->link, &gc_inflight_list); | 347 | list_move_tail(&u->link, &gc_inflight_list); |
361 | } | 348 | } |
362 | 349 | ||
363 | /* | 350 | /* Now gc_candidates contains only garbage. Restore original |
364 | * Now gc_candidates contains only garbage. Restore original | ||
365 | * inflight counters for these as well, and remove the skbuffs | 351 | * inflight counters for these as well, and remove the skbuffs |
366 | * which are creating the cycle(s). | 352 | * which are creating the cycle(s). |
367 | */ | 353 | */ |
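
The remaining garbage.c hunks reflow comments to the net/ one-true style without touching logic, but the reflowed comments do spell out the algorithm: select in-flight-only sockets as candidates, subtract the references candidates hold on one another, recursively restore counts for anything that turns out to be externally reachable (inc_inflight_move_tail keeps possibly-cyclic nodes at the tail so they are revisited), move the survivors back to the inflight list, and finally free the skbs that carry the cyclic descriptors. That final purge uses the collect-under-lock, free-outside pattern; a sketch of it, simplified to move every queued skb rather than only the cycle members scan_children() would select:

#include <linux/skbuff.h>

static void demo_purge_queue(struct sk_buff_head *queue)
{
	struct sk_buff_head hitlist;
	struct sk_buff *skb, *next;

	skb_queue_head_init(&hitlist);

	/* detach victims while holding the queue lock ... */
	spin_lock(&queue->lock);
	skb_queue_walk_safe(queue, skb, next) {
		__skb_unlink(skb, queue);
		__skb_queue_tail(&hitlist, skb);
	}
	spin_unlock(&queue->lock);

	/* ... and free them outside it; freeing each SCM_RIGHTS skb
	 * drops the fd references that were keeping the cycle alive */
	__skb_queue_purge(&hitlist);
}
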