about summary refs log tree commit diff stats
path: root/net/tipc/socket.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-17 19:26:30 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-17 19:26:30 -0400
commita7fd20d1c476af4563e66865213474a2f9f473a4 (patch)
treefb1399e2f82842450245fb058a8fb23c52865f43 /net/tipc/socket.c
parentb80fed9595513384424cd141923c9161c4b5021b (diff)
parent917fa5353da05e8a0045b8acacba8d50400d5b12 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Support SPI based w5100 devices, from Akinobu Mita. 2) Partial Segmentation Offload, from Alexander Duyck. 3) Add GMAC4 support to stmmac driver, from Alexandre TORGUE. 4) Allow cls_flower stats offload, from Amir Vadai. 5) Implement bpf blinding, from Daniel Borkmann. 6) Optimize _ASYNC_ bit twiddling on sockets, unless the socket is actually using FASYNC these atomics are superfluous. From Eric Dumazet. 7) Run TCP more preemptibly, also from Eric Dumazet. 8) Support LED blinking, EEPROM dumps, and rxvlan offloading in mlx5e driver, from Gal Pressman. 9) Allow creating ppp devices via rtnetlink, from Guillaume Nault. 10) Improve BPF usage documentation, from Jesper Dangaard Brouer. 11) Support tunneling offloads in qed, from Manish Chopra. 12) aRFS offloading in mlx5e, from Maor Gottlieb. 13) Add RFS and RPS support to SCTP protocol, from Marcelo Ricardo Leitner. 14) Add MSG_EOR support to TCP, this allows controlling packet coalescing on application record boundaries for more accurate socket timestamp sampling. From Martin KaFai Lau. 15) Fix alignment of 64-bit netlink attributes across the board, from Nicolas Dichtel. 16) Per-vlan stats in bridging, from Nikolay Aleksandrov. 17) Several conversions of drivers to ethtool ksettings, from Philippe Reynes. 18) Checksum neutral ILA in ipv6, from Tom Herbert. 
19) Factorize all of the various marvell dsa drivers into one, from Vivien Didelot 20) Add VF support to qed driver, from Yuval Mintz" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1649 commits) Revert "phy dp83867: Fix compilation with CONFIG_OF_MDIO=m" Revert "phy dp83867: Make rgmii parameters optional" r8169: default to 64-bit DMA on recent PCIe chips phy dp83867: Make rgmii parameters optional phy dp83867: Fix compilation with CONFIG_OF_MDIO=m bpf: arm64: remove callee-save registers use for tmp registers asix: Fix offset calculation in asix_rx_fixup() causing slow transmissions switchdev: pass pointer to fib_info instead of copy net_sched: close another race condition in tcf_mirred_release() tipc: fix nametable publication field in nl compat drivers: net: Don't print unpopulated net_device name qed: add support for dcbx. ravb: Add missing free_irq() calls to ravb_close() qed: Remove a stray tab net: ethernet: fec-mpc52xx: use phy_ethtool_{get|set}_link_ksettings net: ethernet: fec-mpc52xx: use phydev from struct net_device bpf, doc: fix typo on bpf_asm descriptions stmmac: hardware TX COE doesn't work when force_thresh_dma_mode is set net: ethernet: fs-enet: use phy_ethtool_{get|set}_link_ksettings net: ethernet: fs-enet: use phydev from struct net_device ...
Diffstat (limited to 'net/tipc/socket.c')
-rw-r--r--net/tipc/socket.c147
1 file changed, 98 insertions(+), 49 deletions(-)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3eeb50a27b89..3b7a79991d55 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -96,8 +96,11 @@ struct tipc_sock {
96 uint conn_timeout; 96 uint conn_timeout;
97 atomic_t dupl_rcvcnt; 97 atomic_t dupl_rcvcnt;
98 bool link_cong; 98 bool link_cong;
99 uint sent_unacked; 99 u16 snt_unacked;
100 uint rcv_unacked; 100 u16 snd_win;
101 u16 peer_caps;
102 u16 rcv_unacked;
103 u16 rcv_win;
101 struct sockaddr_tipc remote; 104 struct sockaddr_tipc remote;
102 struct rhash_head node; 105 struct rhash_head node;
103 struct rcu_head rcu; 106 struct rcu_head rcu;
@@ -227,9 +230,29 @@ static struct tipc_sock *tipc_sk(const struct sock *sk)
227 return container_of(sk, struct tipc_sock, sk); 230 return container_of(sk, struct tipc_sock, sk);
228} 231}
229 232
230static int tsk_conn_cong(struct tipc_sock *tsk) 233static bool tsk_conn_cong(struct tipc_sock *tsk)
231{ 234{
232 return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; 235 return tsk->snt_unacked >= tsk->snd_win;
236}
237
238/* tsk_blocks(): translate a buffer size in bytes to number of
239 * advertisable blocks, taking into account the ratio truesize(len)/len
240 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
241 */
242static u16 tsk_adv_blocks(int len)
243{
244 return len / FLOWCTL_BLK_SZ / 4;
245}
246
247/* tsk_inc(): increment counter for sent or received data
248 * - If block based flow control is not supported by peer we
249 * fall back to message based ditto, incrementing the counter
250 */
251static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
252{
253 if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
254 return ((msglen / FLOWCTL_BLK_SZ) + 1);
255 return 1;
233} 256}
234 257
235/** 258/**
@@ -377,9 +400,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
377 sk->sk_write_space = tipc_write_space; 400 sk->sk_write_space = tipc_write_space;
378 sk->sk_destruct = tipc_sock_destruct; 401 sk->sk_destruct = tipc_sock_destruct;
379 tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; 402 tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
380 tsk->sent_unacked = 0;
381 atomic_set(&tsk->dupl_rcvcnt, 0); 403 atomic_set(&tsk->dupl_rcvcnt, 0);
382 404
405 /* Start out with safe limits until we receive an advertised window */
406 tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
407 tsk->rcv_win = tsk->snd_win;
408
383 if (sock->state == SS_READY) { 409 if (sock->state == SS_READY) {
384 tsk_set_unreturnable(tsk, true); 410 tsk_set_unreturnable(tsk, true);
385 if (sock->type == SOCK_DGRAM) 411 if (sock->type == SOCK_DGRAM)
@@ -775,7 +801,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
775 struct sock *sk = &tsk->sk; 801 struct sock *sk = &tsk->sk;
776 struct tipc_msg *hdr = buf_msg(skb); 802 struct tipc_msg *hdr = buf_msg(skb);
777 int mtyp = msg_type(hdr); 803 int mtyp = msg_type(hdr);
778 int conn_cong; 804 bool conn_cong;
779 805
780 /* Ignore if connection cannot be validated: */ 806 /* Ignore if connection cannot be validated: */
781 if (!tsk_peer_msg(tsk, hdr)) 807 if (!tsk_peer_msg(tsk, hdr))
@@ -789,7 +815,9 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
789 return; 815 return;
790 } else if (mtyp == CONN_ACK) { 816 } else if (mtyp == CONN_ACK) {
791 conn_cong = tsk_conn_cong(tsk); 817 conn_cong = tsk_conn_cong(tsk);
792 tsk->sent_unacked -= msg_msgcnt(hdr); 818 tsk->snt_unacked -= msg_conn_ack(hdr);
819 if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
820 tsk->snd_win = msg_adv_win(hdr);
793 if (conn_cong) 821 if (conn_cong)
794 sk->sk_write_space(sk); 822 sk->sk_write_space(sk);
795 } else if (mtyp != CONN_PROBE_REPLY) { 823 } else if (mtyp != CONN_PROBE_REPLY) {
@@ -1020,12 +1048,14 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
1020 u32 dnode; 1048 u32 dnode;
1021 uint mtu, send, sent = 0; 1049 uint mtu, send, sent = 0;
1022 struct iov_iter save; 1050 struct iov_iter save;
1051 int hlen = MIN_H_SIZE;
1023 1052
1024 /* Handle implied connection establishment */ 1053 /* Handle implied connection establishment */
1025 if (unlikely(dest)) { 1054 if (unlikely(dest)) {
1026 rc = __tipc_sendmsg(sock, m, dsz); 1055 rc = __tipc_sendmsg(sock, m, dsz);
1056 hlen = msg_hdr_sz(mhdr);
1027 if (dsz && (dsz == rc)) 1057 if (dsz && (dsz == rc))
1028 tsk->sent_unacked = 1; 1058 tsk->snt_unacked = tsk_inc(tsk, dsz + hlen);
1029 return rc; 1059 return rc;
1030 } 1060 }
1031 if (dsz > (uint)INT_MAX) 1061 if (dsz > (uint)INT_MAX)
@@ -1054,7 +1084,7 @@ next:
1054 if (likely(!tsk_conn_cong(tsk))) { 1084 if (likely(!tsk_conn_cong(tsk))) {
1055 rc = tipc_node_xmit(net, &pktchain, dnode, portid); 1085 rc = tipc_node_xmit(net, &pktchain, dnode, portid);
1056 if (likely(!rc)) { 1086 if (likely(!rc)) {
1057 tsk->sent_unacked++; 1087 tsk->snt_unacked += tsk_inc(tsk, send + hlen);
1058 sent += send; 1088 sent += send;
1059 if (sent == dsz) 1089 if (sent == dsz)
1060 return dsz; 1090 return dsz;
@@ -1118,6 +1148,13 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1118 sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); 1148 sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
1119 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); 1149 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
1120 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); 1150 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
1151 tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
1152 if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
1153 return;
1154
1155 /* Fall back to message based flow control */
1156 tsk->rcv_win = FLOWCTL_MSG_WIN;
1157 tsk->snd_win = FLOWCTL_MSG_WIN;
1121} 1158}
1122 1159
1123/** 1160/**
@@ -1214,7 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
1214 return 0; 1251 return 0;
1215} 1252}
1216 1253
1217static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) 1254static void tipc_sk_send_ack(struct tipc_sock *tsk)
1218{ 1255{
1219 struct net *net = sock_net(&tsk->sk); 1256 struct net *net = sock_net(&tsk->sk);
1220 struct sk_buff *skb = NULL; 1257 struct sk_buff *skb = NULL;
@@ -1230,7 +1267,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
1230 if (!skb) 1267 if (!skb)
1231 return; 1268 return;
1232 msg = buf_msg(skb); 1269 msg = buf_msg(skb);
1233 msg_set_msgcnt(msg, ack); 1270 msg_set_conn_ack(msg, tsk->rcv_unacked);
1271 tsk->rcv_unacked = 0;
1272
1273 /* Adjust to and advertize the correct window limit */
1274 if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
1275 tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
1276 msg_set_adv_win(msg, tsk->rcv_win);
1277 }
1234 tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); 1278 tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
1235} 1279}
1236 1280
@@ -1288,7 +1332,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
1288 long timeo; 1332 long timeo;
1289 unsigned int sz; 1333 unsigned int sz;
1290 u32 err; 1334 u32 err;
1291 int res; 1335 int res, hlen;
1292 1336
1293 /* Catch invalid receive requests */ 1337 /* Catch invalid receive requests */
1294 if (unlikely(!buf_len)) 1338 if (unlikely(!buf_len))
@@ -1313,6 +1357,7 @@ restart:
1313 buf = skb_peek(&sk->sk_receive_queue); 1357 buf = skb_peek(&sk->sk_receive_queue);
1314 msg = buf_msg(buf); 1358 msg = buf_msg(buf);
1315 sz = msg_data_sz(msg); 1359 sz = msg_data_sz(msg);
1360 hlen = msg_hdr_sz(msg);
1316 err = msg_errcode(msg); 1361 err = msg_errcode(msg);
1317 1362
1318 /* Discard an empty non-errored message & try again */ 1363 /* Discard an empty non-errored message & try again */
@@ -1335,7 +1380,7 @@ restart:
1335 sz = buf_len; 1380 sz = buf_len;
1336 m->msg_flags |= MSG_TRUNC; 1381 m->msg_flags |= MSG_TRUNC;
1337 } 1382 }
1338 res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz); 1383 res = skb_copy_datagram_msg(buf, hlen, m, sz);
1339 if (res) 1384 if (res)
1340 goto exit; 1385 goto exit;
1341 res = sz; 1386 res = sz;
@@ -1347,15 +1392,15 @@ restart:
1347 res = -ECONNRESET; 1392 res = -ECONNRESET;
1348 } 1393 }
1349 1394
1350 /* Consume received message (optional) */ 1395 if (unlikely(flags & MSG_PEEK))
1351 if (likely(!(flags & MSG_PEEK))) { 1396 goto exit;
1352 if ((sock->state != SS_READY) && 1397
1353 (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { 1398 if (likely(sock->state != SS_READY)) {
1354 tipc_sk_send_ack(tsk, tsk->rcv_unacked); 1399 tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
1355 tsk->rcv_unacked = 0; 1400 if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
1356 } 1401 tipc_sk_send_ack(tsk);
1357 tsk_advance_rx_queue(sk);
1358 } 1402 }
1403 tsk_advance_rx_queue(sk);
1359exit: 1404exit:
1360 release_sock(sk); 1405 release_sock(sk);
1361 return res; 1406 return res;
@@ -1384,7 +1429,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
1384 int sz_to_copy, target, needed; 1429 int sz_to_copy, target, needed;
1385 int sz_copied = 0; 1430 int sz_copied = 0;
1386 u32 err; 1431 u32 err;
1387 int res = 0; 1432 int res = 0, hlen;
1388 1433
1389 /* Catch invalid receive attempts */ 1434 /* Catch invalid receive attempts */
1390 if (unlikely(!buf_len)) 1435 if (unlikely(!buf_len))
@@ -1410,6 +1455,7 @@ restart:
1410 buf = skb_peek(&sk->sk_receive_queue); 1455 buf = skb_peek(&sk->sk_receive_queue);
1411 msg = buf_msg(buf); 1456 msg = buf_msg(buf);
1412 sz = msg_data_sz(msg); 1457 sz = msg_data_sz(msg);
1458 hlen = msg_hdr_sz(msg);
1413 err = msg_errcode(msg); 1459 err = msg_errcode(msg);
1414 1460
1415 /* Discard an empty non-errored message & try again */ 1461 /* Discard an empty non-errored message & try again */
@@ -1434,8 +1480,7 @@ restart:
1434 needed = (buf_len - sz_copied); 1480 needed = (buf_len - sz_copied);
1435 sz_to_copy = (sz <= needed) ? sz : needed; 1481 sz_to_copy = (sz <= needed) ? sz : needed;
1436 1482
1437 res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset, 1483 res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
1438 m, sz_to_copy);
1439 if (res) 1484 if (res)
1440 goto exit; 1485 goto exit;
1441 1486
@@ -1457,20 +1502,18 @@ restart:
1457 res = -ECONNRESET; 1502 res = -ECONNRESET;
1458 } 1503 }
1459 1504
1460 /* Consume received message (optional) */ 1505 if (unlikely(flags & MSG_PEEK))
1461 if (likely(!(flags & MSG_PEEK))) { 1506 goto exit;
1462 if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { 1507
1463 tipc_sk_send_ack(tsk, tsk->rcv_unacked); 1508 tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
1464 tsk->rcv_unacked = 0; 1509 if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
1465 } 1510 tipc_sk_send_ack(tsk);
1466 tsk_advance_rx_queue(sk); 1511 tsk_advance_rx_queue(sk);
1467 }
1468 1512
1469 /* Loop around if more data is required */ 1513 /* Loop around if more data is required */
1470 if ((sz_copied < buf_len) && /* didn't get all requested data */ 1514 if ((sz_copied < buf_len) && /* didn't get all requested data */
1471 (!skb_queue_empty(&sk->sk_receive_queue) || 1515 (!skb_queue_empty(&sk->sk_receive_queue) ||
1472 (sz_copied < target)) && /* and more is ready or required */ 1516 (sz_copied < target)) && /* and more is ready or required */
1473 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
1474 (!err)) /* and haven't reached a FIN */ 1517 (!err)) /* and haven't reached a FIN */
1475 goto restart; 1518 goto restart;
1476 1519
@@ -1602,30 +1645,33 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1602/** 1645/**
1603 * rcvbuf_limit - get proper overload limit of socket receive queue 1646 * rcvbuf_limit - get proper overload limit of socket receive queue
1604 * @sk: socket 1647 * @sk: socket
1605 * @buf: message 1648 * @skb: message
1606 * 1649 *
1607 * For all connection oriented messages, irrespective of importance, 1650 * For connection oriented messages, irrespective of importance,
1608 * the default overload value (i.e. 67MB) is set as limit. 1651 * default queue limit is 2 MB.
1609 * 1652 *
1610 * For all connectionless messages, by default new queue limits are 1653 * For connectionless messages, queue limits are based on message
1611 * as belows: 1654 * importance as follows:
1612 * 1655 *
1613 * TIPC_LOW_IMPORTANCE (4 MB) 1656 * TIPC_LOW_IMPORTANCE (2 MB)
1614 * TIPC_MEDIUM_IMPORTANCE (8 MB) 1657 * TIPC_MEDIUM_IMPORTANCE (4 MB)
1615 * TIPC_HIGH_IMPORTANCE (16 MB) 1658 * TIPC_HIGH_IMPORTANCE (8 MB)
1616 * TIPC_CRITICAL_IMPORTANCE (32 MB) 1659 * TIPC_CRITICAL_IMPORTANCE (16 MB)
1617 * 1660 *
1618 * Returns overload limit according to corresponding message importance 1661 * Returns overload limit according to corresponding message importance
1619 */ 1662 */
1620static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) 1663static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1621{ 1664{
1622 struct tipc_msg *msg = buf_msg(buf); 1665 struct tipc_sock *tsk = tipc_sk(sk);
1666 struct tipc_msg *hdr = buf_msg(skb);
1623 1667
1624 if (msg_connected(msg)) 1668 if (unlikely(!msg_connected(hdr)))
1625 return sysctl_tipc_rmem[2]; 1669 return sk->sk_rcvbuf << msg_importance(hdr);
1626 1670
1627 return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << 1671 if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
1628 msg_importance(msg); 1672 return sk->sk_rcvbuf;
1673
1674 return FLOWCTL_MSG_LIM;
1629} 1675}
1630 1676
1631/** 1677/**
@@ -1748,7 +1794,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
1748 1794
1749 /* Try backlog, compensating for double-counted bytes */ 1795 /* Try backlog, compensating for double-counted bytes */
1750 dcnt = &tipc_sk(sk)->dupl_rcvcnt; 1796 dcnt = &tipc_sk(sk)->dupl_rcvcnt;
1751 if (sk->sk_backlog.len) 1797 if (!sk->sk_backlog.len)
1752 atomic_set(dcnt, 0); 1798 atomic_set(dcnt, 0);
1753 lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); 1799 lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
1754 if (likely(!sk_add_backlog(sk, skb, lim))) 1800 if (likely(!sk_add_backlog(sk, skb, lim)))
@@ -2807,6 +2853,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
2807 if (err) 2853 if (err)
2808 return err; 2854 return err;
2809 2855
2856 if (!attrs[TIPC_NLA_SOCK])
2857 return -EINVAL;
2858
2810 err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, 2859 err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
2811 attrs[TIPC_NLA_SOCK], 2860 attrs[TIPC_NLA_SOCK],
2812 tipc_nl_sock_policy); 2861 tipc_nl_sock_policy);