aboutsummaryrefslogtreecommitdiffstats
path: root/net/netlink/af_netlink.c
diff options
context:
space:
mode:
authorDenis V. Lunev <den@openvz.org>2007-10-11 00:15:29 -0400
committerDavid S. Miller <davem@davemloft.net>2007-10-11 00:15:29 -0400
commitcd40b7d3983c708aabe3d3008ec64ffce56d33b0 (patch)
tree0d6fe9cfd2f03fdeee126e317d4bfb145afc458d /net/netlink/af_netlink.c
parentaed815601f3f95281ab3a01f7e2cbe1bd54285a0 (diff)
[NET]: make netlink user -> kernel interface synchronious
This patch make processing netlink user -> kernel messages synchronious. This change was inspired by the talk with Alexey Kuznetsov about current netlink messages processing. He says that he was badly wrong when introduced asynchronious user -> kernel communication. The call netlink_unicast is the only path to send message to the kernel netlink socket. But, unfortunately, it is also used to send data to the user. Before this change the user message has been attached to the socket queue and sk->sk_data_ready was called. The process has been blocked until all pending messages were processed. The bad thing is that this processing may occur in the arbitrary process context. This patch changes nlk->data_ready callback to get 1 skb and force packet processing right in the netlink_unicast. Kernel -> user path in netlink_unicast remains untouched. EINTR processing for in netlink_run_queue was changed. It forces rtnl_lock drop, but the process remains in the cycle until the message will be fully processed. So, there is no need to use this kludges now. Signed-off-by: Denis V. Lunev <den@openvz.org> Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--net/netlink/af_netlink.c152
1 files changed, 47 insertions, 105 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4ce7dcbcb6ef..c776bcd9f825 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -80,7 +80,7 @@ struct netlink_sock {
80 struct netlink_callback *cb; 80 struct netlink_callback *cb;
81 struct mutex *cb_mutex; 81 struct mutex *cb_mutex;
82 struct mutex cb_def_mutex; 82 struct mutex cb_def_mutex;
83 void (*data_ready)(struct sock *sk, int bytes); 83 void (*netlink_rcv)(struct sk_buff *skb);
84 struct module *module; 84 struct module *module;
85}; 85};
86 86
@@ -127,7 +127,6 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
127 127
128static int netlink_dump(struct sock *sk); 128static int netlink_dump(struct sock *sk);
129static void netlink_destroy_callback(struct netlink_callback *cb); 129static void netlink_destroy_callback(struct netlink_callback *cb);
130static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb);
131 130
132static DEFINE_RWLOCK(nl_table_lock); 131static DEFINE_RWLOCK(nl_table_lock);
133static atomic_t nl_table_users = ATOMIC_INIT(0); 132static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -709,21 +708,17 @@ static void netlink_overrun(struct sock *sk)
709 708
710static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) 709static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
711{ 710{
712 int protocol = ssk->sk_protocol;
713 struct net *net;
714 struct sock *sock; 711 struct sock *sock;
715 struct netlink_sock *nlk; 712 struct netlink_sock *nlk;
716 713
717 net = ssk->sk_net; 714 sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid);
718 sock = netlink_lookup(net, protocol, pid);
719 if (!sock) 715 if (!sock)
720 return ERR_PTR(-ECONNREFUSED); 716 return ERR_PTR(-ECONNREFUSED);
721 717
722 /* Don't bother queuing skb if kernel socket has no input function */ 718 /* Don't bother queuing skb if kernel socket has no input function */
723 nlk = nlk_sk(sock); 719 nlk = nlk_sk(sock);
724 if ((netlink_is_kernel(sock) && !nlk->data_ready) || 720 if (sock->sk_state == NETLINK_CONNECTED &&
725 (sock->sk_state == NETLINK_CONNECTED && 721 nlk->dst_pid != nlk_sk(ssk)->pid) {
726 nlk->dst_pid != nlk_sk(ssk)->pid)) {
727 sock_put(sock); 722 sock_put(sock);
728 return ERR_PTR(-ECONNREFUSED); 723 return ERR_PTR(-ECONNREFUSED);
729 } 724 }
@@ -837,7 +832,34 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
837 return skb; 832 return skb;
838} 833}
839 834
840int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock) 835static inline void netlink_rcv_wake(struct sock *sk)
836{
837 struct netlink_sock *nlk = nlk_sk(sk);
838
839 if (skb_queue_empty(&sk->sk_receive_queue))
840 clear_bit(0, &nlk->state);
841 if (!test_bit(0, &nlk->state))
842 wake_up_interruptible(&nlk->wait);
843}
844
845static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
846{
847 int ret;
848 struct netlink_sock *nlk = nlk_sk(sk);
849
850 ret = -ECONNREFUSED;
851 if (nlk->netlink_rcv != NULL) {
852 ret = skb->len;
853 skb_set_owner_r(skb, sk);
854 nlk->netlink_rcv(skb);
855 }
856 kfree_skb(skb);
857 sock_put(sk);
858 return ret;
859}
860
861int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
862 u32 pid, int nonblock)
841{ 863{
842 struct sock *sk; 864 struct sock *sk;
843 int err; 865 int err;
@@ -852,6 +874,9 @@ retry:
852 kfree_skb(skb); 874 kfree_skb(skb);
853 return PTR_ERR(sk); 875 return PTR_ERR(sk);
854 } 876 }
877 if (netlink_is_kernel(sk))
878 return netlink_unicast_kernel(sk, skb);
879
855 err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); 880 err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
856 if (err == 1) 881 if (err == 1)
857 goto retry; 882 goto retry;
@@ -1151,16 +1176,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1151 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1176 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1152} 1177}
1153 1178
1154static inline void netlink_rcv_wake(struct sock *sk)
1155{
1156 struct netlink_sock *nlk = nlk_sk(sk);
1157
1158 if (skb_queue_empty(&sk->sk_receive_queue))
1159 clear_bit(0, &nlk->state);
1160 if (!test_bit(0, &nlk->state))
1161 wake_up_interruptible(&nlk->wait);
1162}
1163
1164static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, 1179static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1165 struct msghdr *msg, size_t len) 1180 struct msghdr *msg, size_t len)
1166{ 1181{
@@ -1308,11 +1323,7 @@ out:
1308 1323
1309static void netlink_data_ready(struct sock *sk, int len) 1324static void netlink_data_ready(struct sock *sk, int len)
1310{ 1325{
1311 struct netlink_sock *nlk = nlk_sk(sk); 1326 BUG();
1312
1313 if (nlk->data_ready)
1314 nlk->data_ready(sk, len);
1315 netlink_rcv_wake(sk);
1316} 1327}
1317 1328
1318/* 1329/*
@@ -1323,7 +1334,7 @@ static void netlink_data_ready(struct sock *sk, int len)
1323 1334
1324struct sock * 1335struct sock *
1325netlink_kernel_create(struct net *net, int unit, unsigned int groups, 1336netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1326 void (*input)(struct sock *sk, int len), 1337 void (*input)(struct sk_buff *skb),
1327 struct mutex *cb_mutex, struct module *module) 1338 struct mutex *cb_mutex, struct module *module)
1328{ 1339{
1329 struct socket *sock; 1340 struct socket *sock;
@@ -1352,7 +1363,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1352 sk = sock->sk; 1363 sk = sock->sk;
1353 sk->sk_data_ready = netlink_data_ready; 1364 sk->sk_data_ready = netlink_data_ready;
1354 if (input) 1365 if (input)
1355 nlk_sk(sk)->data_ready = input; 1366 nlk_sk(sk)->netlink_rcv = input;
1356 1367
1357 if (netlink_insert(sk, net, 0)) 1368 if (netlink_insert(sk, net, 0))
1358 goto out_sock_release; 1369 goto out_sock_release;
@@ -1552,12 +1563,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1552 1563
1553 netlink_dump(sk); 1564 netlink_dump(sk);
1554 sock_put(sk); 1565 sock_put(sk);
1555 1566 return 0;
1556 /* We successfully started a dump, by returning -EINTR we
1557 * signal the queue mangement to interrupt processing of
1558 * any netlink messages so userspace gets a chance to read
1559 * the results. */
1560 return -EINTR;
1561} 1567}
1562 1568
1563void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1569void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
@@ -1594,13 +1600,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1594 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1600 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1595} 1601}
1596 1602
1597static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1603int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1598 struct nlmsghdr *)) 1604 struct nlmsghdr *))
1599{ 1605{
1600 struct nlmsghdr *nlh; 1606 struct nlmsghdr *nlh;
1601 int err; 1607 int err;
1602 1608
1603 while (skb->len >= nlmsg_total_size(0)) { 1609 while (skb->len >= nlmsg_total_size(0)) {
1610 int msglen;
1611
1604 nlh = nlmsg_hdr(skb); 1612 nlh = nlmsg_hdr(skb);
1605 err = 0; 1613 err = 0;
1606 1614
@@ -1616,86 +1624,20 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1616 goto skip; 1624 goto skip;
1617 1625
1618 err = cb(skb, nlh); 1626 err = cb(skb, nlh);
1619 if (err == -EINTR) {
1620 /* Not an error, but we interrupt processing */
1621 netlink_queue_skip(nlh, skb);
1622 return err;
1623 }
1624skip: 1627skip:
1625 if (nlh->nlmsg_flags & NLM_F_ACK || err) 1628 if (nlh->nlmsg_flags & NLM_F_ACK || err)
1626 netlink_ack(skb, nlh, err); 1629 netlink_ack(skb, nlh, err);
1627 1630
1628 netlink_queue_skip(nlh, skb); 1631 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1632 if (msglen > skb->len)
1633 msglen = skb->len;
1634 skb_pull(skb, msglen);
1629 } 1635 }
1630 1636
1631 return 0; 1637 return 0;
1632} 1638}
1633 1639
1634/** 1640/**
1635 * nelink_run_queue - Process netlink receive queue.
1636 * @sk: Netlink socket containing the queue
1637 * @qlen: Initial queue length
1638 * @cb: Callback function invoked for each netlink message found
1639 *
1640 * Processes as much as there was in the queue upon entry and invokes
1641 * a callback function for each netlink message found. The callback
1642 * function may refuse a message by returning a negative error code
1643 * but setting the error pointer to 0 in which case this function
1644 * returns with a qlen != 0.
1645 *
1646 * qlen must be initialized to 0 before the initial entry, afterwards
1647 * the function may be called repeatedly until the returned qlen is 0.
1648 *
1649 * The callback function may return -EINTR to signal that processing
1650 * of netlink messages shall be interrupted. In this case the message
1651 * currently being processed will NOT be requeued onto the receive
1652 * queue.
1653 */
1654unsigned int netlink_run_queue(struct sock *sk, unsigned int qlen,
1655 int (*cb)(struct sk_buff *, struct nlmsghdr *))
1656{
1657 struct sk_buff *skb;
1658
1659 if (!qlen || qlen > skb_queue_len(&sk->sk_receive_queue))
1660 qlen = skb_queue_len(&sk->sk_receive_queue);
1661
1662 for (; qlen; qlen--) {
1663 skb = skb_dequeue(&sk->sk_receive_queue);
1664 if (netlink_rcv_skb(skb, cb)) {
1665 if (skb->len)
1666 skb_queue_head(&sk->sk_receive_queue, skb);
1667 else {
1668 kfree_skb(skb);
1669 qlen--;
1670 }
1671 break;
1672 }
1673
1674 kfree_skb(skb);
1675 }
1676
1677 return qlen;
1678}
1679
1680/**
1681 * netlink_queue_skip - Skip netlink message while processing queue.
1682 * @nlh: Netlink message to be skipped
1683 * @skb: Socket buffer containing the netlink messages.
1684 *
1685 * Pulls the given netlink message off the socket buffer so the next
1686 * call to netlink_queue_run() will not reconsider the message.
1687 */
1688static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
1689{
1690 int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1691
1692 if (msglen > skb->len)
1693 msglen = skb->len;
1694
1695 skb_pull(skb, msglen);
1696}
1697
1698/**
1699 * nlmsg_notify - send a notification netlink message 1641 * nlmsg_notify - send a notification netlink message
1700 * @sk: netlink socket to use 1642 * @sk: netlink socket to use
1701 * @skb: notification message 1643 * @skb: notification message
@@ -1998,7 +1940,7 @@ panic:
1998core_initcall(netlink_proto_init); 1940core_initcall(netlink_proto_init);
1999 1941
2000EXPORT_SYMBOL(netlink_ack); 1942EXPORT_SYMBOL(netlink_ack);
2001EXPORT_SYMBOL(netlink_run_queue); 1943EXPORT_SYMBOL(netlink_rcv_skb);
2002EXPORT_SYMBOL(netlink_broadcast); 1944EXPORT_SYMBOL(netlink_broadcast);
2003EXPORT_SYMBOL(netlink_dump_start); 1945EXPORT_SYMBOL(netlink_dump_start);
2004EXPORT_SYMBOL(netlink_kernel_create); 1946EXPORT_SYMBOL(netlink_kernel_create);