diff options
author | Denis V. Lunev <den@openvz.org> | 2007-10-11 00:15:29 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2007-10-11 00:15:29 -0400 |
commit | cd40b7d3983c708aabe3d3008ec64ffce56d33b0 (patch) | |
tree | 0d6fe9cfd2f03fdeee126e317d4bfb145afc458d /net/netlink/af_netlink.c | |
parent | aed815601f3f95281ab3a01f7e2cbe1bd54285a0 (diff) |
[NET]: make netlink user -> kernel interface synchronous
This patch makes the processing of netlink user -> kernel messages synchronous.
This change was inspired by a talk with Alexey Kuznetsov about the current
processing of netlink messages. He says that he was badly wrong when he introduced
asynchronous user -> kernel communication.
The netlink_unicast call is the only path for sending a message to the kernel
netlink socket. But, unfortunately, it is also used to send data to the
user.
Before this change, the user message was attached to the socket queue
and sk->sk_data_ready was called. The process was blocked until all
pending messages were processed. The bad thing is that this processing
may occur in an arbitrary process context.
This patch changes the nlk->data_ready callback to take a single skb and forces
packet processing right in netlink_unicast.
Kernel -> user path in netlink_unicast remains untouched.
EINTR processing in netlink_run_queue was changed. It forces an rtnl_lock
drop, but the process remains in the loop until the message is fully
processed. So, there is no need to use these kludges now.
Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r-- | net/netlink/af_netlink.c | 152 |
1 files changed, 47 insertions, 105 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4ce7dcbcb6ef..c776bcd9f825 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -80,7 +80,7 @@ struct netlink_sock { | |||
80 | struct netlink_callback *cb; | 80 | struct netlink_callback *cb; |
81 | struct mutex *cb_mutex; | 81 | struct mutex *cb_mutex; |
82 | struct mutex cb_def_mutex; | 82 | struct mutex cb_def_mutex; |
83 | void (*data_ready)(struct sock *sk, int bytes); | 83 | void (*netlink_rcv)(struct sk_buff *skb); |
84 | struct module *module; | 84 | struct module *module; |
85 | }; | 85 | }; |
86 | 86 | ||
@@ -127,7 +127,6 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); | |||
127 | 127 | ||
128 | static int netlink_dump(struct sock *sk); | 128 | static int netlink_dump(struct sock *sk); |
129 | static void netlink_destroy_callback(struct netlink_callback *cb); | 129 | static void netlink_destroy_callback(struct netlink_callback *cb); |
130 | static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb); | ||
131 | 130 | ||
132 | static DEFINE_RWLOCK(nl_table_lock); | 131 | static DEFINE_RWLOCK(nl_table_lock); |
133 | static atomic_t nl_table_users = ATOMIC_INIT(0); | 132 | static atomic_t nl_table_users = ATOMIC_INIT(0); |
@@ -709,21 +708,17 @@ static void netlink_overrun(struct sock *sk) | |||
709 | 708 | ||
710 | static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) | 709 | static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) |
711 | { | 710 | { |
712 | int protocol = ssk->sk_protocol; | ||
713 | struct net *net; | ||
714 | struct sock *sock; | 711 | struct sock *sock; |
715 | struct netlink_sock *nlk; | 712 | struct netlink_sock *nlk; |
716 | 713 | ||
717 | net = ssk->sk_net; | 714 | sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid); |
718 | sock = netlink_lookup(net, protocol, pid); | ||
719 | if (!sock) | 715 | if (!sock) |
720 | return ERR_PTR(-ECONNREFUSED); | 716 | return ERR_PTR(-ECONNREFUSED); |
721 | 717 | ||
722 | /* Don't bother queuing skb if kernel socket has no input function */ | 718 | /* Don't bother queuing skb if kernel socket has no input function */ |
723 | nlk = nlk_sk(sock); | 719 | nlk = nlk_sk(sock); |
724 | if ((netlink_is_kernel(sock) && !nlk->data_ready) || | 720 | if (sock->sk_state == NETLINK_CONNECTED && |
725 | (sock->sk_state == NETLINK_CONNECTED && | 721 | nlk->dst_pid != nlk_sk(ssk)->pid) { |
726 | nlk->dst_pid != nlk_sk(ssk)->pid)) { | ||
727 | sock_put(sock); | 722 | sock_put(sock); |
728 | return ERR_PTR(-ECONNREFUSED); | 723 | return ERR_PTR(-ECONNREFUSED); |
729 | } | 724 | } |
@@ -837,7 +832,34 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb, | |||
837 | return skb; | 832 | return skb; |
838 | } | 833 | } |
839 | 834 | ||
840 | int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock) | 835 | static inline void netlink_rcv_wake(struct sock *sk) |
836 | { | ||
837 | struct netlink_sock *nlk = nlk_sk(sk); | ||
838 | |||
839 | if (skb_queue_empty(&sk->sk_receive_queue)) | ||
840 | clear_bit(0, &nlk->state); | ||
841 | if (!test_bit(0, &nlk->state)) | ||
842 | wake_up_interruptible(&nlk->wait); | ||
843 | } | ||
844 | |||
845 | static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) | ||
846 | { | ||
847 | int ret; | ||
848 | struct netlink_sock *nlk = nlk_sk(sk); | ||
849 | |||
850 | ret = -ECONNREFUSED; | ||
851 | if (nlk->netlink_rcv != NULL) { | ||
852 | ret = skb->len; | ||
853 | skb_set_owner_r(skb, sk); | ||
854 | nlk->netlink_rcv(skb); | ||
855 | } | ||
856 | kfree_skb(skb); | ||
857 | sock_put(sk); | ||
858 | return ret; | ||
859 | } | ||
860 | |||
861 | int netlink_unicast(struct sock *ssk, struct sk_buff *skb, | ||
862 | u32 pid, int nonblock) | ||
841 | { | 863 | { |
842 | struct sock *sk; | 864 | struct sock *sk; |
843 | int err; | 865 | int err; |
@@ -852,6 +874,9 @@ retry: | |||
852 | kfree_skb(skb); | 874 | kfree_skb(skb); |
853 | return PTR_ERR(sk); | 875 | return PTR_ERR(sk); |
854 | } | 876 | } |
877 | if (netlink_is_kernel(sk)) | ||
878 | return netlink_unicast_kernel(sk, skb); | ||
879 | |||
855 | err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); | 880 | err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); |
856 | if (err == 1) | 881 | if (err == 1) |
857 | goto retry; | 882 | goto retry; |
@@ -1151,16 +1176,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) | |||
1151 | put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); | 1176 | put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); |
1152 | } | 1177 | } |
1153 | 1178 | ||
1154 | static inline void netlink_rcv_wake(struct sock *sk) | ||
1155 | { | ||
1156 | struct netlink_sock *nlk = nlk_sk(sk); | ||
1157 | |||
1158 | if (skb_queue_empty(&sk->sk_receive_queue)) | ||
1159 | clear_bit(0, &nlk->state); | ||
1160 | if (!test_bit(0, &nlk->state)) | ||
1161 | wake_up_interruptible(&nlk->wait); | ||
1162 | } | ||
1163 | |||
1164 | static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | 1179 | static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, |
1165 | struct msghdr *msg, size_t len) | 1180 | struct msghdr *msg, size_t len) |
1166 | { | 1181 | { |
@@ -1308,11 +1323,7 @@ out: | |||
1308 | 1323 | ||
1309 | static void netlink_data_ready(struct sock *sk, int len) | 1324 | static void netlink_data_ready(struct sock *sk, int len) |
1310 | { | 1325 | { |
1311 | struct netlink_sock *nlk = nlk_sk(sk); | 1326 | BUG(); |
1312 | |||
1313 | if (nlk->data_ready) | ||
1314 | nlk->data_ready(sk, len); | ||
1315 | netlink_rcv_wake(sk); | ||
1316 | } | 1327 | } |
1317 | 1328 | ||
1318 | /* | 1329 | /* |
@@ -1323,7 +1334,7 @@ static void netlink_data_ready(struct sock *sk, int len) | |||
1323 | 1334 | ||
1324 | struct sock * | 1335 | struct sock * |
1325 | netlink_kernel_create(struct net *net, int unit, unsigned int groups, | 1336 | netlink_kernel_create(struct net *net, int unit, unsigned int groups, |
1326 | void (*input)(struct sock *sk, int len), | 1337 | void (*input)(struct sk_buff *skb), |
1327 | struct mutex *cb_mutex, struct module *module) | 1338 | struct mutex *cb_mutex, struct module *module) |
1328 | { | 1339 | { |
1329 | struct socket *sock; | 1340 | struct socket *sock; |
@@ -1352,7 +1363,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, | |||
1352 | sk = sock->sk; | 1363 | sk = sock->sk; |
1353 | sk->sk_data_ready = netlink_data_ready; | 1364 | sk->sk_data_ready = netlink_data_ready; |
1354 | if (input) | 1365 | if (input) |
1355 | nlk_sk(sk)->data_ready = input; | 1366 | nlk_sk(sk)->netlink_rcv = input; |
1356 | 1367 | ||
1357 | if (netlink_insert(sk, net, 0)) | 1368 | if (netlink_insert(sk, net, 0)) |
1358 | goto out_sock_release; | 1369 | goto out_sock_release; |
@@ -1552,12 +1563,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, | |||
1552 | 1563 | ||
1553 | netlink_dump(sk); | 1564 | netlink_dump(sk); |
1554 | sock_put(sk); | 1565 | sock_put(sk); |
1555 | 1566 | return 0; | |
1556 | /* We successfully started a dump, by returning -EINTR we | ||
1557 | * signal the queue mangement to interrupt processing of | ||
1558 | * any netlink messages so userspace gets a chance to read | ||
1559 | * the results. */ | ||
1560 | return -EINTR; | ||
1561 | } | 1567 | } |
1562 | 1568 | ||
1563 | void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) | 1569 | void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) |
@@ -1594,13 +1600,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) | |||
1594 | netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 1600 | netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); |
1595 | } | 1601 | } |
1596 | 1602 | ||
1597 | static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, | 1603 | int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, |
1598 | struct nlmsghdr *)) | 1604 | struct nlmsghdr *)) |
1599 | { | 1605 | { |
1600 | struct nlmsghdr *nlh; | 1606 | struct nlmsghdr *nlh; |
1601 | int err; | 1607 | int err; |
1602 | 1608 | ||
1603 | while (skb->len >= nlmsg_total_size(0)) { | 1609 | while (skb->len >= nlmsg_total_size(0)) { |
1610 | int msglen; | ||
1611 | |||
1604 | nlh = nlmsg_hdr(skb); | 1612 | nlh = nlmsg_hdr(skb); |
1605 | err = 0; | 1613 | err = 0; |
1606 | 1614 | ||
@@ -1616,86 +1624,20 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, | |||
1616 | goto skip; | 1624 | goto skip; |
1617 | 1625 | ||
1618 | err = cb(skb, nlh); | 1626 | err = cb(skb, nlh); |
1619 | if (err == -EINTR) { | ||
1620 | /* Not an error, but we interrupt processing */ | ||
1621 | netlink_queue_skip(nlh, skb); | ||
1622 | return err; | ||
1623 | } | ||
1624 | skip: | 1627 | skip: |
1625 | if (nlh->nlmsg_flags & NLM_F_ACK || err) | 1628 | if (nlh->nlmsg_flags & NLM_F_ACK || err) |
1626 | netlink_ack(skb, nlh, err); | 1629 | netlink_ack(skb, nlh, err); |
1627 | 1630 | ||
1628 | netlink_queue_skip(nlh, skb); | 1631 | msglen = NLMSG_ALIGN(nlh->nlmsg_len); |
1632 | if (msglen > skb->len) | ||
1633 | msglen = skb->len; | ||
1634 | skb_pull(skb, msglen); | ||
1629 | } | 1635 | } |
1630 | 1636 | ||
1631 | return 0; | 1637 | return 0; |
1632 | } | 1638 | } |
1633 | 1639 | ||
1634 | /** | 1640 | /** |
1635 | * nelink_run_queue - Process netlink receive queue. | ||
1636 | * @sk: Netlink socket containing the queue | ||
1637 | * @qlen: Initial queue length | ||
1638 | * @cb: Callback function invoked for each netlink message found | ||
1639 | * | ||
1640 | * Processes as much as there was in the queue upon entry and invokes | ||
1641 | * a callback function for each netlink message found. The callback | ||
1642 | * function may refuse a message by returning a negative error code | ||
1643 | * but setting the error pointer to 0 in which case this function | ||
1644 | * returns with a qlen != 0. | ||
1645 | * | ||
1646 | * qlen must be initialized to 0 before the initial entry, afterwards | ||
1647 | * the function may be called repeatedly until the returned qlen is 0. | ||
1648 | * | ||
1649 | * The callback function may return -EINTR to signal that processing | ||
1650 | * of netlink messages shall be interrupted. In this case the message | ||
1651 | * currently being processed will NOT be requeued onto the receive | ||
1652 | * queue. | ||
1653 | */ | ||
1654 | unsigned int netlink_run_queue(struct sock *sk, unsigned int qlen, | ||
1655 | int (*cb)(struct sk_buff *, struct nlmsghdr *)) | ||
1656 | { | ||
1657 | struct sk_buff *skb; | ||
1658 | |||
1659 | if (!qlen || qlen > skb_queue_len(&sk->sk_receive_queue)) | ||
1660 | qlen = skb_queue_len(&sk->sk_receive_queue); | ||
1661 | |||
1662 | for (; qlen; qlen--) { | ||
1663 | skb = skb_dequeue(&sk->sk_receive_queue); | ||
1664 | if (netlink_rcv_skb(skb, cb)) { | ||
1665 | if (skb->len) | ||
1666 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
1667 | else { | ||
1668 | kfree_skb(skb); | ||
1669 | qlen--; | ||
1670 | } | ||
1671 | break; | ||
1672 | } | ||
1673 | |||
1674 | kfree_skb(skb); | ||
1675 | } | ||
1676 | |||
1677 | return qlen; | ||
1678 | } | ||
1679 | |||
1680 | /** | ||
1681 | * netlink_queue_skip - Skip netlink message while processing queue. | ||
1682 | * @nlh: Netlink message to be skipped | ||
1683 | * @skb: Socket buffer containing the netlink messages. | ||
1684 | * | ||
1685 | * Pulls the given netlink message off the socket buffer so the next | ||
1686 | * call to netlink_queue_run() will not reconsider the message. | ||
1687 | */ | ||
1688 | static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) | ||
1689 | { | ||
1690 | int msglen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
1691 | |||
1692 | if (msglen > skb->len) | ||
1693 | msglen = skb->len; | ||
1694 | |||
1695 | skb_pull(skb, msglen); | ||
1696 | } | ||
1697 | |||
1698 | /** | ||
1699 | * nlmsg_notify - send a notification netlink message | 1641 | * nlmsg_notify - send a notification netlink message |
1700 | * @sk: netlink socket to use | 1642 | * @sk: netlink socket to use |
1701 | * @skb: notification message | 1643 | * @skb: notification message |
@@ -1998,7 +1940,7 @@ panic: | |||
1998 | core_initcall(netlink_proto_init); | 1940 | core_initcall(netlink_proto_init); |
1999 | 1941 | ||
2000 | EXPORT_SYMBOL(netlink_ack); | 1942 | EXPORT_SYMBOL(netlink_ack); |
2001 | EXPORT_SYMBOL(netlink_run_queue); | 1943 | EXPORT_SYMBOL(netlink_rcv_skb); |
2002 | EXPORT_SYMBOL(netlink_broadcast); | 1944 | EXPORT_SYMBOL(netlink_broadcast); |
2003 | EXPORT_SYMBOL(netlink_dump_start); | 1945 | EXPORT_SYMBOL(netlink_dump_start); |
2004 | EXPORT_SYMBOL(netlink_kernel_create); | 1946 | EXPORT_SYMBOL(netlink_kernel_create); |