aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDenis V. Lunev <den@openvz.org>2007-10-11 00:15:29 -0400
committerDavid S. Miller <davem@davemloft.net>2007-10-11 00:15:29 -0400
commitcd40b7d3983c708aabe3d3008ec64ffce56d33b0 (patch)
tree0d6fe9cfd2f03fdeee126e317d4bfb145afc458d
parentaed815601f3f95281ab3a01f7e2cbe1bd54285a0 (diff)
[NET]: make netlink user -> kernel interface synchronous
This patch makes the processing of netlink user -> kernel messages synchronous. This change was inspired by a talk with Alexey Kuznetsov about the current netlink message processing. He says that he was badly wrong when he introduced asynchronous user -> kernel communication. The call to netlink_unicast is the only path for sending a message to the kernel netlink socket. But, unfortunately, it is also used to send data to the user. Before this change, the user message was attached to the socket queue and sk->sk_data_ready was called. The process was blocked until all pending messages were processed. The bad thing is that this processing may occur in an arbitrary process context. This patch changes the nlk->data_ready callback to take one skb and forces packet processing right in netlink_unicast. The kernel -> user path in netlink_unicast remains untouched. EINTR processing in netlink_run_queue was changed. It forces an rtnl_lock drop, but the process remains in the loop until the message is fully processed. So there is no need to use these kludges now. Signed-off-by: Denis V. Lunev <den@openvz.org> Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/connector/connector.c14
-rw-r--r--drivers/scsi/scsi_netlink.c25
-rw-r--r--drivers/scsi/scsi_transport_iscsi.c82
-rw-r--r--fs/ecryptfs/netlink.c14
-rw-r--r--include/linux/connector.h2
-rw-r--r--include/linux/netlink.h2
-rw-r--r--include/net/netlink.h6
-rw-r--r--kernel/audit.c12
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c14
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/inet_diag.c12
-rw-r--r--net/ipv4/netfilter/ip_queue.c17
-rw-r--r--net/ipv6/netfilter/ip6_queue.c19
-rw-r--r--net/netfilter/nfnetlink.c12
-rw-r--r--net/netlink/af_netlink.c152
-rw-r--r--net/netlink/genetlink.c12
-rw-r--r--net/xfrm/xfrm_user.c13
18 files changed, 130 insertions, 299 deletions
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 569070997cc1..0e328d387af4 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -235,18 +235,6 @@ out:
235} 235}
236 236
237/* 237/*
238 * Netlink socket input callback - dequeues the skbs and calls the
239 * main netlink receiving function.
240 */
241static void cn_input(struct sock *sk, int len)
242{
243 struct sk_buff *skb;
244
245 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
246 cn_rx_skb(skb);
247}
248
249/*
250 * Notification routing. 238 * Notification routing.
251 * 239 *
252 * Gets id and checks if there are notification request for it's idx 240 * Gets id and checks if there are notification request for it's idx
@@ -442,7 +430,7 @@ static int __devinit cn_init(void)
442 struct cn_dev *dev = &cdev; 430 struct cn_dev *dev = &cdev;
443 int err; 431 int err;
444 432
445 dev->input = cn_input; 433 dev->input = cn_rx_skb;
446 dev->id.idx = cn_idx; 434 dev->id.idx = cn_idx;
447 dev->id.val = cn_val; 435 dev->id.val = cn_val;
448 436
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 163acf6ad2d3..40579edca101 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -64,7 +64,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
64 64
65 if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) { 65 if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) {
66 err = -EBADMSG; 66 err = -EBADMSG;
67 goto next_msg; 67 return;
68 } 68 }
69 69
70 hdr = NLMSG_DATA(nlh); 70 hdr = NLMSG_DATA(nlh);
@@ -99,27 +99,6 @@ next_msg:
99 99
100 100
101/** 101/**
102 * scsi_nl_rcv_msg -
103 * Receive handler for a socket. Extracts a received message buffer from
104 * the socket, and starts message processing.
105 *
106 * @sk: socket
107 * @len: unused
108 *
109 **/
110static void
111scsi_nl_rcv(struct sock *sk, int len)
112{
113 struct sk_buff *skb;
114
115 while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
116 scsi_nl_rcv_msg(skb);
117 kfree_skb(skb);
118 }
119}
120
121
122/**
123 * scsi_nl_rcv_event - 102 * scsi_nl_rcv_event -
124 * Event handler for a netlink socket. 103 * Event handler for a netlink socket.
125 * 104 *
@@ -168,7 +147,7 @@ scsi_netlink_init(void)
168 } 147 }
169 148
170 scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT, 149 scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT,
171 SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL, 150 SCSI_NL_GRP_CNT, scsi_nl_rcv_msg, NULL,
172 THIS_MODULE); 151 THIS_MODULE);
173 if (!scsi_nl_sock) { 152 if (!scsi_nl_sock) {
174 printk(KERN_ERR "%s: register of recieve handler failed\n", 153 printk(KERN_ERR "%s: register of recieve handler failed\n",
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 4916f01230dc..5428d15f23c6 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1097,61 +1097,49 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1097} 1097}
1098 1098
1099/* 1099/*
1100 * Get message from skb (based on rtnetlink_rcv_skb). Each message is 1100 * Get message from skb. Each message is processed by iscsi_if_recv_msg.
1101 * processed by iscsi_if_recv_msg. Malformed skbs with wrong lengths or 1101 * Malformed skbs with wrong lengths or invalid creds are not processed.
1102 * invalid creds are discarded silently.
1103 */ 1102 */
1104static void 1103static void
1105iscsi_if_rx(struct sock *sk, int len) 1104iscsi_if_rx(struct sk_buff *skb)
1106{ 1105{
1107 struct sk_buff *skb;
1108
1109 mutex_lock(&rx_queue_mutex); 1106 mutex_lock(&rx_queue_mutex);
1110 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 1107 while (skb->len >= NLMSG_SPACE(0)) {
1111 if (NETLINK_CREDS(skb)->uid) { 1108 int err;
1112 skb_pull(skb, skb->len); 1109 uint32_t rlen;
1113 goto free_skb; 1110 struct nlmsghdr *nlh;
1111 struct iscsi_uevent *ev;
1112
1113 nlh = nlmsg_hdr(skb);
1114 if (nlh->nlmsg_len < sizeof(*nlh) ||
1115 skb->len < nlh->nlmsg_len) {
1116 break;
1114 } 1117 }
1115 1118
1116 while (skb->len >= NLMSG_SPACE(0)) { 1119 ev = NLMSG_DATA(nlh);
1117 int err; 1120 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
1118 uint32_t rlen; 1121 if (rlen > skb->len)
1119 struct nlmsghdr *nlh; 1122 rlen = skb->len;
1120 struct iscsi_uevent *ev;
1121 1123
1122 nlh = nlmsg_hdr(skb); 1124 err = iscsi_if_recv_msg(skb, nlh);
1123 if (nlh->nlmsg_len < sizeof(*nlh) || 1125 if (err) {
1124 skb->len < nlh->nlmsg_len) { 1126 ev->type = ISCSI_KEVENT_IF_ERROR;
1125 break; 1127 ev->iferror = err;
1126 }
1127
1128 ev = NLMSG_DATA(nlh);
1129 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
1130 if (rlen > skb->len)
1131 rlen = skb->len;
1132
1133 err = iscsi_if_recv_msg(skb, nlh);
1134 if (err) {
1135 ev->type = ISCSI_KEVENT_IF_ERROR;
1136 ev->iferror = err;
1137 }
1138 do {
1139 /*
1140 * special case for GET_STATS:
1141 * on success - sending reply and stats from
1142 * inside of if_recv_msg(),
1143 * on error - fall through.
1144 */
1145 if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
1146 break;
1147 err = iscsi_if_send_reply(
1148 NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
1149 nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
1150 } while (err < 0 && err != -ECONNREFUSED);
1151 skb_pull(skb, rlen);
1152 } 1128 }
1153free_skb: 1129 do {
1154 kfree_skb(skb); 1130 /*
1131 * special case for GET_STATS:
1132 * on success - sending reply and stats from
1133 * inside of if_recv_msg(),
1134 * on error - fall through.
1135 */
1136 if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
1137 break;
1138 err = iscsi_if_send_reply(
1139 NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
1140 nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
1141 } while (err < 0 && err != -ECONNREFUSED);
1142 skb_pull(skb, rlen);
1155 } 1143 }
1156 mutex_unlock(&rx_queue_mutex); 1144 mutex_unlock(&rx_queue_mutex);
1157} 1145}
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index 056519cd92bc..9aa345121e09 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -165,22 +165,10 @@ static int ecryptfs_process_nl_quit(struct sk_buff *skb)
165 * it to its desired netlink context element and wake up the process 165 * it to its desired netlink context element and wake up the process
166 * that is waiting for a response. 166 * that is waiting for a response.
167 */ 167 */
168static void ecryptfs_receive_nl_message(struct sock *sk, int len) 168static void ecryptfs_receive_nl_message(struct sk_buff *skb)
169{ 169{
170 struct sk_buff *skb;
171 struct nlmsghdr *nlh; 170 struct nlmsghdr *nlh;
172 int rc = 0; /* skb_recv_datagram requires this */
173 171
174receive:
175 skb = skb_recv_datagram(sk, 0, 0, &rc);
176 if (rc == -EINTR)
177 goto receive;
178 else if (rc < 0) {
179 ecryptfs_printk(KERN_ERR, "Error occurred while "
180 "receiving eCryptfs netlink message; "
181 "rc = [%d]\n", rc);
182 return;
183 }
184 nlh = nlmsg_hdr(skb); 172 nlh = nlmsg_hdr(skb);
185 if (!NLMSG_OK(nlh, skb->len)) { 173 if (!NLMSG_OK(nlh, skb->len)) {
186 ecryptfs_printk(KERN_ERR, "Received corrupt netlink " 174 ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 10eb56b2940a..b62f823e90cf 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -153,7 +153,7 @@ struct cn_dev {
153 153
154 u32 seq, groups; 154 u32 seq, groups;
155 struct sock *nls; 155 struct sock *nls;
156 void (*input) (struct sock * sk, int len); 156 void (*input) (struct sk_buff *skb);
157 157
158 struct cn_queue_dev *cbdev; 158 struct cn_queue_dev *cbdev;
159}; 159};
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 7b552b6c2c19..7c1f3b1d2ee5 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -175,7 +175,7 @@ struct netlink_skb_parms
175 175
176extern struct sock *netlink_kernel_create(struct net *net, 176extern struct sock *netlink_kernel_create(struct net *net,
177 int unit,unsigned int groups, 177 int unit,unsigned int groups,
178 void (*input)(struct sock *sk, int len), 178 void (*input)(struct sk_buff *skb),
179 struct mutex *cb_mutex, 179 struct mutex *cb_mutex,
180 struct module *module); 180 struct module *module);
181extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); 181extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 1afd3e837d23..9298218c07f9 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -220,9 +220,9 @@ struct nl_info {
220 u32 pid; 220 u32 pid;
221}; 221};
222 222
223extern unsigned int netlink_run_queue(struct sock *sk, unsigned int qlen, 223extern int netlink_rcv_skb(struct sk_buff *skb,
224 int (*cb)(struct sk_buff *, 224 int (*cb)(struct sk_buff *,
225 struct nlmsghdr *)); 225 struct nlmsghdr *));
226extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, 226extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb,
227 u32 pid, unsigned int group, int report, 227 u32 pid, unsigned int group, int report,
228 gfp_t flags); 228 gfp_t flags);
diff --git a/kernel/audit.c b/kernel/audit.c
index f3c390f6c0b4..2924251a6547 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -847,18 +847,10 @@ static void audit_receive_skb(struct sk_buff *skb)
847} 847}
848 848
849/* Receive messages from netlink socket. */ 849/* Receive messages from netlink socket. */
850static void audit_receive(struct sock *sk, int length) 850static void audit_receive(struct sk_buff *skb)
851{ 851{
852 struct sk_buff *skb;
853 unsigned int qlen;
854
855 mutex_lock(&audit_cmd_mutex); 852 mutex_lock(&audit_cmd_mutex);
856 853 audit_receive_skb(skb);
857 for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
858 skb = skb_dequeue(&sk->sk_receive_queue);
859 audit_receive_skb(skb);
860 kfree_skb(skb);
861 }
862 mutex_unlock(&audit_cmd_mutex); 854 mutex_unlock(&audit_cmd_mutex);
863} 855}
864 856
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 471d2d9f8eae..1072d16696c3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1312,15 +1312,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1312 return doit(skb, nlh, (void *)&rta_buf[0]); 1312 return doit(skb, nlh, (void *)&rta_buf[0]);
1313} 1313}
1314 1314
1315static void rtnetlink_rcv(struct sock *sk, int len) 1315static void rtnetlink_rcv(struct sk_buff *skb)
1316{ 1316{
1317 unsigned int qlen = 0; 1317 rtnl_lock();
1318 1318 netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
1319 do { 1319 rtnl_unlock();
1320 rtnl_lock();
1321 qlen = netlink_run_queue(sk, qlen, &rtnetlink_rcv_msg);
1322 rtnl_unlock();
1323 } while (qlen);
1324} 1320}
1325 1321
1326static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) 1322static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index ebb38feb4df3..f7fba7721e63 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,17 +115,6 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
115 RCV_SKB_FAIL(-EINVAL); 115 RCV_SKB_FAIL(-EINVAL);
116} 116}
117 117
118static void dnrmg_receive_user_sk(struct sock *sk, int len)
119{
120 struct sk_buff *skb;
121 unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
122
123 for (; qlen && (skb = skb_dequeue(&sk->sk_receive_queue)); qlen--) {
124 dnrmg_receive_user_skb(skb);
125 kfree_skb(skb);
126 }
127}
128
129static struct nf_hook_ops dnrmg_ops = { 118static struct nf_hook_ops dnrmg_ops = {
130 .hook = dnrmg_hook, 119 .hook = dnrmg_hook,
131 .pf = PF_DECnet, 120 .pf = PF_DECnet,
@@ -139,7 +128,8 @@ static int __init dn_rtmsg_init(void)
139 128
140 dnrmg = netlink_kernel_create(&init_net, 129 dnrmg = netlink_kernel_create(&init_net,
141 NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, 130 NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
142 dnrmg_receive_user_sk, NULL, THIS_MODULE); 131 dnrmg_receive_user_skb,
132 NULL, THIS_MODULE);
143 if (dnrmg == NULL) { 133 if (dnrmg == NULL) {
144 printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); 134 printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
145 return -ENOMEM; 135 return -ENOMEM;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f823ca34cb12..a5cba2349605 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -62,6 +62,9 @@ static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
62#define FIB_TABLE_HASHSZ 256 62#define FIB_TABLE_HASHSZ 256
63static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; 63static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
64 64
65static struct sock *fibnl = NULL;
66
67
65struct fib_table *fib_new_table(u32 id) 68struct fib_table *fib_new_table(u32 id)
66{ 69{
67 struct fib_table *tb; 70 struct fib_table *tb;
@@ -811,13 +814,13 @@ static void nl_fib_input(struct sock *sk, int len)
811 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 814 pid = NETLINK_CB(skb).pid; /* pid of sending process */
812 NETLINK_CB(skb).pid = 0; /* from kernel */ 815 NETLINK_CB(skb).pid = 0; /* from kernel */
813 NETLINK_CB(skb).dst_group = 0; /* unicast */ 816 NETLINK_CB(skb).dst_group = 0; /* unicast */
814 netlink_unicast(sk, skb, pid, MSG_DONTWAIT); 817 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
815} 818}
816 819
817static void nl_fib_lookup_init(void) 820static void nl_fib_lookup_init(void)
818{ 821{
819 netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, nl_fib_input, 822 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
820 NULL, THIS_MODULE); 823 nl_fib_input, NULL, THIS_MODULE);
821} 824}
822 825
823static void fib_disable_ip(struct net_device *dev, int force) 826static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b04a6ee5a9a1..7eb83ebed2ec 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -839,15 +839,11 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
839 839
840static DEFINE_MUTEX(inet_diag_mutex); 840static DEFINE_MUTEX(inet_diag_mutex);
841 841
842static void inet_diag_rcv(struct sock *sk, int len) 842static void inet_diag_rcv(struct sk_buff *skb)
843{ 843{
844 unsigned int qlen = 0; 844 mutex_lock(&inet_diag_mutex);
845 845 netlink_rcv_skb(skb, &inet_diag_rcv_msg);
846 do { 846 mutex_unlock(&inet_diag_mutex);
847 mutex_lock(&inet_diag_mutex);
848 qlen = netlink_run_queue(sk, qlen, &inet_diag_rcv_msg);
849 mutex_unlock(&inet_diag_mutex);
850 } while (qlen);
851} 847}
852 848
853static DEFINE_SPINLOCK(inet_diag_register_lock); 849static DEFINE_SPINLOCK(inet_diag_register_lock);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index aaa3f5c56761..23cbfc7c80fd 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -475,7 +475,7 @@ ipq_dev_drop(int ifindex)
475#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 475#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
476 476
477static inline void 477static inline void
478ipq_rcv_skb(struct sk_buff *skb) 478__ipq_rcv_skb(struct sk_buff *skb)
479{ 479{
480 int status, type, pid, flags, nlmsglen, skblen; 480 int status, type, pid, flags, nlmsglen, skblen;
481 struct nlmsghdr *nlh; 481 struct nlmsghdr *nlh;
@@ -533,19 +533,10 @@ ipq_rcv_skb(struct sk_buff *skb)
533} 533}
534 534
535static void 535static void
536ipq_rcv_sk(struct sock *sk, int len) 536ipq_rcv_skb(struct sk_buff *skb)
537{ 537{
538 struct sk_buff *skb;
539 unsigned int qlen;
540
541 mutex_lock(&ipqnl_mutex); 538 mutex_lock(&ipqnl_mutex);
542 539 __ipq_rcv_skb(skb);
543 for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
544 skb = skb_dequeue(&sk->sk_receive_queue);
545 ipq_rcv_skb(skb);
546 kfree_skb(skb);
547 }
548
549 mutex_unlock(&ipqnl_mutex); 540 mutex_unlock(&ipqnl_mutex);
550} 541}
551 542
@@ -670,7 +661,7 @@ static int __init ip_queue_init(void)
670 661
671 netlink_register_notifier(&ipq_nl_notifier); 662 netlink_register_notifier(&ipq_nl_notifier);
672 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, 663 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
673 ipq_rcv_sk, NULL, THIS_MODULE); 664 ipq_rcv_skb, NULL, THIS_MODULE);
674 if (ipqnl == NULL) { 665 if (ipqnl == NULL) {
675 printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); 666 printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
676 goto cleanup_netlink_notifier; 667 goto cleanup_netlink_notifier;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index c75f467a8f51..0473145ac534 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -464,7 +464,7 @@ ipq_dev_drop(int ifindex)
464#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 464#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
465 465
466static inline void 466static inline void
467ipq_rcv_skb(struct sk_buff *skb) 467__ipq_rcv_skb(struct sk_buff *skb)
468{ 468{
469 int status, type, pid, flags, nlmsglen, skblen; 469 int status, type, pid, flags, nlmsglen, skblen;
470 struct nlmsghdr *nlh; 470 struct nlmsghdr *nlh;
@@ -522,19 +522,10 @@ ipq_rcv_skb(struct sk_buff *skb)
522} 522}
523 523
524static void 524static void
525ipq_rcv_sk(struct sock *sk, int len) 525ipq_rcv_skb(struct sk_buff *skb)
526{ 526{
527 struct sk_buff *skb;
528 unsigned int qlen;
529
530 mutex_lock(&ipqnl_mutex); 527 mutex_lock(&ipqnl_mutex);
531 528 __ipq_rcv_skb(skb);
532 for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
533 skb = skb_dequeue(&sk->sk_receive_queue);
534 ipq_rcv_skb(skb);
535 kfree_skb(skb);
536 }
537
538 mutex_unlock(&ipqnl_mutex); 529 mutex_unlock(&ipqnl_mutex);
539} 530}
540 531
@@ -658,8 +649,8 @@ static int __init ip6_queue_init(void)
658 struct proc_dir_entry *proc; 649 struct proc_dir_entry *proc;
659 650
660 netlink_register_notifier(&ipq_nl_notifier); 651 netlink_register_notifier(&ipq_nl_notifier);
661 ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk, 652 ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
662 NULL, THIS_MODULE); 653 ipq_rcv_skb, NULL, THIS_MODULE);
663 if (ipqnl == NULL) { 654 if (ipqnl == NULL) {
664 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); 655 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
665 goto cleanup_netlink_notifier; 656 goto cleanup_netlink_notifier;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 99775af19ff4..2128542995f7 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -169,15 +169,11 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
169 } 169 }
170} 170}
171 171
172static void nfnetlink_rcv(struct sock *sk, int len) 172static void nfnetlink_rcv(struct sk_buff *skb)
173{ 173{
174 unsigned int qlen = 0; 174 nfnl_lock();
175 175 netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
176 do { 176 nfnl_unlock();
177 nfnl_lock();
178 qlen = netlink_run_queue(sk, qlen, nfnetlink_rcv_msg);
179 nfnl_unlock();
180 } while (qlen);
181} 177}
182 178
183static void __exit nfnetlink_exit(void) 179static void __exit nfnetlink_exit(void)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4ce7dcbcb6ef..c776bcd9f825 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -80,7 +80,7 @@ struct netlink_sock {
80 struct netlink_callback *cb; 80 struct netlink_callback *cb;
81 struct mutex *cb_mutex; 81 struct mutex *cb_mutex;
82 struct mutex cb_def_mutex; 82 struct mutex cb_def_mutex;
83 void (*data_ready)(struct sock *sk, int bytes); 83 void (*netlink_rcv)(struct sk_buff *skb);
84 struct module *module; 84 struct module *module;
85}; 85};
86 86
@@ -127,7 +127,6 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
127 127
128static int netlink_dump(struct sock *sk); 128static int netlink_dump(struct sock *sk);
129static void netlink_destroy_callback(struct netlink_callback *cb); 129static void netlink_destroy_callback(struct netlink_callback *cb);
130static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb);
131 130
132static DEFINE_RWLOCK(nl_table_lock); 131static DEFINE_RWLOCK(nl_table_lock);
133static atomic_t nl_table_users = ATOMIC_INIT(0); 132static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -709,21 +708,17 @@ static void netlink_overrun(struct sock *sk)
709 708
710static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) 709static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
711{ 710{
712 int protocol = ssk->sk_protocol;
713 struct net *net;
714 struct sock *sock; 711 struct sock *sock;
715 struct netlink_sock *nlk; 712 struct netlink_sock *nlk;
716 713
717 net = ssk->sk_net; 714 sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid);
718 sock = netlink_lookup(net, protocol, pid);
719 if (!sock) 715 if (!sock)
720 return ERR_PTR(-ECONNREFUSED); 716 return ERR_PTR(-ECONNREFUSED);
721 717
722 /* Don't bother queuing skb if kernel socket has no input function */ 718 /* Don't bother queuing skb if kernel socket has no input function */
723 nlk = nlk_sk(sock); 719 nlk = nlk_sk(sock);
724 if ((netlink_is_kernel(sock) && !nlk->data_ready) || 720 if (sock->sk_state == NETLINK_CONNECTED &&
725 (sock->sk_state == NETLINK_CONNECTED && 721 nlk->dst_pid != nlk_sk(ssk)->pid) {
726 nlk->dst_pid != nlk_sk(ssk)->pid)) {
727 sock_put(sock); 722 sock_put(sock);
728 return ERR_PTR(-ECONNREFUSED); 723 return ERR_PTR(-ECONNREFUSED);
729 } 724 }
@@ -837,7 +832,34 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
837 return skb; 832 return skb;
838} 833}
839 834
840int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock) 835static inline void netlink_rcv_wake(struct sock *sk)
836{
837 struct netlink_sock *nlk = nlk_sk(sk);
838
839 if (skb_queue_empty(&sk->sk_receive_queue))
840 clear_bit(0, &nlk->state);
841 if (!test_bit(0, &nlk->state))
842 wake_up_interruptible(&nlk->wait);
843}
844
845static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
846{
847 int ret;
848 struct netlink_sock *nlk = nlk_sk(sk);
849
850 ret = -ECONNREFUSED;
851 if (nlk->netlink_rcv != NULL) {
852 ret = skb->len;
853 skb_set_owner_r(skb, sk);
854 nlk->netlink_rcv(skb);
855 }
856 kfree_skb(skb);
857 sock_put(sk);
858 return ret;
859}
860
861int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
862 u32 pid, int nonblock)
841{ 863{
842 struct sock *sk; 864 struct sock *sk;
843 int err; 865 int err;
@@ -852,6 +874,9 @@ retry:
852 kfree_skb(skb); 874 kfree_skb(skb);
853 return PTR_ERR(sk); 875 return PTR_ERR(sk);
854 } 876 }
877 if (netlink_is_kernel(sk))
878 return netlink_unicast_kernel(sk, skb);
879
855 err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); 880 err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
856 if (err == 1) 881 if (err == 1)
857 goto retry; 882 goto retry;
@@ -1151,16 +1176,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1151 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1176 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1152} 1177}
1153 1178
1154static inline void netlink_rcv_wake(struct sock *sk)
1155{
1156 struct netlink_sock *nlk = nlk_sk(sk);
1157
1158 if (skb_queue_empty(&sk->sk_receive_queue))
1159 clear_bit(0, &nlk->state);
1160 if (!test_bit(0, &nlk->state))
1161 wake_up_interruptible(&nlk->wait);
1162}
1163
1164static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, 1179static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1165 struct msghdr *msg, size_t len) 1180 struct msghdr *msg, size_t len)
1166{ 1181{
@@ -1308,11 +1323,7 @@ out:
1308 1323
1309static void netlink_data_ready(struct sock *sk, int len) 1324static void netlink_data_ready(struct sock *sk, int len)
1310{ 1325{
1311 struct netlink_sock *nlk = nlk_sk(sk); 1326 BUG();
1312
1313 if (nlk->data_ready)
1314 nlk->data_ready(sk, len);
1315 netlink_rcv_wake(sk);
1316} 1327}
1317 1328
1318/* 1329/*
@@ -1323,7 +1334,7 @@ static void netlink_data_ready(struct sock *sk, int len)
1323 1334
1324struct sock * 1335struct sock *
1325netlink_kernel_create(struct net *net, int unit, unsigned int groups, 1336netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1326 void (*input)(struct sock *sk, int len), 1337 void (*input)(struct sk_buff *skb),
1327 struct mutex *cb_mutex, struct module *module) 1338 struct mutex *cb_mutex, struct module *module)
1328{ 1339{
1329 struct socket *sock; 1340 struct socket *sock;
@@ -1352,7 +1363,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1352 sk = sock->sk; 1363 sk = sock->sk;
1353 sk->sk_data_ready = netlink_data_ready; 1364 sk->sk_data_ready = netlink_data_ready;
1354 if (input) 1365 if (input)
1355 nlk_sk(sk)->data_ready = input; 1366 nlk_sk(sk)->netlink_rcv = input;
1356 1367
1357 if (netlink_insert(sk, net, 0)) 1368 if (netlink_insert(sk, net, 0))
1358 goto out_sock_release; 1369 goto out_sock_release;
@@ -1552,12 +1563,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1552 1563
1553 netlink_dump(sk); 1564 netlink_dump(sk);
1554 sock_put(sk); 1565 sock_put(sk);
1555 1566 return 0;
1556 /* We successfully started a dump, by returning -EINTR we
1557 * signal the queue mangement to interrupt processing of
1558 * any netlink messages so userspace gets a chance to read
1559 * the results. */
1560 return -EINTR;
1561} 1567}
1562 1568
1563void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1569void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
@@ -1594,13 +1600,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1594 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1600 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1595} 1601}
1596 1602
1597static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1603int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1598 struct nlmsghdr *)) 1604 struct nlmsghdr *))
1599{ 1605{
1600 struct nlmsghdr *nlh; 1606 struct nlmsghdr *nlh;
1601 int err; 1607 int err;
1602 1608
1603 while (skb->len >= nlmsg_total_size(0)) { 1609 while (skb->len >= nlmsg_total_size(0)) {
1610 int msglen;
1611
1604 nlh = nlmsg_hdr(skb); 1612 nlh = nlmsg_hdr(skb);
1605 err = 0; 1613 err = 0;
1606 1614
@@ -1616,86 +1624,20 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1616 goto skip; 1624 goto skip;
1617 1625
1618 err = cb(skb, nlh); 1626 err = cb(skb, nlh);
1619 if (err == -EINTR) {
1620 /* Not an error, but we interrupt processing */
1621 netlink_queue_skip(nlh, skb);
1622 return err;
1623 }
1624skip: 1627skip:
1625 if (nlh->nlmsg_flags & NLM_F_ACK || err) 1628 if (nlh->nlmsg_flags & NLM_F_ACK || err)
1626 netlink_ack(skb, nlh, err); 1629 netlink_ack(skb, nlh, err);
1627 1630
1628 netlink_queue_skip(nlh, skb); 1631 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1632 if (msglen > skb->len)
1633 msglen = skb->len;
1634 skb_pull(skb, msglen);
1629 } 1635 }
1630 1636
1631 return 0; 1637 return 0;
1632} 1638}
1633 1639
1634/** 1640/**
1635 * nelink_run_queue - Process netlink receive queue.
1636 * @sk: Netlink socket containing the queue
1637 * @qlen: Initial queue length
1638 * @cb: Callback function invoked for each netlink message found
1639 *
1640 * Processes as much as there was in the queue upon entry and invokes
1641 * a callback function for each netlink message found. The callback
1642 * function may refuse a message by returning a negative error code
1643 * but setting the error pointer to 0 in which case this function
1644 * returns with a qlen != 0.
1645 *
1646 * qlen must be initialized to 0 before the initial entry, afterwards
1647 * the function may be called repeatedly until the returned qlen is 0.
1648 *
1649 * The callback function may return -EINTR to signal that processing
1650 * of netlink messages shall be interrupted. In this case the message
1651 * currently being processed will NOT be requeued onto the receive
1652 * queue.
1653 */
1654unsigned int netlink_run_queue(struct sock *sk, unsigned int qlen,
1655 int (*cb)(struct sk_buff *, struct nlmsghdr *))
1656{
1657 struct sk_buff *skb;
1658
1659 if (!qlen || qlen > skb_queue_len(&sk->sk_receive_queue))
1660 qlen = skb_queue_len(&sk->sk_receive_queue);
1661
1662 for (; qlen; qlen--) {
1663 skb = skb_dequeue(&sk->sk_receive_queue);
1664 if (netlink_rcv_skb(skb, cb)) {
1665 if (skb->len)
1666 skb_queue_head(&sk->sk_receive_queue, skb);
1667 else {
1668 kfree_skb(skb);
1669 qlen--;
1670 }
1671 break;
1672 }
1673
1674 kfree_skb(skb);
1675 }
1676
1677 return qlen;
1678}
1679
1680/**
1681 * netlink_queue_skip - Skip netlink message while processing queue.
1682 * @nlh: Netlink message to be skipped
1683 * @skb: Socket buffer containing the netlink messages.
1684 *
1685 * Pulls the given netlink message off the socket buffer so the next
1686 * call to netlink_queue_run() will not reconsider the message.
1687 */
1688static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
1689{
1690 int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1691
1692 if (msglen > skb->len)
1693 msglen = skb->len;
1694
1695 skb_pull(skb, msglen);
1696}
1697
1698/**
1699 * nlmsg_notify - send a notification netlink message 1641 * nlmsg_notify - send a notification netlink message
1700 * @sk: netlink socket to use 1642 * @sk: netlink socket to use
1701 * @skb: notification message 1643 * @skb: notification message
@@ -1998,7 +1940,7 @@ panic:
1998core_initcall(netlink_proto_init); 1940core_initcall(netlink_proto_init);
1999 1941
2000EXPORT_SYMBOL(netlink_ack); 1942EXPORT_SYMBOL(netlink_ack);
2001EXPORT_SYMBOL(netlink_run_queue); 1943EXPORT_SYMBOL(netlink_rcv_skb);
2002EXPORT_SYMBOL(netlink_broadcast); 1944EXPORT_SYMBOL(netlink_broadcast);
2003EXPORT_SYMBOL(netlink_dump_start); 1945EXPORT_SYMBOL(netlink_dump_start);
2004EXPORT_SYMBOL(netlink_kernel_create); 1946EXPORT_SYMBOL(netlink_kernel_create);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 3f1104dc128b..150579a21469 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -470,15 +470,11 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
470 return ops->doit(skb, &info); 470 return ops->doit(skb, &info);
471} 471}
472 472
473static void genl_rcv(struct sock *sk, int len) 473static void genl_rcv(struct sk_buff *skb)
474{ 474{
475 unsigned int qlen = 0; 475 genl_lock();
476 476 netlink_rcv_skb(skb, &genl_rcv_msg);
477 do { 477 genl_unlock();
478 genl_lock();
479 qlen = netlink_run_queue(sk, qlen, genl_rcv_msg);
480 genl_unlock();
481 } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen);
482} 478}
483 479
484/************************************************************************** 480/**************************************************************************
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5238f6a8dfad..d41588d101d0 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1895,16 +1895,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1895 return link->doit(skb, nlh, attrs); 1895 return link->doit(skb, nlh, attrs);
1896} 1896}
1897 1897
1898static void xfrm_netlink_rcv(struct sock *sk, int len) 1898static void xfrm_netlink_rcv(struct sk_buff *skb)
1899{ 1899{
1900 unsigned int qlen = 0; 1900 mutex_lock(&xfrm_cfg_mutex);
1901 1901 netlink_rcv_skb(skb, &xfrm_user_rcv_msg);
1902 do { 1902 mutex_unlock(&xfrm_cfg_mutex);
1903 mutex_lock(&xfrm_cfg_mutex);
1904 qlen = netlink_run_queue(sk, qlen, &xfrm_user_rcv_msg);
1905 mutex_unlock(&xfrm_cfg_mutex);
1906
1907 } while (qlen);
1908} 1903}
1909 1904
1910static inline size_t xfrm_expire_msgsize(void) 1905static inline size_t xfrm_expire_msgsize(void)