Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--  net/netlink/af_netlink.c  900
1 file changed, 804 insertions, 96 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1e3fd5bfcd86..12ac6b47a35c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3,6 +3,7 @@
  *
  * Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
  * 		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *		Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -55,87 +56,45 @@
 #include <linux/types.h>
 #include <linux/audit.h>
 #include <linux/mutex.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/scm.h>
 #include <net/netlink.h>
 
-#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
-#define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
-
-struct netlink_sock {
-	/* struct sock has to be the first member of netlink_sock */
-	struct sock		sk;
-	u32			portid;
-	u32			dst_portid;
-	u32			dst_group;
-	u32			flags;
-	u32			subscriptions;
-	u32			ngroups;
-	unsigned long		*groups;
-	unsigned long		state;
-	wait_queue_head_t	wait;
-	struct netlink_callback	*cb;
-	struct mutex		*cb_mutex;
-	struct mutex		cb_def_mutex;
-	void			(*netlink_rcv)(struct sk_buff *skb);
-	void			(*netlink_bind)(int group);
-	struct module		*module;
-};
+#include "af_netlink.h"
 
 struct listeners {
 	struct rcu_head		rcu;
 	unsigned long		masks[0];
 };
 
+/* state bits */
+#define NETLINK_CONGESTED	0x0
+
+/* flags */
 #define NETLINK_KERNEL_SOCKET	0x1
 #define NETLINK_RECV_PKTINFO	0x2
 #define NETLINK_BROADCAST_SEND_ERROR	0x4
 #define NETLINK_RECV_NO_ENOBUFS	0x8
 
-static inline struct netlink_sock *nlk_sk(struct sock *sk)
-{
-	return container_of(sk, struct netlink_sock, sk);
-}
-
 static inline int netlink_is_kernel(struct sock *sk)
 {
 	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
 }
 
-struct nl_portid_hash {
-	struct hlist_head	*table;
-	unsigned long		rehash_time;
-
-	unsigned int		mask;
-	unsigned int		shift;
-
-	unsigned int		entries;
-	unsigned int		max_shift;
-
-	u32			rnd;
-};
-
-struct netlink_table {
-	struct nl_portid_hash	hash;
-	struct hlist_head	mc_list;
-	struct listeners __rcu	*listeners;
-	unsigned int		flags;
-	unsigned int		groups;
-	struct mutex		*cb_mutex;
-	struct module		*module;
-	void			(*bind)(int group);
-	int			registered;
-};
-
-static struct netlink_table *nl_table;
+struct netlink_table *nl_table;
+EXPORT_SYMBOL_GPL(nl_table);
 
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
 static int netlink_dump(struct sock *sk);
+static void netlink_skb_destructor(struct sk_buff *skb);
 
-static DEFINE_RWLOCK(nl_table_lock);
+DEFINE_RWLOCK(nl_table_lock);
+EXPORT_SYMBOL_GPL(nl_table_lock);
 static atomic_t nl_table_users = ATOMIC_INIT(0);
 
 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
@@ -152,6 +111,599 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u
 	return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
 }
 
+static void netlink_overrun(struct sock *sk)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
+		if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
+			sk->sk_err = ENOBUFS;
+			sk->sk_error_report(sk);
+		}
+	}
+	atomic_inc(&sk->sk_drops);
+}
+
+static void netlink_rcv_wake(struct sock *sk)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (skb_queue_empty(&sk->sk_receive_queue))
+		clear_bit(NETLINK_CONGESTED, &nlk->state);
+	if (!test_bit(NETLINK_CONGESTED, &nlk->state))
+		wake_up_interruptible(&nlk->wait);
+}
+
+#ifdef CONFIG_NETLINK_MMAP
+static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+{
+	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+}
+
+static bool netlink_rx_is_mmaped(struct sock *sk)
+{
+	return nlk_sk(sk)->rx_ring.pg_vec != NULL;
+}
+
+static bool netlink_tx_is_mmaped(struct sock *sk)
+{
+	return nlk_sk(sk)->tx_ring.pg_vec != NULL;
+}
+
+static __pure struct page *pgvec_to_page(const void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		return vmalloc_to_page(addr);
+	else
+		return virt_to_page(addr);
+}
+
+static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; i < len; i++) {
+		if (pg_vec[i] != NULL) {
+			if (is_vmalloc_addr(pg_vec[i]))
+				vfree(pg_vec[i]);
+			else
+				free_pages((unsigned long)pg_vec[i], order);
+		}
+	}
+	kfree(pg_vec);
+}
+
+static void *alloc_one_pg_vec_page(unsigned long order)
+{
+	void *buffer;
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
+			  __GFP_NOWARN | __GFP_NORETRY;
+
+	buffer = (void *)__get_free_pages(gfp_flags, order);
+	if (buffer != NULL)
+		return buffer;
+
+	buffer = vzalloc((1 << order) * PAGE_SIZE);
+	if (buffer != NULL)
+		return buffer;
+
+	gfp_flags &= ~__GFP_NORETRY;
+	return (void *)__get_free_pages(gfp_flags, order);
+}
+
+static void **alloc_pg_vec(struct netlink_sock *nlk,
+			   struct nl_mmap_req *req, unsigned int order)
+{
+	unsigned int block_nr = req->nm_block_nr;
+	unsigned int i;
+	void **pg_vec, *ptr;
+
+	pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
+	if (pg_vec == NULL)
+		return NULL;
+
+	for (i = 0; i < block_nr; i++) {
+		pg_vec[i] = ptr = alloc_one_pg_vec_page(order);
+		if (pg_vec[i] == NULL)
+			goto err1;
+	}
+
+	return pg_vec;
+err1:
+	free_pg_vec(pg_vec, order, block_nr);
+	return NULL;
+}
+
+static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
+			    bool closing, bool tx_ring)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring;
+	struct sk_buff_head *queue;
+	void **pg_vec = NULL;
+	unsigned int order = 0;
+	int err;
+
+	ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+
+	if (!closing) {
+		if (atomic_read(&nlk->mapped))
+			return -EBUSY;
+		if (atomic_read(&ring->pending))
+			return -EBUSY;
+	}
+
+	if (req->nm_block_nr) {
+		if (ring->pg_vec != NULL)
+			return -EBUSY;
+
+		if ((int)req->nm_block_size <= 0)
+			return -EINVAL;
+		if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
+			return -EINVAL;
+		if (req->nm_frame_size < NL_MMAP_HDRLEN)
+			return -EINVAL;
+		if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
+			return -EINVAL;
+
+		ring->frames_per_block = req->nm_block_size /
+					 req->nm_frame_size;
+		if (ring->frames_per_block == 0)
+			return -EINVAL;
+		if (ring->frames_per_block * req->nm_block_nr !=
+		    req->nm_frame_nr)
+			return -EINVAL;
+
+		order = get_order(req->nm_block_size);
+		pg_vec = alloc_pg_vec(nlk, req, order);
+		if (pg_vec == NULL)
+			return -ENOMEM;
+	} else {
+		if (req->nm_frame_nr)
+			return -EINVAL;
+	}
+
+	err = -EBUSY;
+	mutex_lock(&nlk->pg_vec_lock);
+	if (closing || atomic_read(&nlk->mapped) == 0) {
+		err = 0;
+		spin_lock_bh(&queue->lock);
+
+		ring->frame_max = req->nm_frame_nr - 1;
+		ring->head = 0;
+		ring->frame_size = req->nm_frame_size;
+		ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+
+		swap(ring->pg_vec_len, req->nm_block_nr);
+		swap(ring->pg_vec_order, order);
+		swap(ring->pg_vec, pg_vec);
+
+		__skb_queue_purge(queue);
+		spin_unlock_bh(&queue->lock);
+
+		WARN_ON(atomic_read(&nlk->mapped));
+	}
+	mutex_unlock(&nlk->pg_vec_lock);
+
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->nm_block_nr);
+	return err;
+}
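[For context, this is the userspace half of the handshake: ring setup is a plain setsockopt() call. A minimal sketch, assuming the nl_mmap_req layout and the NETLINK_RX_RING/NETLINK_TX_RING option names from the companion uapi patch to <linux/netlink.h>; the geometry has to satisfy the checks above: nm_block_size a positive multiple of PAGE_SIZE, nm_frame_size at least NL_MMAP_HDRLEN and NL_MMAP_MSG_ALIGNMENT-aligned, and nm_frame_nr equal to frames-per-block times nm_block_nr.

    #include <sys/socket.h>
    #include <linux/netlink.h>

    /* hypothetical helper; fd is an already-created AF_NETLINK socket */
    static int setup_rx_ring(int fd)
    {
    	struct nl_mmap_req req = {
    		.nm_block_size = 4096,	/* multiple of PAGE_SIZE */
    		.nm_block_nr   = 64,
    		.nm_frame_size = 2048,	/* two frames per block */
    		.nm_frame_nr   = 64 * 2,	/* block_nr * frames_per_block */
    	};

    	/* fails with EPERM without CAP_NET_ADMIN (see the setsockopt hunk
    	 * further down), or EINVAL on bad geometry */
    	return setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING,
    			  &req, sizeof(req));
    }
]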
+
+static void netlink_mm_open(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct socket *sock = file->private_data;
+	struct sock *sk = sock->sk;
+
+	if (sk)
+		atomic_inc(&nlk_sk(sk)->mapped);
+}
+
+static void netlink_mm_close(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct socket *sock = file->private_data;
+	struct sock *sk = sock->sk;
+
+	if (sk)
+		atomic_dec(&nlk_sk(sk)->mapped);
+}
+
+static const struct vm_operations_struct netlink_mmap_ops = {
+	.open = netlink_mm_open,
+	.close = netlink_mm_close,
+};
+
+static int netlink_mmap(struct file *file, struct socket *sock,
+			struct vm_area_struct *vma)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring;
+	unsigned long start, size, expected;
+	unsigned int i;
+	int err = -EINVAL;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	mutex_lock(&nlk->pg_vec_lock);
+
+	expected = 0;
+	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+		if (ring->pg_vec == NULL)
+			continue;
+		expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
+	}
+
+	if (expected == 0)
+		goto out;
+
+	size = vma->vm_end - vma->vm_start;
+	if (size != expected)
+		goto out;
+
+	start = vma->vm_start;
+	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+		if (ring->pg_vec == NULL)
+			continue;
+
+		for (i = 0; i < ring->pg_vec_len; i++) {
+			struct page *page;
+			void *kaddr = ring->pg_vec[i];
+			unsigned int pg_num;
+
+			for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
+				page = pgvec_to_page(kaddr);
+				err = vm_insert_page(vma, start, page);
+				if (err < 0)
+					goto out;
+				start += PAGE_SIZE;
+				kaddr += PAGE_SIZE;
+			}
+		}
+	}
+
+	atomic_inc(&nlk->mapped);
+	vma->vm_ops = &netlink_mmap_ops;
+	err = 0;
+out:
+	mutex_unlock(&nlk->pg_vec_lock);
+	return err;
+}
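[Mapping follows the packet-socket model: one contiguous mmap() covers all configured rings, RX pages first and TX pages immediately after, and the requested length must match the combined ring size exactly (no partial maps, and vm_pgoff must be 0). A sketch continuing the setup above:

    #include <sys/mman.h>

    /* rx_size/tx_size = nm_block_nr * nm_block_size of the respective ring;
     * pass 0 for a ring that was not configured */
    static void *map_rings(int fd, size_t rx_size, size_t tx_size)
    {
    	void *mem = mmap(NULL, rx_size + tx_size, PROT_READ | PROT_WRITE,
    			 MAP_SHARED, fd, 0);

    	if (mem == MAP_FAILED)
    		return NULL;
    	/* the TX ring, if any, starts at (char *)mem + rx_size */
    	return mem;
    }
]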
+
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+{
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+	struct page *p_start, *p_end;
+
+	/* First page is flushed through netlink_{get,set}_status */
+	p_start = pgvec_to_page((void *)hdr + PAGE_SIZE);
+	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+	while (p_start <= p_end) {
+		flush_dcache_page(p_start);
+		p_start++;
+	}
+#endif
+}
+
+static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
+{
+	smp_rmb();
+	flush_dcache_page(pgvec_to_page(hdr));
+	return hdr->nm_status;
+}
+
+static void netlink_set_status(struct nl_mmap_hdr *hdr,
+			       enum nl_mmap_status status)
+{
+	hdr->nm_status = status;
+	flush_dcache_page(pgvec_to_page(hdr));
+	smp_wmb();
+}
+
+static struct nl_mmap_hdr *
+__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
+{
+	unsigned int pg_vec_pos, frame_off;
+
+	pg_vec_pos = pos / ring->frames_per_block;
+	frame_off = pos % ring->frames_per_block;
+
+	return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
+}
+
+static struct nl_mmap_hdr *
+netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
+		     enum nl_mmap_status status)
+{
+	struct nl_mmap_hdr *hdr;
+
+	hdr = __netlink_lookup_frame(ring, pos);
+	if (netlink_get_status(hdr) != status)
+		return NULL;
+
+	return hdr;
+}
+
+static struct nl_mmap_hdr *
+netlink_current_frame(const struct netlink_ring *ring,
+		      enum nl_mmap_status status)
+{
+	return netlink_lookup_frame(ring, ring->head, status);
+}
+
+static struct nl_mmap_hdr *
+netlink_previous_frame(const struct netlink_ring *ring,
+		       enum nl_mmap_status status)
+{
+	unsigned int prev;
+
+	prev = ring->head ? ring->head - 1 : ring->frame_max;
+	return netlink_lookup_frame(ring, prev, status);
+}
+
+static void netlink_increment_head(struct netlink_ring *ring)
+{
+	ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
+}
+
+static void netlink_forward_ring(struct netlink_ring *ring)
+{
+	unsigned int head = ring->head, pos = head;
+	const struct nl_mmap_hdr *hdr;
+
+	do {
+		hdr = __netlink_lookup_frame(ring, pos);
+		if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
+			break;
+		if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
+			break;
+		netlink_increment_head(ring);
+	} while (ring->head != head);
+}
+
+static bool netlink_dump_space(struct netlink_sock *nlk)
+{
+	struct netlink_ring *ring = &nlk->rx_ring;
+	struct nl_mmap_hdr *hdr;
+	unsigned int n;
+
+	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+	if (hdr == NULL)
+		return false;
+
+	n = ring->head + ring->frame_max / 2;
+	if (n > ring->frame_max)
+		n -= ring->frame_max;
+
+	hdr = __netlink_lookup_frame(ring, n);
+
+	return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
+}
+
+static unsigned int netlink_poll(struct file *file, struct socket *sock,
+				 poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	unsigned int mask;
+	int err;
+
+	if (nlk->rx_ring.pg_vec != NULL) {
+		/* Memory mapped sockets don't call recvmsg(), so flow control
+		 * for dumps is performed here. A dump is allowed to continue
+		 * if at least half the ring is unused.
+		 */
+		while (nlk->cb != NULL && netlink_dump_space(nlk)) {
+			err = netlink_dump(sk);
+			if (err < 0) {
+				sk->sk_err = err;
+				sk->sk_error_report(sk);
+				break;
+			}
+		}
+		netlink_rcv_wake(sk);
+	}
+
+	mask = datagram_poll(file, sock, wait);
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	if (nlk->rx_ring.pg_vec) {
+		netlink_forward_ring(&nlk->rx_ring);
+		if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
+			mask |= POLLIN | POLLRDNORM;
+	}
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+	spin_lock_bh(&sk->sk_write_queue.lock);
+	if (nlk->tx_ring.pg_vec) {
+		if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
+			mask |= POLLOUT | POLLWRNORM;
+	}
+	spin_unlock_bh(&sk->sk_write_queue.lock);
+
+	return mask;
+}
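[The userspace receive loop implied by this poll logic: block in poll(), then walk the ring from the last consumed frame. VALID frames are handled in place, COPY frames signal a message too large for a frame that must be fetched with an ordinary recv(), and writing UNUSED hands the frame back to the kernel. A sketch under the same uapi assumptions as above; handle_msg() is a hypothetical application callback, and the geometry is assumed to make frames contiguous (nm_block_size an exact multiple of nm_frame_size). On weakly ordered CPUs a read barrier after the status check pairs with the kernel's smp_wmb() in netlink_set_status().

    #include <poll.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    extern void handle_msg(struct nlmsghdr *nlh, unsigned int len);	/* app code */

    static void drain_rx_ring(int fd, void *rx_ring,
    			  unsigned int frame_size, unsigned int frame_nr)
    {
    	static unsigned int frame;	/* persists across calls */
    	struct pollfd pfd = { .fd = fd, .events = POLLIN };
    	char copybuf[16384];

    	poll(&pfd, 1, -1);
    	for (;;) {
    		struct nl_mmap_hdr *hdr = (struct nl_mmap_hdr *)
    			((char *)rx_ring + frame * frame_size);

    		if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
    			__sync_synchronize();	/* pairs with smp_wmb() above */
    			/* message starts right after the aligned header */
    			handle_msg((struct nlmsghdr *)
    				   ((char *)hdr + NL_MMAP_HDRLEN),
    				   hdr->nm_len);
    		} else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
    			/* too big for a frame: queued as a regular skb */
    			ssize_t len = recv(fd, copybuf, sizeof(copybuf), 0);

    			if (len > 0)
    				handle_msg((struct nlmsghdr *)copybuf, len);
    		} else {
    			break;	/* NL_MMAP_STATUS_UNUSED: ring drained */
    		}
    		hdr->nm_status = NL_MMAP_STATUS_UNUSED;	/* give frame back */
    		frame = (frame + 1) % frame_nr;
    	}
    }
]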
+
+static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
+{
+	return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
+}
+
+static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
+				   struct netlink_ring *ring,
+				   struct nl_mmap_hdr *hdr)
+{
+	unsigned int size;
+	void *data;
+
+	size = ring->frame_size - NL_MMAP_HDRLEN;
+	data = (void *)hdr + NL_MMAP_HDRLEN;
+
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+	skb->len = 0;
+
+	skb->destructor = netlink_skb_destructor;
+	NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
+	NETLINK_CB(skb).sk = sk;
+}
+
+static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
+				u32 dst_portid, u32 dst_group,
+				struct sock_iocb *siocb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring;
+	struct nl_mmap_hdr *hdr;
+	struct sk_buff *skb;
+	unsigned int maxlen;
+	bool excl = true;
+	int err = 0, len = 0;
+
+	/* Netlink messages are validated by the receiver before processing.
+	 * In order to avoid userspace changing the contents of the message
+	 * after validation, the socket and the ring may only be used by a
+	 * single process, otherwise we fall back to copying.
+	 */
+	if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
+	    atomic_read(&nlk->mapped) > 1)
+		excl = false;
+
+	mutex_lock(&nlk->pg_vec_lock);
+
+	ring = &nlk->tx_ring;
+	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+
+	do {
+		hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
+		if (hdr == NULL) {
+			if (!(msg->msg_flags & MSG_DONTWAIT) &&
+			    atomic_read(&nlk->tx_ring.pending))
+				schedule();
+			continue;
+		}
+		if (hdr->nm_len > maxlen) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		netlink_frame_flush_dcache(hdr);
+
+		if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
+			skb = alloc_skb_head(GFP_KERNEL);
+			if (skb == NULL) {
+				err = -ENOBUFS;
+				goto out;
+			}
+			sock_hold(sk);
+			netlink_ring_setup_skb(skb, sk, ring, hdr);
+			NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
+			__skb_put(skb, hdr->nm_len);
+			netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+			atomic_inc(&ring->pending);
+		} else {
+			skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
+			if (skb == NULL) {
+				err = -ENOBUFS;
+				goto out;
+			}
+			__skb_put(skb, hdr->nm_len);
+			memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
+			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+		}
+
+		netlink_increment_head(ring);
+
+		NETLINK_CB(skb).portid = nlk->portid;
+		NETLINK_CB(skb).dst_group = dst_group;
+		NETLINK_CB(skb).creds = siocb->scm->creds;
+
+		err = security_netlink_send(sk, skb);
+		if (err) {
+			kfree_skb(skb);
+			goto out;
+		}
+
+		if (unlikely(dst_group)) {
+			atomic_inc(&skb->users);
+			netlink_broadcast(sk, skb, dst_portid, dst_group,
+					  GFP_KERNEL);
+		}
+		err = netlink_unicast(sk, skb, dst_portid,
+				      msg->msg_flags & MSG_DONTWAIT);
+		if (err < 0)
+			goto out;
+		len += err;
+
+	} while (hdr != NULL ||
+		 (!(msg->msg_flags & MSG_DONTWAIT) &&
+		  atomic_read(&nlk->tx_ring.pending)));
+
+	if (len > 0)
+		err = len;
+out:
+	mutex_unlock(&nlk->pg_vec_lock);
+	return err;
+}
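[Transmit is the mirror image, and explains the sendmsg() hook added further down: the message is built directly in a TX frame, the frame is flipped to VALID, and a sendmsg() with a NULL iov_base tells the kernel to walk the ring rather than copy from user memory. A sketch, same assumptions as the previous examples:

    #include <string.h>
    #include <sys/uio.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    static int send_via_tx_ring(int fd, void *tx_ring, unsigned int frame_size,
    			    unsigned int frame_nr, unsigned int *framep,
    			    __u16 type, const void *payload, unsigned int plen)
    {
    	struct nl_mmap_hdr *hdr = (struct nl_mmap_hdr *)
    		((char *)tx_ring + *framep * frame_size);
    	struct nlmsghdr *nlh = (struct nlmsghdr *)
    		((char *)hdr + NL_MMAP_HDRLEN);
    	struct iovec iov = { .iov_base = NULL, .iov_len = 0 };
    	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };

    	if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
    		return -1;	/* ring full: kernel still owns this frame */

    	/* build the request in place */
    	nlh->nlmsg_len = NLMSG_HDRLEN + plen;
    	nlh->nlmsg_type = type;
    	nlh->nlmsg_flags = NLM_F_REQUEST;
    	memcpy(NLMSG_DATA(nlh), payload, plen);

    	hdr->nm_len = nlh->nlmsg_len;
    	hdr->nm_status = NL_MMAP_STATUS_VALID;	/* hand it to the kernel */
    	*framep = (*framep + 1) % frame_nr;

    	/* NULL iov_base is the marker netlink_sendmsg() checks for */
    	return sendmsg(fd, &msg, 0);
    }
]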
+
+static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct nl_mmap_hdr *hdr;
+
+	hdr = netlink_mmap_hdr(skb);
+	hdr->nm_len = skb->len;
+	hdr->nm_group = NETLINK_CB(skb).dst_group;
+	hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+	hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+	hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+	netlink_frame_flush_dcache(hdr);
+	netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+
+	NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
+	kfree_skb(skb);
+}
+
+static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring = &nlk->rx_ring;
+	struct nl_mmap_hdr *hdr;
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+	if (hdr == NULL) {
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		kfree_skb(skb);
+		netlink_overrun(sk);
+		return;
+	}
+	netlink_increment_head(ring);
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+	hdr->nm_len = skb->len;
+	hdr->nm_group = NETLINK_CB(skb).dst_group;
+	hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+	hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+	hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+	netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
+}
+
+#else /* CONFIG_NETLINK_MMAP */
+#define netlink_skb_is_mmaped(skb)	false
+#define netlink_rx_is_mmaped(sk)	false
+#define netlink_tx_is_mmaped(sk)	false
+#define netlink_mmap			sock_no_mmap
+#define netlink_poll			datagram_poll
+#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb)	0
+#endif /* CONFIG_NETLINK_MMAP */
+
 static void netlink_destroy_callback(struct netlink_callback *cb)
 {
 	kfree_skb(cb->skb);
@@ -164,6 +716,53 @@ static void netlink_consume_callback(struct netlink_callback *cb)
 	kfree(cb);
 }
 
+static void netlink_skb_destructor(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETLINK_MMAP
+	struct nl_mmap_hdr *hdr;
+	struct netlink_ring *ring;
+	struct sock *sk;
+
+	/* If a packet from the kernel to userspace was freed because of an
+	 * error without being delivered to userspace, the kernel must reset
+	 * the status. In the direction userspace to kernel, the status is
+	 * always reset here after the packet was processed and freed.
+	 */
+	if (netlink_skb_is_mmaped(skb)) {
+		hdr = netlink_mmap_hdr(skb);
+		sk = NETLINK_CB(skb).sk;
+
+		if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
+			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+			ring = &nlk_sk(sk)->tx_ring;
+		} else {
+			if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
+				hdr->nm_len = 0;
+				netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+			}
+			ring = &nlk_sk(sk)->rx_ring;
+		}
+
+		WARN_ON(atomic_read(&ring->pending) == 0);
+		atomic_dec(&ring->pending);
+		sock_put(sk);
+
+		skb->data = NULL;
+	}
+#endif
+	if (skb->sk != NULL)
+		sock_rfree(skb);
+}
+
+static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+	WARN_ON(skb->sk != NULL);
+	skb->sk = sk;
+	skb->destructor = netlink_skb_destructor;
+	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+	sk_mem_charge(sk, skb->truesize);
+}
+
 static void netlink_sock_destruct(struct sock *sk)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
@@ -177,6 +776,18 @@ static void netlink_sock_destruct(struct sock *sk)
 	}
 
 	skb_queue_purge(&sk->sk_receive_queue);
+#ifdef CONFIG_NETLINK_MMAP
+	if (1) {
+		struct nl_mmap_req req;
+
+		memset(&req, 0, sizeof(req));
+		if (nlk->rx_ring.pg_vec)
+			netlink_set_ring(sk, &req, true, false);
+		memset(&req, 0, sizeof(req));
+		if (nlk->tx_ring.pg_vec)
+			netlink_set_ring(sk, &req, true, true);
+	}
+#endif /* CONFIG_NETLINK_MMAP */
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -440,6 +1051,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
 		mutex_init(nlk->cb_mutex);
 	}
 	init_waitqueue_head(&nlk->wait);
+#ifdef CONFIG_NETLINK_MMAP
+	mutex_init(&nlk->pg_vec_lock);
+#endif
 
 	sk->sk_destruct = netlink_sock_destruct;
 	sk->sk_protocol = protocol;
@@ -771,19 +1385,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 	return 0;
 }
 
-static void netlink_overrun(struct sock *sk)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-
-	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
-		if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
-			sk->sk_err = ENOBUFS;
-			sk->sk_error_report(sk);
-		}
-	}
-	atomic_inc(&sk->sk_drops);
-}
-
 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
 {
 	struct sock *sock;
@@ -836,8 +1437,9 @@
 
 	nlk = nlk_sk(sk);
 
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    test_bit(0, &nlk->state)) {
+	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+	    !netlink_skb_is_mmaped(skb)) {
 		DECLARE_WAITQUEUE(wait, current);
 		if (!*timeo) {
 			if (!ssk || netlink_is_kernel(ssk))
@@ -851,7 +1453,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		add_wait_queue(&nlk->wait, &wait);
 
 		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-		     test_bit(0, &nlk->state)) &&
+		     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
 		    !sock_flag(sk, SOCK_DEAD))
 			*timeo = schedule_timeout(*timeo);
 
@@ -865,7 +1467,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		}
 		return 1;
 	}
-	skb_set_owner_r(skb, sk);
+	netlink_skb_set_owner_r(skb, sk);
 	return 0;
 }
 
@@ -873,7 +1475,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
 {
 	int len = skb->len;
 
-	skb_queue_tail(&sk->sk_receive_queue, skb);
+#ifdef CONFIG_NETLINK_MMAP
+	if (netlink_skb_is_mmaped(skb))
+		netlink_queue_mmaped_skb(sk, skb);
+	else if (netlink_rx_is_mmaped(sk))
+		netlink_ring_set_copied(sk, skb);
+	else
+#endif /* CONFIG_NETLINK_MMAP */
+		skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, len);
 	return len;
 }
@@ -896,7 +1505,9 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
 {
 	int delta;
 
-	skb_orphan(skb);
+	WARN_ON(skb->sk != NULL);
+	if (netlink_skb_is_mmaped(skb))
+		return skb;
 
 	delta = skb->end - skb->tail;
 	if (delta * 2 < skb->truesize)
@@ -916,16 +1527,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
 	return skb;
 }
 
-static void netlink_rcv_wake(struct sock *sk)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-
-	if (skb_queue_empty(&sk->sk_receive_queue))
-		clear_bit(0, &nlk->state);
-	if (!test_bit(0, &nlk->state))
-		wake_up_interruptible(&nlk->wait);
-}
-
 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
 				  struct sock *ssk)
 {
@@ -935,8 +1536,8 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
 	ret = -ECONNREFUSED;
 	if (nlk->netlink_rcv != NULL) {
 		ret = skb->len;
-		skb_set_owner_r(skb, sk);
-		NETLINK_CB(skb).ssk = ssk;
+		netlink_skb_set_owner_r(skb, sk);
+		NETLINK_CB(skb).sk = ssk;
 		nlk->netlink_rcv(skb);
 		consume_skb(skb);
 	} else {
@@ -982,6 +1583,69 @@ retry:
 }
EXPORT_SYMBOL(netlink_unicast);
 
+struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
+				  u32 dst_portid, gfp_t gfp_mask)
+{
+#ifdef CONFIG_NETLINK_MMAP
+	struct sock *sk = NULL;
+	struct sk_buff *skb;
+	struct netlink_ring *ring;
+	struct nl_mmap_hdr *hdr;
+	unsigned int maxlen;
+
+	sk = netlink_getsockbyportid(ssk, dst_portid);
+	if (IS_ERR(sk))
+		goto out;
+
+	ring = &nlk_sk(sk)->rx_ring;
+	/* fast-path without atomic ops for common case: non-mmaped receiver */
+	if (ring->pg_vec == NULL)
+		goto out_put;
+
+	skb = alloc_skb_head(gfp_mask);
+	if (skb == NULL)
+		goto err1;
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	/* check again under lock */
+	if (ring->pg_vec == NULL)
+		goto out_free;
+
+	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+	if (maxlen < size)
+		goto out_free;
+
+	netlink_forward_ring(ring);
+	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+	if (hdr == NULL)
+		goto err2;
+	netlink_ring_setup_skb(skb, sk, ring, hdr);
+	netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+	atomic_inc(&ring->pending);
+	netlink_increment_head(ring);
+
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+	return skb;
+
+err2:
+	kfree_skb(skb);
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+	netlink_overrun(sk);
+err1:
+	sock_put(sk);
+	return NULL;
+
+out_free:
+	kfree_skb(skb);
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+out_put:
+	sock_put(sk);
+out:
+#endif
+	return alloc_skb(size, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+
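[netlink_alloc_skb() is the kernel-side counterpart: subsystems that allocate unicast replies with nlmsg_new()/alloc_skb() can switch to it and deliver straight into the receiver's ring when one is mapped, falling back to a normal skb otherwise (netlink_ack() and netlink_dump() are converted later in this patch). A sketch of what a converted reply path in some netlink family might look like; the function and message type are hypothetical:

    static int hypothetical_reply(struct sock *nl_sk, struct sk_buff *req_skb,
    			      u16 type, u32 seq)
    {
    	struct sk_buff *skb;
    	struct nlmsghdr *nlh;

    	/* lands in the requester's mmap'ed RX ring when available */
    	skb = netlink_alloc_skb(nl_sk, NLMSG_DEFAULT_SIZE,
    				NETLINK_CB(req_skb).portid, GFP_KERNEL);
    	if (!skb)
    		return -ENOBUFS;

    	nlh = nlmsg_put(skb, NETLINK_CB(req_skb).portid, seq, type, 0, 0);
    	if (!nlh) {
    		kfree_skb(skb);
    		return -EMSGSIZE;
    	}
    	nlmsg_end(skb, nlh);
    	return netlink_unicast(nl_sk, skb, NETLINK_CB(req_skb).portid,
    			       MSG_DONTWAIT);
    }
]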
 int netlink_has_listeners(struct sock *sk, unsigned int group)
 {
 	int res = 0;
@@ -1006,8 +1670,8 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 	struct netlink_sock *nlk = nlk_sk(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-	    !test_bit(0, &nlk->state)) {
-		skb_set_owner_r(skb, sk);
+	    !test_bit(NETLINK_CONGESTED, &nlk->state)) {
+		netlink_skb_set_owner_r(skb, sk);
 		__netlink_sendskb(sk, skb);
 		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
 	}
@@ -1242,7 +1906,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 	if (level != SOL_NETLINK)
 		return -ENOPROTOOPT;
 
-	if (optlen >= sizeof(int) &&
+	if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
+	    optlen >= sizeof(int) &&
 	    get_user(val, (unsigned int __user *)optval))
 		return -EFAULT;
 
@@ -1284,13 +1949,32 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 	case NETLINK_NO_ENOBUFS:
 		if (val) {
 			nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
-			clear_bit(0, &nlk->state);
+			clear_bit(NETLINK_CONGESTED, &nlk->state);
 			wake_up_interruptible(&nlk->wait);
 		} else {
 			nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
 		}
 		err = 0;
 		break;
+#ifdef CONFIG_NETLINK_MMAP
+	case NETLINK_RX_RING:
+	case NETLINK_TX_RING: {
+		struct nl_mmap_req req;
+
+		/* Rings might consume more memory than queue limits, require
+		 * CAP_NET_ADMIN.
+		 */
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (optlen < sizeof(req))
+			return -EINVAL;
+		if (copy_from_user(&req, optval, sizeof(req)))
+			return -EFAULT;
+		err = netlink_set_ring(sk, &req, false,
+				       optname == NETLINK_TX_RING);
+		break;
+	}
+#endif /* CONFIG_NETLINK_MMAP */
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1401,6 +2085,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 	}
 
+	if (netlink_tx_is_mmaped(sk) &&
+	    msg->msg_iov->iov_base == NULL) {
+		err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
+					   siocb);
+		goto out;
+	}
+
 	err = -EMSGSIZE;
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
@@ -1695,7 +2386,7 @@ struct nlmsghdr *
 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
 {
 	struct nlmsghdr *nlh;
-	int size = NLMSG_LENGTH(len);
+	int size = nlmsg_msg_size(len);
 
 	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
 	nlh->nlmsg_type = type;
@@ -1704,7 +2395,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
 	nlh->nlmsg_pid = portid;
 	nlh->nlmsg_seq = seq;
 	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
-		memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
+		memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
 	return nlh;
 }
 EXPORT_SYMBOL(__nlmsg_put);
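[The NLMSG_LENGTH()/NLMSG_DATA() to nlmsg_msg_size()/nlmsg_data() switch in this hunk is a cleanup rather than a behavioural change: the 16-byte netlink header is already NLMSG_ALIGNTO-aligned, so both spellings yield the same value. For reference, the helper as defined in include/net/netlink.h:

    /**
     * nlmsg_msg_size - length of netlink message not including padding
     * @payload: length of message payload
     */
    static inline int nlmsg_msg_size(int payload)
    {
    	return NLMSG_HDRLEN + payload;	/* == NLMSG_LENGTH(payload) */
    }
]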
@@ -1733,9 +2424,13 @@ static int netlink_dump(struct sock *sk)
 
 	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
 
-	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
+	if (!netlink_rx_is_mmaped(sk) &&
+	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+		goto errout_skb;
+	skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
 	if (!skb)
 		goto errout_skb;
+	netlink_skb_set_owner_r(skb, sk);
 
 	len = cb->dump(skb, cb);
 
@@ -1790,13 +2485,25 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	if (cb == NULL)
 		return -ENOBUFS;
 
+	/* Memory mapped dump requests need to be copied to avoid looping
+	 * on the pending state in netlink_mmap_sendmsg() while the CB holds
+	 * a reference to the skb.
+	 */
+	if (netlink_skb_is_mmaped(skb)) {
+		skb = skb_copy(skb, GFP_KERNEL);
+		if (skb == NULL) {
+			kfree(cb);
+			return -ENOBUFS;
+		}
+	} else
+		atomic_inc(&skb->users);
+
 	cb->dump = control->dump;
 	cb->done = control->done;
 	cb->nlh = nlh;
 	cb->data = control->data;
 	cb->module = control->module;
 	cb->min_dump_alloc = control->min_dump_alloc;
-	atomic_inc(&skb->users);
 	cb->skb = skb;
 
 	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
@@ -1850,7 +2557,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 	if (err)
 		payload += nlmsg_len(nlh);
 
-	skb = nlmsg_new(payload, GFP_KERNEL);
+	skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
+				NETLINK_CB(in_skb).portid, GFP_KERNEL);
 	if (!skb) {
 		struct sock *sk;
 
@@ -2116,7 +2824,7 @@ static const struct proto_ops netlink_ops = {
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.getname = netlink_getname,
-	.poll = datagram_poll,
+	.poll = netlink_poll,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
 	.shutdown = sock_no_shutdown,
@@ -2124,7 +2832,7 @@ static const struct proto_ops netlink_ops = {
 	.getsockopt = netlink_getsockopt,
 	.sendmsg = netlink_sendmsg,
 	.recvmsg = netlink_recvmsg,
-	.mmap = sock_no_mmap,
+	.mmap = netlink_mmap,
 	.sendpage = sock_no_sendpage,
 };
 