commit:    9652e931e73be7e54a9c40e9bcd4bbdafe92a406
tree:      b943a75e53e22e9dcdb26558d971ac25af566a02
parent:    ccdfcc398594ddf3f77348c5a10938dbe9efefbe
author:    Patrick McHardy <kaber@trash.net>     2013-04-17 02:47:02 -0400
committer: David S. Miller <davem@davemloft.net> 2013-04-19 14:57:57 -0400
netlink: add mmap'ed netlink helper functions
Add helper functions for looking up mmap'ed frame headers, reading and
writing their status, allocating skbs with mmap'ed data areas and a poll
function.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
 include/linux/netlink.h  |   7 +
 net/netlink/af_netlink.c | 185 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 190 insertions(+), 2 deletions(-)
diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d8e9264ae04a..07c473848dbd 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h | |||
@@ -15,10 +15,17 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) | |||
15 | return (struct nlmsghdr *)skb->data; | 15 | return (struct nlmsghdr *)skb->data; |
16 | } | 16 | } |
17 | 17 | ||
18 | enum netlink_skb_flags { | ||
19 | NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ | ||
20 | NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ | ||
21 | NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ | ||
22 | }; | ||
23 | |||
18 | struct netlink_skb_parms { | 24 | struct netlink_skb_parms { |
19 | struct scm_creds creds; /* Skb credentials */ | 25 | struct scm_creds creds; /* Skb credentials */ |
20 | __u32 portid; | 26 | __u32 portid; |
21 | __u32 dst_group; | 27 | __u32 dst_group; |
28 | __u32 flags; | ||
22 | struct sock *sk; | 29 | struct sock *sk; |
23 | }; | 30 | }; |
24 | 31 | ||
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1d3c7128e90e..6560635fd25c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/audit.h> | 56 | #include <linux/audit.h> |
57 | #include <linux/mutex.h> | 57 | #include <linux/mutex.h> |
58 | #include <linux/vmalloc.h> | 58 | #include <linux/vmalloc.h> |
59 | #include <asm/cacheflush.h> | ||
59 | 60 | ||
60 | #include <net/net_namespace.h> | 61 | #include <net/net_namespace.h> |
61 | #include <net/sock.h> | 62 | #include <net/sock.h> |
@@ -89,6 +90,7 @@ EXPORT_SYMBOL_GPL(nl_table); | |||
89 | static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); | 90 | static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); |
90 | 91 | ||
91 | static int netlink_dump(struct sock *sk); | 92 | static int netlink_dump(struct sock *sk); |
93 | static void netlink_skb_destructor(struct sk_buff *skb); | ||
92 | 94 | ||
93 | DEFINE_RWLOCK(nl_table_lock); | 95 | DEFINE_RWLOCK(nl_table_lock); |
94 | EXPORT_SYMBOL_GPL(nl_table_lock); | 96 | EXPORT_SYMBOL_GPL(nl_table_lock); |
@@ -109,6 +111,11 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u | |||
109 | } | 111 | } |
110 | 112 | ||
111 | #ifdef CONFIG_NETLINK_MMAP | 113 | #ifdef CONFIG_NETLINK_MMAP |
114 | static bool netlink_skb_is_mmaped(const struct sk_buff *skb) | ||
115 | { | ||
116 | return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; | ||
117 | } | ||
118 | |||
112 | static __pure struct page *pgvec_to_page(const void *addr) | 119 | static __pure struct page *pgvec_to_page(const void *addr) |
113 | { | 120 | { |
114 | if (is_vmalloc_addr(addr)) | 121 | if (is_vmalloc_addr(addr)) |
@@ -332,8 +339,154 @@ out: | |||
332 | mutex_unlock(&nlk->pg_vec_lock); | 339 | mutex_unlock(&nlk->pg_vec_lock); |
333 | return 0; | 340 | return 0; |
334 | } | 341 | } |
342 | |||
343 | static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) | ||
344 | { | ||
345 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 | ||
346 | struct page *p_start, *p_end; | ||
347 | |||
348 | /* First page is flushed through netlink_{get,set}_status */ | ||
349 | p_start = pgvec_to_page(hdr + PAGE_SIZE); | ||
350 | p_end = pgvec_to_page((void *)hdr + NL_MMAP_MSG_HDRLEN + hdr->nm_len - 1); | ||
351 | while (p_start <= p_end) { | ||
352 | flush_dcache_page(p_start); | ||
353 | p_start++; | ||
354 | } | ||
355 | #endif | ||
356 | } | ||
357 | |||
358 | static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) | ||
359 | { | ||
360 | smp_rmb(); | ||
361 | flush_dcache_page(pgvec_to_page(hdr)); | ||
362 | return hdr->nm_status; | ||
363 | } | ||
364 | |||
365 | static void netlink_set_status(struct nl_mmap_hdr *hdr, | ||
366 | enum nl_mmap_status status) | ||
367 | { | ||
368 | hdr->nm_status = status; | ||
369 | flush_dcache_page(pgvec_to_page(hdr)); | ||
370 | smp_wmb(); | ||
371 | } | ||
372 | |||
373 | static struct nl_mmap_hdr * | ||
374 | __netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) | ||
375 | { | ||
376 | unsigned int pg_vec_pos, frame_off; | ||
377 | |||
378 | pg_vec_pos = pos / ring->frames_per_block; | ||
379 | frame_off = pos % ring->frames_per_block; | ||
380 | |||
381 | return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); | ||
382 | } | ||
383 | |||
384 | static struct nl_mmap_hdr * | ||
385 | netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, | ||
386 | enum nl_mmap_status status) | ||
387 | { | ||
388 | struct nl_mmap_hdr *hdr; | ||
389 | |||
390 | hdr = __netlink_lookup_frame(ring, pos); | ||
391 | if (netlink_get_status(hdr) != status) | ||
392 | return NULL; | ||
393 | |||
394 | return hdr; | ||
395 | } | ||
396 | |||
397 | static struct nl_mmap_hdr * | ||
398 | netlink_current_frame(const struct netlink_ring *ring, | ||
399 | enum nl_mmap_status status) | ||
400 | { | ||
401 | return netlink_lookup_frame(ring, ring->head, status); | ||
402 | } | ||
403 | |||
404 | static struct nl_mmap_hdr * | ||
405 | netlink_previous_frame(const struct netlink_ring *ring, | ||
406 | enum nl_mmap_status status) | ||
407 | { | ||
408 | unsigned int prev; | ||
409 | |||
410 | prev = ring->head ? ring->head - 1 : ring->frame_max; | ||
411 | return netlink_lookup_frame(ring, prev, status); | ||
412 | } | ||
413 | |||
414 | static void netlink_increment_head(struct netlink_ring *ring) | ||
415 | { | ||
416 | ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; | ||
417 | } | ||
418 | |||
419 | static void netlink_forward_ring(struct netlink_ring *ring) | ||
420 | { | ||
421 | unsigned int head = ring->head, pos = head; | ||
422 | const struct nl_mmap_hdr *hdr; | ||
423 | |||
424 | do { | ||
425 | hdr = __netlink_lookup_frame(ring, pos); | ||
426 | if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) | ||
427 | break; | ||
428 | if (hdr->nm_status != NL_MMAP_STATUS_SKIP) | ||
429 | break; | ||
430 | netlink_increment_head(ring); | ||
431 | } while (ring->head != head); | ||
432 | } | ||
433 | |||
434 | static unsigned int netlink_poll(struct file *file, struct socket *sock, | ||
435 | poll_table *wait) | ||
436 | { | ||
437 | struct sock *sk = sock->sk; | ||
438 | struct netlink_sock *nlk = nlk_sk(sk); | ||
439 | unsigned int mask; | ||
440 | |||
441 | mask = datagram_poll(file, sock, wait); | ||
442 | |||
443 | spin_lock_bh(&sk->sk_receive_queue.lock); | ||
444 | if (nlk->rx_ring.pg_vec) { | ||
445 | netlink_forward_ring(&nlk->rx_ring); | ||
446 | if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED)) | ||
447 | mask |= POLLIN | POLLRDNORM; | ||
448 | } | ||
449 | spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
450 | |||
451 | spin_lock_bh(&sk->sk_write_queue.lock); | ||
452 | if (nlk->tx_ring.pg_vec) { | ||
453 | if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) | ||
454 | mask |= POLLOUT | POLLWRNORM; | ||
455 | } | ||
456 | spin_unlock_bh(&sk->sk_write_queue.lock); | ||
457 | |||
458 | return mask; | ||
459 | } | ||
460 | |||
461 | static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) | ||
462 | { | ||
463 | return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); | ||
464 | } | ||
465 | |||
466 | static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, | ||
467 | struct netlink_ring *ring, | ||
468 | struct nl_mmap_hdr *hdr) | ||
469 | { | ||
470 | unsigned int size; | ||
471 | void *data; | ||
472 | |||
473 | size = ring->frame_size - NL_MMAP_HDRLEN; | ||
474 | data = (void *)hdr + NL_MMAP_HDRLEN; | ||
475 | |||
476 | skb->head = data; | ||
477 | skb->data = data; | ||
478 | skb_reset_tail_pointer(skb); | ||
479 | skb->end = skb->tail + size; | ||
480 | skb->len = 0; | ||
481 | |||
482 | skb->destructor = netlink_skb_destructor; | ||
483 | NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; | ||
484 | NETLINK_CB(skb).sk = sk; | ||
485 | } | ||
335 | #else /* CONFIG_NETLINK_MMAP */ | 486 | #else /* CONFIG_NETLINK_MMAP */ |
487 | #define netlink_skb_is_mmaped(skb) false | ||
336 | #define netlink_mmap sock_no_mmap | 488 | #define netlink_mmap sock_no_mmap |
489 | #define netlink_poll datagram_poll | ||
337 | #endif /* CONFIG_NETLINK_MMAP */ | 490 | #endif /* CONFIG_NETLINK_MMAP */ |
338 | 491 | ||
339 | static void netlink_destroy_callback(struct netlink_callback *cb) | 492 | static void netlink_destroy_callback(struct netlink_callback *cb) |
@@ -350,7 +503,35 @@ static void netlink_consume_callback(struct netlink_callback *cb) | |||
350 | 503 | ||
351 | static void netlink_skb_destructor(struct sk_buff *skb) | 504 | static void netlink_skb_destructor(struct sk_buff *skb) |
352 | { | 505 | { |
353 | sock_rfree(skb); | 506 | #ifdef CONFIG_NETLINK_MMAP |
507 | struct nl_mmap_hdr *hdr; | ||
508 | struct netlink_ring *ring; | ||
509 | struct sock *sk; | ||
510 | |||
511 | /* If a packet from the kernel to userspace was freed because of an | ||
512 | * error without being delivered to userspace, the kernel must reset | ||
513 | * the status. In the direction userspace to kernel, the status is | ||
514 | * always reset here after the packet was processed and freed. | ||
515 | */ | ||
516 | if (netlink_skb_is_mmaped(skb)) { | ||
517 | hdr = netlink_mmap_hdr(skb); | ||
518 | sk = NETLINK_CB(skb).sk; | ||
519 | |||
520 | if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { | ||
521 | hdr->nm_len = 0; | ||
522 | netlink_set_status(hdr, NL_MMAP_STATUS_VALID); | ||
523 | } | ||
524 | ring = &nlk_sk(sk)->rx_ring; | ||
525 | |||
526 | WARN_ON(atomic_read(&ring->pending) == 0); | ||
527 | atomic_dec(&ring->pending); | ||
528 | sock_put(sk); | ||
529 | |||
530 | skb->data = NULL; | ||
531 | } | ||
532 | #endif | ||
533 | if (skb->sk != NULL) | ||
534 | sock_rfree(skb); | ||
354 | } | 535 | } |
355 | 536 | ||
356 | static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) | 537 | static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) |
@@ -2349,7 +2530,7 @@ static const struct proto_ops netlink_ops = { | |||
2349 | .socketpair = sock_no_socketpair, | 2530 | .socketpair = sock_no_socketpair, |
2350 | .accept = sock_no_accept, | 2531 | .accept = sock_no_accept, |
2351 | .getname = netlink_getname, | 2532 | .getname = netlink_getname, |
2352 | .poll = datagram_poll, | 2533 | .poll = netlink_poll, |
2353 | .ioctl = sock_no_ioctl, | 2534 | .ioctl = sock_no_ioctl, |
2354 | .listen = sock_no_listen, | 2535 | .listen = sock_no_listen, |
2355 | .shutdown = sock_no_shutdown, | 2536 | .shutdown = sock_no_shutdown, |