author    Patrick McHardy <kaber@trash.net>    2013-04-17 02:47:02 -0400
committer David S. Miller <davem@davemloft.net>    2013-04-19 14:57:57 -0400
commit    9652e931e73be7e54a9c40e9bcd4bbdafe92a406 (patch)
tree      b943a75e53e22e9dcdb26558d971ac25af566a02
parent    ccdfcc398594ddf3f77348c5a10938dbe9efefbe (diff)
netlink: add mmap'ed netlink helper functions
Add helper functions for looking up mmap'ed frame headers, reading and
writing their status, allocating skbs with mmap'ed data areas and a poll
function.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/linux/netlink.h     7
-rw-r--r--  net/netlink/af_netlink.c  185
2 files changed, 190 insertions(+), 2 deletions(-)
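For orientation, the sketch below shows the userspace side these helpers are written against: a receiver configures an RX ring, mmap()s it, waits in poll() (served by netlink_poll() added below), walks frames by their nm_status, and hands each one back as NL_MMAP_STATUS_UNUSED. This is an illustrative sketch, not part of the patch: it assumes a kernel built with CONFIG_NETLINK_MMAP (the feature was later removed in Linux 4.6) and uapi headers providing NETLINK_RX_RING, struct nl_mmap_req, struct nl_mmap_hdr and NL_MMAP_HDRLEN; the NETLINK_ROUTE/RTMGRP_LINK subscription and the ring geometry are arbitrary example values.

/* Hypothetical userspace RX-ring consumer; illustrative only.
 * Assumes CONFIG_NETLINK_MMAP-era uapi definitions: NETLINK_RX_RING,
 * struct nl_mmap_req, struct nl_mmap_hdr, NL_MMAP_HDRLEN.
 */
#include <errno.h>
#include <poll.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct nl_mmap_req req = {
		.nm_block_size	= 8192,			/* multiple of the page size */
		.nm_block_nr	= 4,
		.nm_frame_size	= 2048,			/* multiple of NL_MMAP_MSG_ALIGNMENT */
		.nm_frame_nr	= 4 * 8192 / 2048,
	};
	unsigned int ring_size = req.nm_block_nr * req.nm_block_size;
	unsigned int frame_offset = 0;
	struct sockaddr_nl addr = {
		.nl_family	= AF_NETLINK,
		.nl_groups	= RTMGRP_LINK,		/* example subscription */
	};
	unsigned char *ring;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* Set up the RX ring and map it into this process */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
		return 1;
	ring = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED)
		return 1;

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLERR };

		/* netlink_poll() reports POLLIN once a frame is no longer UNUSED */
		if (poll(&pfd, 1, -1) < 0 && errno != EINTR)
			return 1;

		for (;;) {
			struct nl_mmap_hdr *hdr =
				(struct nl_mmap_hdr *)(ring + frame_offset);

			if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
				/* Message data follows the frame header */
				struct nlmsghdr *nlh = (struct nlmsghdr *)
					((unsigned char *)hdr + NL_MMAP_HDRLEN);

				printf("frame: %u bytes, nlmsg type %u\n",
				       hdr->nm_len, (unsigned int)nlh->nlmsg_type);
			} else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
				/* Oversized message queued to the regular
				 * receive path instead of the ring */
				char buf[8192];

				recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
			} else {
				break;		/* nothing to read, poll again */
			}

			/* Return the frame to the kernel and advance */
			hdr->nm_len = 0;
			hdr->nm_status = NL_MMAP_STATUS_UNUSED;
			frame_offset = (frame_offset + req.nm_frame_size) % ring_size;
		}
	}
}

The status writes in this loop are what the new netlink_get_status()/netlink_set_status() helpers pair with on the kernel side: they bracket nm_status accesses with smp_rmb()/smp_wmb() and dcache flushes so both sides observe a consistent frame state.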
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index d8e9264ae04a..07c473848dbd 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -15,10 +15,17 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
 	return (struct nlmsghdr *)skb->data;
 }
 
+enum netlink_skb_flags {
+	NETLINK_SKB_MMAPED	= 0x1,		/* Packet data is mmaped */
+	NETLINK_SKB_TX		= 0x2,		/* Packet was sent by userspace */
+	NETLINK_SKB_DELIVERED	= 0x4,		/* Packet was delivered */
+};
+
 struct netlink_skb_parms {
 	struct scm_creds	creds;		/* Skb credentials	*/
 	__u32			portid;
 	__u32			dst_group;
+	__u32			flags;
 	struct sock		*sk;
 };
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1d3c7128e90e..6560635fd25c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -56,6 +56,7 @@
 #include <linux/audit.h>
 #include <linux/mutex.h>
 #include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -89,6 +90,7 @@ EXPORT_SYMBOL_GPL(nl_table);
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
 static int netlink_dump(struct sock *sk);
+static void netlink_skb_destructor(struct sk_buff *skb);
 
 DEFINE_RWLOCK(nl_table_lock);
 EXPORT_SYMBOL_GPL(nl_table_lock);
@@ -109,6 +111,11 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u
 }
 
 #ifdef CONFIG_NETLINK_MMAP
+static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+{
+	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+}
+
 static __pure struct page *pgvec_to_page(const void *addr)
 {
 	if (is_vmalloc_addr(addr))
@@ -332,8 +339,154 @@ out:
 	mutex_unlock(&nlk->pg_vec_lock);
 	return 0;
 }
+
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+{
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+	struct page *p_start, *p_end;
+
+	/* First page is flushed through netlink_{get,set}_status */
+	p_start = pgvec_to_page(hdr + PAGE_SIZE);
+	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+	while (p_start <= p_end) {
+		flush_dcache_page(p_start);
+		p_start++;
+	}
+#endif
+}
+
+static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
+{
+	smp_rmb();
+	flush_dcache_page(pgvec_to_page(hdr));
+	return hdr->nm_status;
+}
+
+static void netlink_set_status(struct nl_mmap_hdr *hdr,
+			       enum nl_mmap_status status)
+{
+	hdr->nm_status = status;
+	flush_dcache_page(pgvec_to_page(hdr));
+	smp_wmb();
+}
+
+static struct nl_mmap_hdr *
+__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
+{
+	unsigned int pg_vec_pos, frame_off;
+
+	pg_vec_pos = pos / ring->frames_per_block;
+	frame_off  = pos % ring->frames_per_block;
+
+	return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
+}
+
+static struct nl_mmap_hdr *
+netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
+		     enum nl_mmap_status status)
+{
+	struct nl_mmap_hdr *hdr;
+
+	hdr = __netlink_lookup_frame(ring, pos);
+	if (netlink_get_status(hdr) != status)
+		return NULL;
+
+	return hdr;
+}
+
+static struct nl_mmap_hdr *
+netlink_current_frame(const struct netlink_ring *ring,
+		      enum nl_mmap_status status)
+{
+	return netlink_lookup_frame(ring, ring->head, status);
+}
+
+static struct nl_mmap_hdr *
+netlink_previous_frame(const struct netlink_ring *ring,
+		       enum nl_mmap_status status)
+{
+	unsigned int prev;
+
+	prev = ring->head ? ring->head - 1 : ring->frame_max;
+	return netlink_lookup_frame(ring, prev, status);
+}
+
+static void netlink_increment_head(struct netlink_ring *ring)
+{
+	ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
+}
+
+static void netlink_forward_ring(struct netlink_ring *ring)
+{
+	unsigned int head = ring->head, pos = head;
+	const struct nl_mmap_hdr *hdr;
+
+	do {
+		hdr = __netlink_lookup_frame(ring, pos);
+		if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
+			break;
+		if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
+			break;
+		netlink_increment_head(ring);
+	} while (ring->head != head);
+}
+
+static unsigned int netlink_poll(struct file *file, struct socket *sock,
+				 poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	unsigned int mask;
+
+	mask = datagram_poll(file, sock, wait);
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	if (nlk->rx_ring.pg_vec) {
+		netlink_forward_ring(&nlk->rx_ring);
+		if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
+			mask |= POLLIN | POLLRDNORM;
+	}
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+	spin_lock_bh(&sk->sk_write_queue.lock);
+	if (nlk->tx_ring.pg_vec) {
+		if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
+			mask |= POLLOUT | POLLWRNORM;
+	}
+	spin_unlock_bh(&sk->sk_write_queue.lock);
+
+	return mask;
+}
+
+static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
+{
+	return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
+}
+
+static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
+				   struct netlink_ring *ring,
+				   struct nl_mmap_hdr *hdr)
+{
+	unsigned int size;
+	void *data;
+
+	size = ring->frame_size - NL_MMAP_HDRLEN;
+	data = (void *)hdr + NL_MMAP_HDRLEN;
+
+	skb->head	= data;
+	skb->data	= data;
+	skb_reset_tail_pointer(skb);
+	skb->end	= skb->tail + size;
+	skb->len	= 0;
+
+	skb->destructor	= netlink_skb_destructor;
+	NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
+	NETLINK_CB(skb).sk = sk;
+}
 #else /* CONFIG_NETLINK_MMAP */
+#define netlink_skb_is_mmaped(skb)	false
 #define netlink_mmap			sock_no_mmap
+#define netlink_poll			datagram_poll
 #endif /* CONFIG_NETLINK_MMAP */
 
 static void netlink_destroy_callback(struct netlink_callback *cb)
@@ -350,7 +503,35 @@ static void netlink_consume_callback(struct netlink_callback *cb)
 
 static void netlink_skb_destructor(struct sk_buff *skb)
 {
-	sock_rfree(skb);
+#ifdef CONFIG_NETLINK_MMAP
+	struct nl_mmap_hdr *hdr;
+	struct netlink_ring *ring;
+	struct sock *sk;
+
+	/* If a packet from the kernel to userspace was freed because of an
+	 * error without being delivered to userspace, the kernel must reset
+	 * the status. In the direction userspace to kernel, the status is
+	 * always reset here after the packet was processed and freed.
+	 */
+	if (netlink_skb_is_mmaped(skb)) {
+		hdr = netlink_mmap_hdr(skb);
+		sk = NETLINK_CB(skb).sk;
+
+		if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
+			hdr->nm_len = 0;
+			netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+		}
+		ring = &nlk_sk(sk)->rx_ring;
+
+		WARN_ON(atomic_read(&ring->pending) == 0);
+		atomic_dec(&ring->pending);
+		sock_put(sk);
+
+		skb->data = NULL;
+	}
+#endif
+	if (skb->sk != NULL)
+		sock_rfree(skb);
 }
 
 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
@@ -2349,7 +2530,7 @@ static const struct proto_ops netlink_ops = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	netlink_getname,
-	.poll =		datagram_poll,
+	.poll =		netlink_poll,
 	.ioctl =	sock_no_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,