summary refs log tree commit diff stats
path: root/net/unix/garbage.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-03-08 17:48:40 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-03-08 17:48:40 -0500
commit 38e7571c07be01f9f19b355a9306a4e3d5cb0f5b (patch)
tree 48812ba46a6fe37ee59d31e0de418f336bbb15ca /net/unix/garbage.c
parent 80201fe175cbf7f3e372f53eba0a881a702ad926 (diff)
parent 21b4aa5d20fd07207e73270cadffed5c63fb4343 (diff)
Merge tag 'io_uring-2019-03-06' of git://git.kernel.dk/linux-block
Pull io_uring IO interface from Jens Axboe: "Second attempt at adding the io_uring interface. Since the first one, we've added basic unit testing of the three system calls, that resides in liburing like the other unit tests that we have so far. It'll take a while to get full coverage of it, but we're working towards it. I've also added two basic test programs to tools/io_uring. One uses the raw interface and has support for all the various features that io_uring supports outside of standard IO, like fixed files, fixed IO buffers, and polled IO. The other uses the liburing API, and is a simplified version of cp(1). This adds support for a new IO interface, io_uring. io_uring allows an application to communicate with the kernel through two rings, the submission queue (SQ) and completion queue (CQ) ring. This allows for very efficient handling of IOs, see the v5 posting for some basic numbers: https://lore.kernel.org/linux-block/20190116175003.17880-1-axboe@kernel.dk/ Outside of just efficiency, the interface is also flexible and extendable, and allows for future use cases like the upcoming NVMe key-value store API, networked IO, and so on. It also supports async buffered IO, something that we've always failed to support in the kernel. Outside of basic IO features, it supports async polled IO as well. This particular feature has already been tested at Facebook months ago for flash storage boxes, with 25-33% improvements. It makes polled IO actually useful for real world use cases, where even basic flash sees a nice win in terms of efficiency, latency, and performance. These boxes were IOPS bound before, now they are not. This series adds three new system calls. One for setting up an io_uring instance (io_uring_setup(2)), one for submitting/completing IO (io_uring_enter(2)), and one for aux functions like registering file sets, buffers, etc (io_uring_register(2)). Through the help of Arnd, I've coordinated the syscall numbers so merge on that front should be painless. 
Jon did a writeup of the interface a while back, which (except for minor details that have been tweaked) is still accurate. Find that here: https://lwn.net/Articles/776703/ Huge thanks to Al Viro for helping to get the reference cycle code correct, and to Jann Horn for his extensive reviews focused on both security and bugs in general. There's a userspace library that provides basic functionality for applications that don't need or want to care about how to fiddle with the rings directly. It has helpers to allow applications to easily set up an io_uring instance, and submit/complete IO through it without knowing about the intricacies of the rings. It also includes man pages (thanks to Jeff Moyer), and will continue to grow support helper functions and features as time progresses. Find it here: git://git.kernel.dk/liburing Fio has full support for the raw interface, both in the form of an IO engine (io_uring), but also with a small test application (t/io_uring) that can exercise and benchmark the interface" * tag 'io_uring-2019-03-06' of git://git.kernel.dk/linux-block: io_uring: add a few test tools io_uring: allow workqueue item to handle multiple buffered requests io_uring: add support for IORING_OP_POLL io_uring: add io_kiocb ref count io_uring: add submission polling io_uring: add file set registration net: split out functions related to registering inflight socket files io_uring: add support for pre-mapped user IO buffers block: implement bio helper to add iter bvec pages to bio io_uring: batch io_kiocb allocation io_uring: use fget/fput_many() for file references fs: add fget_many() and fput_many() io_uring: support for IO polling io_uring: add fsync support Add io_uring IO interface
Diffstat (limited to 'net/unix/garbage.c')
-rw-r--r--net/unix/garbage.c68
1 files changed, 2 insertions, 66 deletions
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c36757e72844..8bbe1b8e4ff7 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -86,77 +86,13 @@
86#include <net/scm.h> 86#include <net/scm.h>
87#include <net/tcp_states.h> 87#include <net/tcp_states.h>
88 88
89#include "scm.h"
90
89/* Internal data structures and random procedures: */ 91/* Internal data structures and random procedures: */
90 92
91static LIST_HEAD(gc_inflight_list);
92static LIST_HEAD(gc_candidates); 93static LIST_HEAD(gc_candidates);
93static DEFINE_SPINLOCK(unix_gc_lock);
94static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); 94static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
95 95
96unsigned int unix_tot_inflight;
97
98struct sock *unix_get_socket(struct file *filp)
99{
100 struct sock *u_sock = NULL;
101 struct inode *inode = file_inode(filp);
102
103 /* Socket ? */
104 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
105 struct socket *sock = SOCKET_I(inode);
106 struct sock *s = sock->sk;
107
108 /* PF_UNIX ? */
109 if (s && sock->ops && sock->ops->family == PF_UNIX)
110 u_sock = s;
111 }
112 return u_sock;
113}
114
115/* Keep the number of times in flight count for the file
116 * descriptor if it is for an AF_UNIX socket.
117 */
118
119void unix_inflight(struct user_struct *user, struct file *fp)
120{
121 struct sock *s = unix_get_socket(fp);
122
123 spin_lock(&unix_gc_lock);
124
125 if (s) {
126 struct unix_sock *u = unix_sk(s);
127
128 if (atomic_long_inc_return(&u->inflight) == 1) {
129 BUG_ON(!list_empty(&u->link));
130 list_add_tail(&u->link, &gc_inflight_list);
131 } else {
132 BUG_ON(list_empty(&u->link));
133 }
134 unix_tot_inflight++;
135 }
136 user->unix_inflight++;
137 spin_unlock(&unix_gc_lock);
138}
139
140void unix_notinflight(struct user_struct *user, struct file *fp)
141{
142 struct sock *s = unix_get_socket(fp);
143
144 spin_lock(&unix_gc_lock);
145
146 if (s) {
147 struct unix_sock *u = unix_sk(s);
148
149 BUG_ON(!atomic_long_read(&u->inflight));
150 BUG_ON(list_empty(&u->link));
151
152 if (atomic_long_dec_and_test(&u->inflight))
153 list_del_init(&u->link);
154 unix_tot_inflight--;
155 }
156 user->unix_inflight--;
157 spin_unlock(&unix_gc_lock);
158}
159
160static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), 96static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
161 struct sk_buff_head *hitlist) 97 struct sk_buff_head *hitlist)
162{ 98{