aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2008-10-13 19:45:25 -0400
committerEric Van Hensbergen <ericvh@gmail.com>2008-10-17 12:04:41 -0400
commit992b3f1dbeec401e19a80bdb8c81e5df5381f4c5 (patch)
tree5efa63f094f05fcb0cd83899cf0aad0ac7871018 /net
parent2e532d68a2b3e2aa6b19731501222069735c741c (diff)
9p-trans_fd: use single poller
trans_fd used a pool of up to 100 pollers to monitor the r/w fds. The approach makes sense in userspace, back when the only available interfaces were poll(2) and select(2). As each event monitor - trigger - handling iteration took O(n) where `n' is the number of watched fds, it made sense to spread them across many pollers so that `n' could be divided by the number of pollers. However, this doesn't make any sense in the kernel because persistent edge-triggered event monitoring is how the whole thing is implemented in the kernel in the first place. This patch converts trans_fd to use a single poller which watches all the fds instead of the pool of pollers approach. All the fds are registered for monitoring on creation and only the fds with pending events are scanned when something happens, much like how epoll is implemented. This change makes trans_fd fd monitoring more efficient and simpler. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Diffstat (limited to 'net')
-rw-r--r--net/9p/trans_fd.c252
1 file changed, 86 insertions, 166 deletions
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6dabbdb66651..f84592345573 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -44,7 +44,6 @@
44#define P9_PORT 564 44#define P9_PORT 564
45#define MAX_SOCK_BUF (64*1024) 45#define MAX_SOCK_BUF (64*1024)
46#define ERREQFLUSH 1 46#define ERREQFLUSH 1
47#define SCHED_TIMEOUT 10
48#define MAXPOLLWADDR 2 47#define MAXPOLLWADDR 2
49 48
50/** 49/**
@@ -135,17 +134,16 @@ struct p9_req {
135 struct list_head req_list; 134 struct list_head req_list;
136}; 135};
137 136
138struct p9_mux_poll_task { 137struct p9_poll_wait {
139 struct task_struct *task; 138 struct p9_conn *conn;
140 struct list_head mux_list; 139 wait_queue_t wait;
141 int muxnum; 140 wait_queue_head_t *wait_addr;
142}; 141};
143 142
144/** 143/**
145 * struct p9_conn - fd mux connection state information 144 * struct p9_conn - fd mux connection state information
146 * @lock: protects mux_list (?) 145 * @lock: protects mux_list (?)
147 * @mux_list: list link for mux to manage multiple connections (?) 146 * @mux_list: list link for mux to manage multiple connections (?)
148 * @poll_task: task polling on this connection
149 * @msize: maximum size for connection (dup) 147 * @msize: maximum size for connection (dup)
150 * @extended: 9p2000.u flag (dup) 148 * @extended: 9p2000.u flag (dup)
151 * @trans: reference to transport instance for this connection 149 * @trans: reference to transport instance for this connection
@@ -171,7 +169,6 @@ struct p9_mux_poll_task {
171struct p9_conn { 169struct p9_conn {
172 spinlock_t lock; /* protect lock structure */ 170 spinlock_t lock; /* protect lock structure */
173 struct list_head mux_list; 171 struct list_head mux_list;
174 struct p9_mux_poll_task *poll_task;
175 int msize; 172 int msize;
176 unsigned char extended; 173 unsigned char extended;
177 struct p9_trans *trans; 174 struct p9_trans *trans;
@@ -185,8 +182,8 @@ struct p9_conn {
185 int wpos; 182 int wpos;
186 int wsize; 183 int wsize;
187 char *wbuf; 184 char *wbuf;
188 wait_queue_t poll_wait[MAXPOLLWADDR]; 185 struct list_head poll_pending_link;
189 wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; 186 struct p9_poll_wait poll_wait[MAXPOLLWADDR];
190 poll_table pt; 187 poll_table pt;
191 struct work_struct rq; 188 struct work_struct rq;
192 struct work_struct wq; 189 struct work_struct wq;
@@ -220,12 +217,10 @@ static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address,
220static int p9_fd_write(struct p9_trans *trans, void *v, int len); 217static int p9_fd_write(struct p9_trans *trans, void *v, int len);
221static int p9_fd_read(struct p9_trans *trans, void *v, int len); 218static int p9_fd_read(struct p9_trans *trans, void *v, int len);
222 219
223static DEFINE_MUTEX(p9_mux_task_lock); 220static DEFINE_SPINLOCK(p9_poll_lock);
221static LIST_HEAD(p9_poll_pending_list);
224static struct workqueue_struct *p9_mux_wq; 222static struct workqueue_struct *p9_mux_wq;
225 223static struct task_struct *p9_poll_task;
226static int p9_mux_num;
227static int p9_mux_poll_task_num;
228static struct p9_mux_poll_task p9_mux_poll_tasks[100];
229 224
230static void p9_conn_destroy(struct p9_conn *); 225static void p9_conn_destroy(struct p9_conn *);
231static unsigned int p9_fd_poll(struct p9_trans *trans, 226static unsigned int p9_fd_poll(struct p9_trans *trans,
@@ -255,130 +250,23 @@ static void p9_mux_put_tag(struct p9_conn *m, u16 tag)
255 p9_idpool_put(tag, m->tagpool); 250 p9_idpool_put(tag, m->tagpool);
256} 251}
257 252
258/** 253static void p9_mux_poll_stop(struct p9_conn *m)
259 * p9_mux_calc_poll_procs - calculates the number of polling procs
260 * @muxnum: number of mounts
261 *
262 * Calculation is based on the number of mounted v9fs filesystems.
263 * The current implementation returns sqrt of the number of mounts.
264 */
265
266static int p9_mux_calc_poll_procs(int muxnum)
267{
268 int n;
269
270 if (p9_mux_poll_task_num)
271 n = muxnum / p9_mux_poll_task_num +
272 (muxnum % p9_mux_poll_task_num ? 1 : 0);
273 else
274 n = 1;
275
276 if (n > ARRAY_SIZE(p9_mux_poll_tasks))
277 n = ARRAY_SIZE(p9_mux_poll_tasks);
278
279 return n;
280}
281
282static int p9_mux_poll_start(struct p9_conn *m)
283{ 254{
284 int i, n; 255 unsigned long flags;
285 struct p9_mux_poll_task *vpt, *vptlast; 256 int i;
286 struct task_struct *pproc;
287
288 P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num,
289 p9_mux_poll_task_num);
290 mutex_lock(&p9_mux_task_lock);
291
292 n = p9_mux_calc_poll_procs(p9_mux_num + 1);
293 if (n > p9_mux_poll_task_num) {
294 for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
295 if (p9_mux_poll_tasks[i].task == NULL) {
296 vpt = &p9_mux_poll_tasks[i];
297 P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n",
298 vpt);
299 pproc = kthread_create(p9_poll_proc, vpt,
300 "v9fs-poll");
301
302 if (!IS_ERR(pproc)) {
303 vpt->task = pproc;
304 INIT_LIST_HEAD(&vpt->mux_list);
305 vpt->muxnum = 0;
306 p9_mux_poll_task_num++;
307 wake_up_process(vpt->task);
308 }
309 break;
310 }
311 }
312
313 if (i >= ARRAY_SIZE(p9_mux_poll_tasks))
314 P9_DPRINTK(P9_DEBUG_ERROR,
315 "warning: no free poll slots\n");
316 }
317 257
318 n = (p9_mux_num + 1) / p9_mux_poll_task_num + 258 for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
319 ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0); 259 struct p9_poll_wait *pwait = &m->poll_wait[i];
320
321 vptlast = NULL;
322 for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
323 vpt = &p9_mux_poll_tasks[i];
324 if (vpt->task != NULL) {
325 vptlast = vpt;
326 if (vpt->muxnum < n) {
327 P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
328 list_add(&m->mux_list, &vpt->mux_list);
329 vpt->muxnum++;
330 m->poll_task = vpt;
331 memset(&m->poll_waddr, 0,
332 sizeof(m->poll_waddr));
333 init_poll_funcptr(&m->pt, p9_pollwait);
334 break;
335 }
336 }
337 }
338 260
339 if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) { 261 if (pwait->wait_addr) {
340 if (vptlast == NULL) { 262 remove_wait_queue(pwait->wait_addr, &pwait->wait);
341 mutex_unlock(&p9_mux_task_lock); 263 pwait->wait_addr = NULL;
342 return -ENOMEM;
343 } 264 }
344
345 P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
346 list_add(&m->mux_list, &vptlast->mux_list);
347 vptlast->muxnum++;
348 m->poll_task = vptlast;
349 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
350 init_poll_funcptr(&m->pt, p9_pollwait);
351 } 265 }
352 266
353 p9_mux_num++; 267 spin_lock_irqsave(&p9_poll_lock, flags);
354 mutex_unlock(&p9_mux_task_lock); 268 list_del_init(&m->poll_pending_link);
355 269 spin_unlock_irqrestore(&p9_poll_lock, flags);
356 return 0;
357}
358
359static void p9_mux_poll_stop(struct p9_conn *m)
360{
361 int i;
362 struct p9_mux_poll_task *vpt;
363
364 mutex_lock(&p9_mux_task_lock);
365 vpt = m->poll_task;
366 list_del(&m->mux_list);
367 for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
368 if (m->poll_waddr[i] != NULL) {
369 remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
370 m->poll_waddr[i] = NULL;
371 }
372 }
373 vpt->muxnum--;
374 if (!vpt->muxnum) {
375 P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt);
376 kthread_stop(vpt->task);
377 vpt->task = NULL;
378 p9_mux_poll_task_num--;
379 }
380 p9_mux_num--;
381 mutex_unlock(&p9_mux_task_lock);
382} 270}
383 271
384/** 272/**
@@ -414,11 +302,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
414 INIT_LIST_HEAD(&m->unsent_req_list); 302 INIT_LIST_HEAD(&m->unsent_req_list);
415 INIT_WORK(&m->rq, p9_read_work); 303 INIT_WORK(&m->rq, p9_read_work);
416 INIT_WORK(&m->wq, p9_write_work); 304 INIT_WORK(&m->wq, p9_write_work);
417 n = p9_mux_poll_start(m); 305 INIT_LIST_HEAD(&m->poll_pending_link);
418 if (n) { 306 init_poll_funcptr(&m->pt, p9_pollwait);
419 kfree(m);
420 return ERR_PTR(n);
421 }
422 307
423 n = p9_fd_poll(trans, &m->pt); 308 n = p9_fd_poll(trans, &m->pt);
424 if (n & POLLIN) { 309 if (n & POLLIN) {
@@ -431,11 +316,12 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
431 set_bit(Wpending, &m->wsched); 316 set_bit(Wpending, &m->wsched);
432 } 317 }
433 318
434 for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { 319 for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
435 if (IS_ERR(m->poll_waddr[i])) { 320 if (IS_ERR(m->poll_wait[i].wait_addr)) {
436 p9_mux_poll_stop(m); 321 p9_mux_poll_stop(m);
437 kfree(m); 322 kfree(m);
438 return (void *)m->poll_waddr; /* the error code */ 323 /* return the error code */
324 return (void *)m->poll_wait[i].wait_addr;
439 } 325 }
440 } 326 }
441 327
@@ -464,6 +350,23 @@ static void p9_conn_destroy(struct p9_conn *m)
464 kfree(m); 350 kfree(m);
465} 351}
466 352
353static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
354{
355 struct p9_poll_wait *pwait =
356 container_of(wait, struct p9_poll_wait, wait);
357 struct p9_conn *m = pwait->conn;
358 unsigned long flags;
359 DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
360
361 spin_lock_irqsave(&p9_poll_lock, flags);
362 if (list_empty(&m->poll_pending_link))
363 list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
364 spin_unlock_irqrestore(&p9_poll_lock, flags);
365
366 /* perform the default wake up operation */
367 return default_wake_function(&dummy_wait, mode, sync, key);
368}
369
467/** 370/**
468 * p9_pollwait - add poll task to the wait queue 371 * p9_pollwait - add poll task to the wait queue
469 * @filp: file pointer being polled 372 * @filp: file pointer being polled
@@ -476,29 +379,32 @@ static void p9_conn_destroy(struct p9_conn *m)
476static void 379static void
477p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) 380p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
478{ 381{
382 struct p9_conn *m = container_of(p, struct p9_conn, pt);
383 struct p9_poll_wait *pwait = NULL;
479 int i; 384 int i;
480 struct p9_conn *m;
481 385
482 m = container_of(p, struct p9_conn, pt); 386 for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
483 for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) 387 if (m->poll_wait[i].wait_addr == NULL) {
484 if (m->poll_waddr[i] == NULL) 388 pwait = &m->poll_wait[i];
485 break; 389 break;
390 }
391 }
486 392
487 if (i >= ARRAY_SIZE(m->poll_waddr)) { 393 if (!pwait) {
488 P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); 394 P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
489 return; 395 return;
490 } 396 }
491 397
492 m->poll_waddr[i] = wait_address;
493
494 if (!wait_address) { 398 if (!wait_address) {
495 P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n"); 399 P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n");
496 m->poll_waddr[i] = ERR_PTR(-EIO); 400 pwait->wait_addr = ERR_PTR(-EIO);
497 return; 401 return;
498 } 402 }
499 403
500 init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); 404 pwait->conn = m;
501 add_wait_queue(wait_address, &m->poll_wait[i]); 405 pwait->wait_addr = wait_address;
406 init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
407 add_wait_queue(wait_address, &pwait->wait);
502} 408}
503 409
504/** 410/**
@@ -553,23 +459,34 @@ static void p9_poll_mux(struct p9_conn *m)
553 459
554static int p9_poll_proc(void *a) 460static int p9_poll_proc(void *a)
555{ 461{
556 struct p9_conn *m, *mtmp; 462 unsigned long flags;
557 struct p9_mux_poll_task *vpt;
558 463
559 vpt = a; 464 P9_DPRINTK(P9_DEBUG_MUX, "start %p\n", current);
560 P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt); 465 repeat:
561 while (!kthread_should_stop()) { 466 spin_lock_irqsave(&p9_poll_lock, flags);
562 set_current_state(TASK_INTERRUPTIBLE); 467 while (!list_empty(&p9_poll_pending_list)) {
468 struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
469 struct p9_conn,
470 poll_pending_link);
471 list_del_init(&conn->poll_pending_link);
472 spin_unlock_irqrestore(&p9_poll_lock, flags);
563 473
564 list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { 474 p9_poll_mux(conn);
565 p9_poll_mux(m);
566 }
567 475
568 P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n"); 476 spin_lock_irqsave(&p9_poll_lock, flags);
569 schedule_timeout(SCHED_TIMEOUT * HZ);
570 } 477 }
478 spin_unlock_irqrestore(&p9_poll_lock, flags);
571 479
480 set_current_state(TASK_INTERRUPTIBLE);
481 if (list_empty(&p9_poll_pending_list)) {
482 P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
483 schedule();
484 }
572 __set_current_state(TASK_RUNNING); 485 __set_current_state(TASK_RUNNING);
486
487 if (!kthread_should_stop())
488 goto repeat;
489
573 P9_DPRINTK(P9_DEBUG_MUX, "finish\n"); 490 P9_DPRINTK(P9_DEBUG_MUX, "finish\n");
574 return 0; 491 return 0;
575} 492}
@@ -1602,17 +1519,19 @@ static struct p9_trans_module p9_fd_trans = {
1602 1519
1603int p9_trans_fd_init(void) 1520int p9_trans_fd_init(void)
1604{ 1521{
1605 int i;
1606
1607 for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
1608 p9_mux_poll_tasks[i].task = NULL;
1609
1610 p9_mux_wq = create_workqueue("v9fs"); 1522 p9_mux_wq = create_workqueue("v9fs");
1611 if (!p9_mux_wq) { 1523 if (!p9_mux_wq) {
1612 printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); 1524 printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
1613 return -ENOMEM; 1525 return -ENOMEM;
1614 } 1526 }
1615 1527
1528 p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
1529 if (IS_ERR(p9_poll_task)) {
1530 destroy_workqueue(p9_mux_wq);
1531 printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
1532 return PTR_ERR(p9_poll_task);
1533 }
1534
1616 v9fs_register_trans(&p9_tcp_trans); 1535 v9fs_register_trans(&p9_tcp_trans);
1617 v9fs_register_trans(&p9_unix_trans); 1536 v9fs_register_trans(&p9_unix_trans);
1618 v9fs_register_trans(&p9_fd_trans); 1537 v9fs_register_trans(&p9_fd_trans);
@@ -1622,6 +1541,7 @@ int p9_trans_fd_init(void)
1622 1541
1623void p9_trans_fd_exit(void) 1542void p9_trans_fd_exit(void)
1624{ 1543{
1544 kthread_stop(p9_poll_task);
1625 v9fs_unregister_trans(&p9_tcp_trans); 1545 v9fs_unregister_trans(&p9_tcp_trans);
1626 v9fs_unregister_trans(&p9_unix_trans); 1546 v9fs_unregister_trans(&p9_unix_trans);
1627 v9fs_unregister_trans(&p9_fd_trans); 1547 v9fs_unregister_trans(&p9_fd_trans);