aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2006-03-28 04:56:33 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-28 12:16:04 -0500
commit70674f95c0a2ea694d5c39f4e514f538a09be36f (patch)
tree906d109fafc5eafff6a90c8d866e0525fdaf6783 /fs
parentb02389e98a7b64ad5cd4823740defa8821f30bbd (diff)
[PATCH] Optimize select/poll by putting small data sets on the stack
Optimize select and poll by a using stack space for small fd sets This brings back an old optimization from Linux 2.0. Using the stack is faster than kmalloc. On a Intel P4 system it speeds up a select of a single pty fd by about 13% (~4000 cycles -> ~3500) It also saves memory because a daemon hanging in select or poll will usually save one or two less pages. This can add up - e.g. if you have 10 daemons blocking in poll/select you save 40KB of memory. I did a patch for this long ago, but it was never applied. This version is a reimplementation of the old patch that tries to be less intrusive. I only did the minimal changes needed for the stack allocation. The cut off point before external memory is allocated is currently at 832bytes. The system calls always allocate this much memory on the stack. These 832 bytes are divided into 256 bytes frontend data (for the select bitmaps of the pollfds) and the rest of the space for the wait queues used by the low level drivers. There are some extreme cases where this won't work out for select and it falls back to allocating memory too early - especially with very sparse large select bitmaps - but the majority of processes who only have a small number of file descriptors should be ok. [TBD: 832/256 might not be the best split for select or poll] I suspect more optimizations might be possible, but they would be more complicated. One way would be to cache the select/poll context over multiple system calls because typically the input values should be similar. Problem is when to flush the file descriptors out though. Signed-off-by: Andi Kleen <ak@suse.de> Cc: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/select.c106
1 files changed, 64 insertions, 42 deletions
diff --git a/fs/select.c b/fs/select.c
index 1815a57d2255..d8b4f0722b8d 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -29,12 +29,6 @@
29#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) 29#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
30#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) 30#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
31 31
32struct poll_table_entry {
33 struct file * filp;
34 wait_queue_t wait;
35 wait_queue_head_t * wait_address;
36};
37
38struct poll_table_page { 32struct poll_table_page {
39 struct poll_table_page * next; 33 struct poll_table_page * next;
40 struct poll_table_entry * entry; 34 struct poll_table_entry * entry;
@@ -64,13 +58,23 @@ void poll_initwait(struct poll_wqueues *pwq)
64 init_poll_funcptr(&pwq->pt, __pollwait); 58 init_poll_funcptr(&pwq->pt, __pollwait);
65 pwq->error = 0; 59 pwq->error = 0;
66 pwq->table = NULL; 60 pwq->table = NULL;
61 pwq->inline_index = 0;
67} 62}
68 63
69EXPORT_SYMBOL(poll_initwait); 64EXPORT_SYMBOL(poll_initwait);
70 65
66static void free_poll_entry(struct poll_table_entry *entry)
67{
68 remove_wait_queue(entry->wait_address,&entry->wait);
69 fput(entry->filp);
70}
71
71void poll_freewait(struct poll_wqueues *pwq) 72void poll_freewait(struct poll_wqueues *pwq)
72{ 73{
73 struct poll_table_page * p = pwq->table; 74 struct poll_table_page * p = pwq->table;
75 int i;
76 for (i = 0; i < pwq->inline_index; i++)
77 free_poll_entry(pwq->inline_entries + i);
74 while (p) { 78 while (p) {
75 struct poll_table_entry * entry; 79 struct poll_table_entry * entry;
76 struct poll_table_page *old; 80 struct poll_table_page *old;
@@ -78,8 +82,7 @@ void poll_freewait(struct poll_wqueues *pwq)
78 entry = p->entry; 82 entry = p->entry;
79 do { 83 do {
80 entry--; 84 entry--;
81 remove_wait_queue(entry->wait_address,&entry->wait); 85 free_poll_entry(entry);
82 fput(entry->filp);
83 } while (entry > p->entries); 86 } while (entry > p->entries);
84 old = p; 87 old = p;
85 p = p->next; 88 p = p->next;
@@ -89,12 +92,14 @@ void poll_freewait(struct poll_wqueues *pwq)
89 92
90EXPORT_SYMBOL(poll_freewait); 93EXPORT_SYMBOL(poll_freewait);
91 94
92static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, 95static struct poll_table_entry *poll_get_entry(poll_table *_p)
93 poll_table *_p)
94{ 96{
95 struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); 97 struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
96 struct poll_table_page *table = p->table; 98 struct poll_table_page *table = p->table;
97 99
100 if (p->inline_index < N_INLINE_POLL_ENTRIES)
101 return p->inline_entries + p->inline_index++;
102
98 if (!table || POLL_TABLE_FULL(table)) { 103 if (!table || POLL_TABLE_FULL(table)) {
99 struct poll_table_page *new_table; 104 struct poll_table_page *new_table;
100 105
@@ -102,7 +107,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
102 if (!new_table) { 107 if (!new_table) {
103 p->error = -ENOMEM; 108 p->error = -ENOMEM;
104 __set_current_state(TASK_RUNNING); 109 __set_current_state(TASK_RUNNING);
105 return; 110 return NULL;
106 } 111 }
107 new_table->entry = new_table->entries; 112 new_table->entry = new_table->entries;
108 new_table->next = table; 113 new_table->next = table;
@@ -110,16 +115,21 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
110 table = new_table; 115 table = new_table;
111 } 116 }
112 117
113 /* Add a new entry */ 118 return table->entry++;
114 { 119}
115 struct poll_table_entry * entry = table->entry; 120
116 table->entry = entry+1; 121/* Add a new entry */
117 get_file(filp); 122static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
118 entry->filp = filp; 123 poll_table *p)
119 entry->wait_address = wait_address; 124{
120 init_waitqueue_entry(&entry->wait, current); 125 struct poll_table_entry *entry = poll_get_entry(p);
121 add_wait_queue(wait_address,&entry->wait); 126 if (!entry)
122 } 127 return;
128 get_file(filp);
129 entry->filp = filp;
130 entry->wait_address = wait_address;
131 init_waitqueue_entry(&entry->wait, current);
132 add_wait_queue(wait_address,&entry->wait);
123} 133}
124 134
125#define FDS_IN(fds, n) (fds->in + n) 135#define FDS_IN(fds, n) (fds->in + n)
@@ -284,16 +294,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
284 return retval; 294 return retval;
285} 295}
286 296
287static void *select_bits_alloc(int size)
288{
289 return kmalloc(6 * size, GFP_KERNEL);
290}
291
292static void select_bits_free(void *bits, int size)
293{
294 kfree(bits);
295}
296
297/* 297/*
298 * We can actually return ERESTARTSYS instead of EINTR, but I'd 298 * We can actually return ERESTARTSYS instead of EINTR, but I'd
299 * like to be certain this leads to no problems. So I return 299 * like to be certain this leads to no problems. So I return
@@ -312,6 +312,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
312 char *bits; 312 char *bits;
313 int ret, size, max_fdset; 313 int ret, size, max_fdset;
314 struct fdtable *fdt; 314 struct fdtable *fdt;
315 /* Allocate small arguments on the stack to save memory and be faster */
316 char stack_fds[SELECT_STACK_ALLOC];
315 317
316 ret = -EINVAL; 318 ret = -EINVAL;
317 if (n < 0) 319 if (n < 0)
@@ -332,7 +334,10 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
332 */ 334 */
333 ret = -ENOMEM; 335 ret = -ENOMEM;
334 size = FDS_BYTES(n); 336 size = FDS_BYTES(n);
335 bits = select_bits_alloc(size); 337 if (6*size < SELECT_STACK_ALLOC)
338 bits = stack_fds;
339 else
340 bits = kmalloc(6 * size, GFP_KERNEL);
336 if (!bits) 341 if (!bits)
337 goto out_nofds; 342 goto out_nofds;
338 fds.in = (unsigned long *) bits; 343 fds.in = (unsigned long *) bits;
@@ -367,7 +372,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
367 ret = -EFAULT; 372 ret = -EFAULT;
368 373
369out: 374out:
370 select_bits_free(bits, size); 375 if (bits != stack_fds)
376 kfree(bits);
371out_nofds: 377out_nofds:
372 return ret; 378 return ret;
373} 379}
@@ -619,6 +625,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
619 return count; 625 return count;
620} 626}
621 627
628#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
629 sizeof(struct pollfd))
630
622int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) 631int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
623{ 632{
624 struct poll_wqueues table; 633 struct poll_wqueues table;
@@ -628,6 +637,9 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
628 struct poll_list *walk; 637 struct poll_list *walk;
629 struct fdtable *fdt; 638 struct fdtable *fdt;
630 int max_fdset; 639 int max_fdset;
640 /* Allocate small arguments on the stack to save memory and be faster */
641 char stack_pps[POLL_STACK_ALLOC];
642 struct poll_list *stack_pp = NULL;
631 643
632 /* Do a sanity check on nfds ... */ 644 /* Do a sanity check on nfds ... */
633 rcu_read_lock(); 645 rcu_read_lock();
@@ -645,14 +657,23 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
645 err = -ENOMEM; 657 err = -ENOMEM;
646 while(i!=0) { 658 while(i!=0) {
647 struct poll_list *pp; 659 struct poll_list *pp;
648 pp = kmalloc(sizeof(struct poll_list)+ 660 int num, size;
649 sizeof(struct pollfd)* 661 if (stack_pp == NULL)
650 (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i), 662 num = N_STACK_PPS;
651 GFP_KERNEL); 663 else
652 if(pp==NULL) 664 num = POLLFD_PER_PAGE;
653 goto out_fds; 665 if (num > i)
666 num = i;
667 size = sizeof(struct poll_list) + sizeof(struct pollfd)*num;
668 if (!stack_pp)
669 stack_pp = pp = (struct poll_list *)stack_pps;
670 else {
671 pp = kmalloc(size, GFP_KERNEL);
672 if (!pp)
673 goto out_fds;
674 }
654 pp->next=NULL; 675 pp->next=NULL;
655 pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i); 676 pp->len = num;
656 if (head == NULL) 677 if (head == NULL)
657 head = pp; 678 head = pp;
658 else 679 else
@@ -660,7 +681,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
660 681
661 walk = pp; 682 walk = pp;
662 if (copy_from_user(pp->entries, ufds + nfds-i, 683 if (copy_from_user(pp->entries, ufds + nfds-i,
663 sizeof(struct pollfd)*pp->len)) { 684 sizeof(struct pollfd)*num)) {
664 err = -EFAULT; 685 err = -EFAULT;
665 goto out_fds; 686 goto out_fds;
666 } 687 }
@@ -689,7 +710,8 @@ out_fds:
689 walk = head; 710 walk = head;
690 while(walk!=NULL) { 711 while(walk!=NULL) {
691 struct poll_list *pp = walk->next; 712 struct poll_list *pp = walk->next;
692 kfree(walk); 713 if (walk != stack_pp)
714 kfree(walk);
693 walk = pp; 715 walk = pp;
694 } 716 }
695 poll_freewait(&table); 717 poll_freewait(&table);