diff options
author | Andi Kleen <ak@suse.de> | 2006-03-28 04:56:33 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-28 12:16:04 -0500 |
commit | 70674f95c0a2ea694d5c39f4e514f538a09be36f (patch) | |
tree | 906d109fafc5eafff6a90c8d866e0525fdaf6783 | |
parent | b02389e98a7b64ad5cd4823740defa8821f30bbd (diff) |
[PATCH] Optimize select/poll by putting small data sets on the stack
Optimize select and poll by a using stack space for small fd sets
This brings back an old optimization from Linux 2.0. Using the stack is
faster than kmalloc. On a Intel P4 system it speeds up a select of a
single pty fd by about 13% (~4000 cycles -> ~3500)
It also saves memory because a daemon hanging in select or poll will
usually save one or two less pages. This can add up - e.g. if you have 10
daemons blocking in poll/select you save 40KB of memory.
I did a patch for this long ago, but it was never applied. This version is
a reimplementation of the old patch that tries to be less intrusive. I
only did the minimal changes needed for the stack allocation.
The cut off point before external memory is allocated is currently at
832bytes. The system calls always allocate this much memory on the stack.
These 832 bytes are divided into 256 bytes frontend data (for the select
bitmaps of the pollfds) and the rest of the space for the wait queues used
by the low level drivers. There are some extreme cases where this won't
work out for select and it falls back to allocating memory too early -
especially with very sparse large select bitmaps - but the majority of
processes who only have a small number of file descriptors should be ok.
[TBD: 832/256 might not be the best split for select or poll]
I suspect more optimizations might be possible, but they would be more
complicated. One way would be to cache the select/poll context over
multiple system calls because typically the input values should be similar.
Problem is when to flush the file descriptors out though.
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/select.c | 106 | ||||
-rw-r--r-- | include/linux/poll.h | 17 |
2 files changed, 81 insertions, 42 deletions
diff --git a/fs/select.c b/fs/select.c index 1815a57d2255..d8b4f0722b8d 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -29,12 +29,6 @@ | |||
29 | #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) | 29 | #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) |
30 | #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) | 30 | #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) |
31 | 31 | ||
32 | struct poll_table_entry { | ||
33 | struct file * filp; | ||
34 | wait_queue_t wait; | ||
35 | wait_queue_head_t * wait_address; | ||
36 | }; | ||
37 | |||
38 | struct poll_table_page { | 32 | struct poll_table_page { |
39 | struct poll_table_page * next; | 33 | struct poll_table_page * next; |
40 | struct poll_table_entry * entry; | 34 | struct poll_table_entry * entry; |
@@ -64,13 +58,23 @@ void poll_initwait(struct poll_wqueues *pwq) | |||
64 | init_poll_funcptr(&pwq->pt, __pollwait); | 58 | init_poll_funcptr(&pwq->pt, __pollwait); |
65 | pwq->error = 0; | 59 | pwq->error = 0; |
66 | pwq->table = NULL; | 60 | pwq->table = NULL; |
61 | pwq->inline_index = 0; | ||
67 | } | 62 | } |
68 | 63 | ||
69 | EXPORT_SYMBOL(poll_initwait); | 64 | EXPORT_SYMBOL(poll_initwait); |
70 | 65 | ||
66 | static void free_poll_entry(struct poll_table_entry *entry) | ||
67 | { | ||
68 | remove_wait_queue(entry->wait_address,&entry->wait); | ||
69 | fput(entry->filp); | ||
70 | } | ||
71 | |||
71 | void poll_freewait(struct poll_wqueues *pwq) | 72 | void poll_freewait(struct poll_wqueues *pwq) |
72 | { | 73 | { |
73 | struct poll_table_page * p = pwq->table; | 74 | struct poll_table_page * p = pwq->table; |
75 | int i; | ||
76 | for (i = 0; i < pwq->inline_index; i++) | ||
77 | free_poll_entry(pwq->inline_entries + i); | ||
74 | while (p) { | 78 | while (p) { |
75 | struct poll_table_entry * entry; | 79 | struct poll_table_entry * entry; |
76 | struct poll_table_page *old; | 80 | struct poll_table_page *old; |
@@ -78,8 +82,7 @@ void poll_freewait(struct poll_wqueues *pwq) | |||
78 | entry = p->entry; | 82 | entry = p->entry; |
79 | do { | 83 | do { |
80 | entry--; | 84 | entry--; |
81 | remove_wait_queue(entry->wait_address,&entry->wait); | 85 | free_poll_entry(entry); |
82 | fput(entry->filp); | ||
83 | } while (entry > p->entries); | 86 | } while (entry > p->entries); |
84 | old = p; | 87 | old = p; |
85 | p = p->next; | 88 | p = p->next; |
@@ -89,12 +92,14 @@ void poll_freewait(struct poll_wqueues *pwq) | |||
89 | 92 | ||
90 | EXPORT_SYMBOL(poll_freewait); | 93 | EXPORT_SYMBOL(poll_freewait); |
91 | 94 | ||
92 | static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, | 95 | static struct poll_table_entry *poll_get_entry(poll_table *_p) |
93 | poll_table *_p) | ||
94 | { | 96 | { |
95 | struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); | 97 | struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); |
96 | struct poll_table_page *table = p->table; | 98 | struct poll_table_page *table = p->table; |
97 | 99 | ||
100 | if (p->inline_index < N_INLINE_POLL_ENTRIES) | ||
101 | return p->inline_entries + p->inline_index++; | ||
102 | |||
98 | if (!table || POLL_TABLE_FULL(table)) { | 103 | if (!table || POLL_TABLE_FULL(table)) { |
99 | struct poll_table_page *new_table; | 104 | struct poll_table_page *new_table; |
100 | 105 | ||
@@ -102,7 +107,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, | |||
102 | if (!new_table) { | 107 | if (!new_table) { |
103 | p->error = -ENOMEM; | 108 | p->error = -ENOMEM; |
104 | __set_current_state(TASK_RUNNING); | 109 | __set_current_state(TASK_RUNNING); |
105 | return; | 110 | return NULL; |
106 | } | 111 | } |
107 | new_table->entry = new_table->entries; | 112 | new_table->entry = new_table->entries; |
108 | new_table->next = table; | 113 | new_table->next = table; |
@@ -110,16 +115,21 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, | |||
110 | table = new_table; | 115 | table = new_table; |
111 | } | 116 | } |
112 | 117 | ||
113 | /* Add a new entry */ | 118 | return table->entry++; |
114 | { | 119 | } |
115 | struct poll_table_entry * entry = table->entry; | 120 | |
116 | table->entry = entry+1; | 121 | /* Add a new entry */ |
117 | get_file(filp); | 122 | static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, |
118 | entry->filp = filp; | 123 | poll_table *p) |
119 | entry->wait_address = wait_address; | 124 | { |
120 | init_waitqueue_entry(&entry->wait, current); | 125 | struct poll_table_entry *entry = poll_get_entry(p); |
121 | add_wait_queue(wait_address,&entry->wait); | 126 | if (!entry) |
122 | } | 127 | return; |
128 | get_file(filp); | ||
129 | entry->filp = filp; | ||
130 | entry->wait_address = wait_address; | ||
131 | init_waitqueue_entry(&entry->wait, current); | ||
132 | add_wait_queue(wait_address,&entry->wait); | ||
123 | } | 133 | } |
124 | 134 | ||
125 | #define FDS_IN(fds, n) (fds->in + n) | 135 | #define FDS_IN(fds, n) (fds->in + n) |
@@ -284,16 +294,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) | |||
284 | return retval; | 294 | return retval; |
285 | } | 295 | } |
286 | 296 | ||
287 | static void *select_bits_alloc(int size) | ||
288 | { | ||
289 | return kmalloc(6 * size, GFP_KERNEL); | ||
290 | } | ||
291 | |||
292 | static void select_bits_free(void *bits, int size) | ||
293 | { | ||
294 | kfree(bits); | ||
295 | } | ||
296 | |||
297 | /* | 297 | /* |
298 | * We can actually return ERESTARTSYS instead of EINTR, but I'd | 298 | * We can actually return ERESTARTSYS instead of EINTR, but I'd |
299 | * like to be certain this leads to no problems. So I return | 299 | * like to be certain this leads to no problems. So I return |
@@ -312,6 +312,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, | |||
312 | char *bits; | 312 | char *bits; |
313 | int ret, size, max_fdset; | 313 | int ret, size, max_fdset; |
314 | struct fdtable *fdt; | 314 | struct fdtable *fdt; |
315 | /* Allocate small arguments on the stack to save memory and be faster */ | ||
316 | char stack_fds[SELECT_STACK_ALLOC]; | ||
315 | 317 | ||
316 | ret = -EINVAL; | 318 | ret = -EINVAL; |
317 | if (n < 0) | 319 | if (n < 0) |
@@ -332,7 +334,10 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, | |||
332 | */ | 334 | */ |
333 | ret = -ENOMEM; | 335 | ret = -ENOMEM; |
334 | size = FDS_BYTES(n); | 336 | size = FDS_BYTES(n); |
335 | bits = select_bits_alloc(size); | 337 | if (6*size < SELECT_STACK_ALLOC) |
338 | bits = stack_fds; | ||
339 | else | ||
340 | bits = kmalloc(6 * size, GFP_KERNEL); | ||
336 | if (!bits) | 341 | if (!bits) |
337 | goto out_nofds; | 342 | goto out_nofds; |
338 | fds.in = (unsigned long *) bits; | 343 | fds.in = (unsigned long *) bits; |
@@ -367,7 +372,8 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, | |||
367 | ret = -EFAULT; | 372 | ret = -EFAULT; |
368 | 373 | ||
369 | out: | 374 | out: |
370 | select_bits_free(bits, size); | 375 | if (bits != stack_fds) |
376 | kfree(bits); | ||
371 | out_nofds: | 377 | out_nofds: |
372 | return ret; | 378 | return ret; |
373 | } | 379 | } |
@@ -619,6 +625,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
619 | return count; | 625 | return count; |
620 | } | 626 | } |
621 | 627 | ||
628 | #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ | ||
629 | sizeof(struct pollfd)) | ||
630 | |||
622 | int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) | 631 | int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) |
623 | { | 632 | { |
624 | struct poll_wqueues table; | 633 | struct poll_wqueues table; |
@@ -628,6 +637,9 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) | |||
628 | struct poll_list *walk; | 637 | struct poll_list *walk; |
629 | struct fdtable *fdt; | 638 | struct fdtable *fdt; |
630 | int max_fdset; | 639 | int max_fdset; |
640 | /* Allocate small arguments on the stack to save memory and be faster */ | ||
641 | char stack_pps[POLL_STACK_ALLOC]; | ||
642 | struct poll_list *stack_pp = NULL; | ||
631 | 643 | ||
632 | /* Do a sanity check on nfds ... */ | 644 | /* Do a sanity check on nfds ... */ |
633 | rcu_read_lock(); | 645 | rcu_read_lock(); |
@@ -645,14 +657,23 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) | |||
645 | err = -ENOMEM; | 657 | err = -ENOMEM; |
646 | while(i!=0) { | 658 | while(i!=0) { |
647 | struct poll_list *pp; | 659 | struct poll_list *pp; |
648 | pp = kmalloc(sizeof(struct poll_list)+ | 660 | int num, size; |
649 | sizeof(struct pollfd)* | 661 | if (stack_pp == NULL) |
650 | (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i), | 662 | num = N_STACK_PPS; |
651 | GFP_KERNEL); | 663 | else |
652 | if(pp==NULL) | 664 | num = POLLFD_PER_PAGE; |
653 | goto out_fds; | 665 | if (num > i) |
666 | num = i; | ||
667 | size = sizeof(struct poll_list) + sizeof(struct pollfd)*num; | ||
668 | if (!stack_pp) | ||
669 | stack_pp = pp = (struct poll_list *)stack_pps; | ||
670 | else { | ||
671 | pp = kmalloc(size, GFP_KERNEL); | ||
672 | if (!pp) | ||
673 | goto out_fds; | ||
674 | } | ||
654 | pp->next=NULL; | 675 | pp->next=NULL; |
655 | pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i); | 676 | pp->len = num; |
656 | if (head == NULL) | 677 | if (head == NULL) |
657 | head = pp; | 678 | head = pp; |
658 | else | 679 | else |
@@ -660,7 +681,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) | |||
660 | 681 | ||
661 | walk = pp; | 682 | walk = pp; |
662 | if (copy_from_user(pp->entries, ufds + nfds-i, | 683 | if (copy_from_user(pp->entries, ufds + nfds-i, |
663 | sizeof(struct pollfd)*pp->len)) { | 684 | sizeof(struct pollfd)*num)) { |
664 | err = -EFAULT; | 685 | err = -EFAULT; |
665 | goto out_fds; | 686 | goto out_fds; |
666 | } | 687 | } |
@@ -689,7 +710,8 @@ out_fds: | |||
689 | walk = head; | 710 | walk = head; |
690 | while(walk!=NULL) { | 711 | while(walk!=NULL) { |
691 | struct poll_list *pp = walk->next; | 712 | struct poll_list *pp = walk->next; |
692 | kfree(walk); | 713 | if (walk != stack_pp) |
714 | kfree(walk); | ||
693 | walk = pp; | 715 | walk = pp; |
694 | } | 716 | } |
695 | poll_freewait(&table); | 717 | poll_freewait(&table); |
diff --git a/include/linux/poll.h b/include/linux/poll.h index 8e8f6098508a..51e1b56741fb 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h | |||
@@ -11,6 +11,15 @@ | |||
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
12 | #include <asm/uaccess.h> | 12 | #include <asm/uaccess.h> |
13 | 13 | ||
14 | /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating | ||
15 | additional memory. */ | ||
16 | #define MAX_STACK_ALLOC 832 | ||
17 | #define FRONTEND_STACK_ALLOC 256 | ||
18 | #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC | ||
19 | #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC | ||
20 | #define WQUEUES_STACK_ALLOC (MAX_STACK_ALLOC - FRONTEND_STACK_ALLOC) | ||
21 | #define N_INLINE_POLL_ENTRIES (WQUEUES_STACK_ALLOC / sizeof(struct poll_table_entry)) | ||
22 | |||
14 | struct poll_table_struct; | 23 | struct poll_table_struct; |
15 | 24 | ||
16 | /* | 25 | /* |
@@ -33,6 +42,12 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) | |||
33 | pt->qproc = qproc; | 42 | pt->qproc = qproc; |
34 | } | 43 | } |
35 | 44 | ||
45 | struct poll_table_entry { | ||
46 | struct file * filp; | ||
47 | wait_queue_t wait; | ||
48 | wait_queue_head_t * wait_address; | ||
49 | }; | ||
50 | |||
36 | /* | 51 | /* |
37 | * Structures and helpers for sys_poll/sys_poll | 52 | * Structures and helpers for sys_poll/sys_poll |
38 | */ | 53 | */ |
@@ -40,6 +55,8 @@ struct poll_wqueues { | |||
40 | poll_table pt; | 55 | poll_table pt; |
41 | struct poll_table_page * table; | 56 | struct poll_table_page * table; |
42 | int error; | 57 | int error; |
58 | int inline_index; | ||
59 | struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; | ||
43 | }; | 60 | }; |
44 | 61 | ||
45 | extern void poll_initwait(struct poll_wqueues *pwq); | 62 | extern void poll_initwait(struct poll_wqueues *pwq); |