diff options
author | Andres Salomon <dilinger@queued.net> | 2010-11-29 18:39:51 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2010-12-15 20:11:40 -0500 |
commit | b5318d302f8a20eacbbfc01b0ee35b108085a363 (patch) | |
tree | d16b882c326cbe95fde6f3fb46b5e5b2b1b93d49 | |
parent | c10d1e260f7cb6766dc76b4e36ed8f4be53f195a (diff) |
x86, olpc: Speed up device tree creation during boot
Calling alloc_bootmem() for tiny chunks of memory over and over is really
slow; on an XO-1, it caused the time between when the kernel started
booting and when the display came alive (post-lxfb probe) to increase
to 44s. This patch optimizes the prom_early_alloc function by
calling alloc_bootmem for 4k-sized blocks of memory, and handing out
chunks of that to callers. With this patch, the time between kernel load
and display initialization decreased to 23s. If there's a better way to
do this early in the boot process, please let me know.
(Note: increasing the chunk size to 16k didn't noticeably affect boot time,
and wasted 9k.)
v4: clarify comment, requested by hpa
v3: fix wasted memory buglet found by Milton Miller, and style fix.
v2: reorder prom_early_alloc as suggested by Grant.
Signed-off-by: Andres Salomon <dilinger@queued.net>
LKML-Reference: <20101129153951.74202a84@queued.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r-- | arch/x86/platform/olpc/olpc_dt.c | 28 |
1 files changed, 23 insertions, 5 deletions
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 70546975a920..dab874647530 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c | |||
@@ -126,14 +126,32 @@ static unsigned int prom_early_allocated __initdata; | |||
126 | 126 | ||
127 | void * __init prom_early_alloc(unsigned long size) | 127 | void * __init prom_early_alloc(unsigned long size) |
128 | { | 128 | { |
129 | static u8 *mem; | ||
130 | static size_t free_mem; | ||
129 | void *res; | 131 | void *res; |
130 | 132 | ||
131 | res = alloc_bootmem(size); | 133 | if (free_mem < size) { |
132 | if (res) | 134 | const size_t chunk_size = max(PAGE_SIZE, size); |
133 | memset(res, 0, size); | 135 | |
134 | 136 | /* | |
135 | prom_early_allocated += size; | 137 | * To mimimize the number of allocations, grab at least |
138 | * PAGE_SIZE of memory (that's an arbitrary choice that's | ||
139 | * fast enough on the platforms we care about while minimizing | ||
140 | * wasted bootmem) and hand off chunks of it to callers. | ||
141 | */ | ||
142 | res = alloc_bootmem(chunk_size); | ||
143 | if (!res) | ||
144 | return NULL; | ||
145 | prom_early_allocated += chunk_size; | ||
146 | memset(res, 0, chunk_size); | ||
147 | free_mem = chunk_size; | ||
148 | mem = res; | ||
149 | } | ||
136 | 150 | ||
151 | /* allocate from the local cache */ | ||
152 | free_mem -= size; | ||
153 | res = mem; | ||
154 | mem += size; | ||
137 | return res; | 155 | return res; |
138 | } | 156 | } |
139 | 157 | ||