diff options
author | Jeff Dike <jdike@addtoit.com> | 2006-03-31 05:30:08 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-31 15:18:50 -0500 |
commit | 02dea0875b0f9b331a65fd6097dfd6115ca4ef24 (patch) | |
tree | 100126d72e889296a5d8d8714dd681ab041710cb | |
parent | 85b6bce3658a823aa169586fe71ffba0f12ccc71 (diff) |
[PATCH] UML: Hotplug memory, take 2
Changes since first version:
- added check for MADV_REMOVE support on the host
- fixed error return botch
- shrunk sprintf array by one character
This adds hotplug memory support to UML. The mconsole syntax is:
    config mem=[+-]n[KMG]
In other words, add or subtract some number of kilobytes, megabytes, or
gigabytes.
Unplugged pages are allocated and then released to the host with
madvise(MADV_TRUNCATE) (named MADV_REMOVE in the code below), which is
currently an experimental madvise extension. These pages are tracked so they
can be plugged back in later if the admin decides to give them back. The
first page to be unplugged is used to keep track of about 4M of other
pages. A list_head is the first thing on this page. The rest is filled
with addresses of other unplugged pages. This first page is not madvised,
obviously.
When this page is filled, the next page is used in a similar way and linked
onto a list with the first page, and so on. This whole process reverses when
pages are plugged back in. When a tracking page no longer tracks any
unplugged pages, then it is next in line for plugging, which is done by
freeing pages back to the kernel.
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/um/drivers/mconsole_kern.c | 138 | ||||
-rw-r--r-- | arch/um/include/mem_user.h | 1 | ||||
-rw-r--r-- | arch/um/include/os.h | 2 | ||||
-rw-r--r-- | arch/um/os-Linux/mem.c | 27 | ||||
-rw-r--r-- | arch/um/os-Linux/process.c | 44 | ||||
-rw-r--r-- | arch/um/os-Linux/start_up.c | 20 |
6 files changed, 185 insertions, 47 deletions
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 1488816588ea..d060fce4940e 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include "linux/namei.h" | 20 | #include "linux/namei.h" |
21 | #include "linux/proc_fs.h" | 21 | #include "linux/proc_fs.h" |
22 | #include "linux/syscalls.h" | 22 | #include "linux/syscalls.h" |
23 | #include "linux/list.h" | ||
24 | #include "linux/mm.h" | ||
23 | #include "linux/console.h" | 25 | #include "linux/console.h" |
24 | #include "asm/irq.h" | 26 | #include "asm/irq.h" |
25 | #include "asm/uaccess.h" | 27 | #include "asm/uaccess.h" |
@@ -347,6 +349,142 @@ static struct mc_device *mconsole_find_dev(char *name) | |||
347 | return(NULL); | 349 | return(NULL); |
348 | } | 350 | } |
349 | 351 | ||
352 | #define UNPLUGGED_PER_PAGE \ | ||
353 | ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(unsigned long)) | ||
354 | |||
355 | struct unplugged_pages { | ||
356 | struct list_head list; | ||
357 | void *pages[UNPLUGGED_PER_PAGE]; | ||
358 | }; | ||
359 | |||
360 | static unsigned long long unplugged_pages_count = 0; | ||
361 | static struct list_head unplugged_pages = LIST_HEAD_INIT(unplugged_pages); | ||
362 | static int unplug_index = UNPLUGGED_PER_PAGE; | ||
363 | |||
364 | static int mem_config(char *str) | ||
365 | { | ||
366 | unsigned long long diff; | ||
367 | int err = -EINVAL, i, add; | ||
368 | char *ret; | ||
369 | |||
370 | if(str[0] != '=') | ||
371 | goto out; | ||
372 | |||
373 | str++; | ||
374 | if(str[0] == '-') | ||
375 | add = 0; | ||
376 | else if(str[0] == '+'){ | ||
377 | add = 1; | ||
378 | } | ||
379 | else goto out; | ||
380 | |||
381 | str++; | ||
382 | diff = memparse(str, &ret); | ||
383 | if(*ret != '\0') | ||
384 | goto out; | ||
385 | |||
386 | diff /= PAGE_SIZE; | ||
387 | |||
388 | for(i = 0; i < diff; i++){ | ||
389 | struct unplugged_pages *unplugged; | ||
390 | void *addr; | ||
391 | |||
392 | if(add){ | ||
393 | if(list_empty(&unplugged_pages)) | ||
394 | break; | ||
395 | |||
396 | unplugged = list_entry(unplugged_pages.next, | ||
397 | struct unplugged_pages, list); | ||
398 | if(unplug_index > 0) | ||
399 | addr = unplugged->pages[--unplug_index]; | ||
400 | else { | ||
401 | list_del(&unplugged->list); | ||
402 | addr = unplugged; | ||
403 | unplug_index = UNPLUGGED_PER_PAGE; | ||
404 | } | ||
405 | |||
406 | free_page((unsigned long) addr); | ||
407 | unplugged_pages_count--; | ||
408 | } | ||
409 | else { | ||
410 | struct page *page; | ||
411 | |||
412 | page = alloc_page(GFP_ATOMIC); | ||
413 | if(page == NULL) | ||
414 | break; | ||
415 | |||
416 | unplugged = page_address(page); | ||
417 | if(unplug_index == UNPLUGGED_PER_PAGE){ | ||
418 | INIT_LIST_HEAD(&unplugged->list); | ||
419 | list_add(&unplugged->list, &unplugged_pages); | ||
420 | unplug_index = 0; | ||
421 | } | ||
422 | else { | ||
423 | struct list_head *entry = unplugged_pages.next; | ||
424 | addr = unplugged; | ||
425 | |||
426 | unplugged = list_entry(entry, | ||
427 | struct unplugged_pages, | ||
428 | list); | ||
429 | unplugged->pages[unplug_index++] = addr; | ||
430 | err = os_drop_memory(addr, PAGE_SIZE); | ||
431 | if(err) | ||
432 | printk("Failed to release memory - " | ||
433 | "errno = %d\n", err); | ||
434 | } | ||
435 | |||
436 | unplugged_pages_count++; | ||
437 | } | ||
438 | } | ||
439 | |||
440 | err = 0; | ||
441 | out: | ||
442 | return err; | ||
443 | } | ||
444 | |||
445 | static int mem_get_config(char *name, char *str, int size, char **error_out) | ||
446 | { | ||
447 | char buf[sizeof("18446744073709551615")]; | ||
448 | int len = 0; | ||
449 | |||
450 | sprintf(buf, "%ld", uml_physmem); | ||
451 | CONFIG_CHUNK(str, size, len, buf, 1); | ||
452 | |||
453 | return len; | ||
454 | } | ||
455 | |||
456 | static int mem_id(char **str, int *start_out, int *end_out) | ||
457 | { | ||
458 | *start_out = 0; | ||
459 | *end_out = 0; | ||
460 | |||
461 | return 0; | ||
462 | } | ||
463 | |||
464 | static int mem_remove(int n) | ||
465 | { | ||
466 | return -EBUSY; | ||
467 | } | ||
468 | |||
469 | static struct mc_device mem_mc = { | ||
470 | .name = "mem", | ||
471 | .config = mem_config, | ||
472 | .get_config = mem_get_config, | ||
473 | .id = mem_id, | ||
474 | .remove = mem_remove, | ||
475 | }; | ||
476 | |||
477 | static int mem_mc_init(void) | ||
478 | { | ||
479 | if(can_drop_memory()) | ||
480 | mconsole_register_dev(&mem_mc); | ||
481 | else printk("Can't release memory to the host - memory hotplug won't " | ||
482 | "be supported\n"); | ||
483 | return 0; | ||
484 | } | ||
485 | |||
486 | __initcall(mem_mc_init); | ||
487 | |||
350 | #define CONFIG_BUF_SIZE 64 | 488 | #define CONFIG_BUF_SIZE 64 |
351 | 489 | ||
352 | static void mconsole_get_config(int (*get_config)(char *, char *, int, | 490 | static void mconsole_get_config(int (*get_config)(char *, char *, int, |
diff --git a/arch/um/include/mem_user.h b/arch/um/include/mem_user.h index a1064c5823bf..a54514d2cc3a 100644 --- a/arch/um/include/mem_user.h +++ b/arch/um/include/mem_user.h | |||
@@ -49,7 +49,6 @@ extern int iomem_size; | |||
49 | extern unsigned long host_task_size; | 49 | extern unsigned long host_task_size; |
50 | extern unsigned long task_size; | 50 | extern unsigned long task_size; |
51 | 51 | ||
52 | extern void check_devanon(void); | ||
53 | extern int init_mem_user(void); | 52 | extern int init_mem_user(void); |
54 | extern void setup_memory(void *entry); | 53 | extern void setup_memory(void *entry); |
55 | extern unsigned long find_iomem(char *driver, unsigned long *len_out); | 54 | extern unsigned long find_iomem(char *driver, unsigned long *len_out); |
diff --git a/arch/um/include/os.h b/arch/um/include/os.h index d3d1bc6074ef..5fb84e889b2b 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h | |||
@@ -205,6 +205,8 @@ extern int os_map_memory(void *virt, int fd, unsigned long long off, | |||
205 | extern int os_protect_memory(void *addr, unsigned long len, | 205 | extern int os_protect_memory(void *addr, unsigned long len, |
206 | int r, int w, int x); | 206 | int r, int w, int x); |
207 | extern int os_unmap_memory(void *addr, int len); | 207 | extern int os_unmap_memory(void *addr, int len); |
208 | extern int os_drop_memory(void *addr, int length); | ||
209 | extern int can_drop_memory(void); | ||
208 | extern void os_flush_stdout(void); | 210 | extern void os_flush_stdout(void); |
209 | 211 | ||
210 | /* tt.c | 212 | /* tt.c |
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 9d7d69a523bb..6ab372da9657 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c | |||
@@ -121,36 +121,11 @@ int create_tmp_file(unsigned long long len) | |||
121 | return(fd); | 121 | return(fd); |
122 | } | 122 | } |
123 | 123 | ||
124 | static int create_anon_file(unsigned long long len) | ||
125 | { | ||
126 | void *addr; | ||
127 | int fd; | ||
128 | |||
129 | fd = open("/dev/anon", O_RDWR); | ||
130 | if(fd < 0) { | ||
131 | perror("opening /dev/anon"); | ||
132 | exit(1); | ||
133 | } | ||
134 | |||
135 | addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); | ||
136 | if(addr == MAP_FAILED){ | ||
137 | perror("mapping physmem file"); | ||
138 | exit(1); | ||
139 | } | ||
140 | munmap(addr, len); | ||
141 | |||
142 | return(fd); | ||
143 | } | ||
144 | |||
145 | extern int have_devanon; | ||
146 | |||
147 | int create_mem_file(unsigned long long len) | 124 | int create_mem_file(unsigned long long len) |
148 | { | 125 | { |
149 | int err, fd; | 126 | int err, fd; |
150 | 127 | ||
151 | if(have_devanon) | 128 | fd = create_tmp_file(len); |
152 | fd = create_anon_file(len); | ||
153 | else fd = create_tmp_file(len); | ||
154 | 129 | ||
155 | err = os_set_exec_close(fd, 1); | 130 | err = os_set_exec_close(fd, 1); |
156 | if(err < 0){ | 131 | if(err < 0){ |
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index d261888f39c4..8176b0b52047 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/unistd.h> | 11 | #include <linux/unistd.h> |
12 | #include <sys/mman.h> | 12 | #include <sys/mman.h> |
13 | #include <sys/wait.h> | 13 | #include <sys/wait.h> |
14 | #include <sys/mman.h> | ||
14 | #include "ptrace_user.h" | 15 | #include "ptrace_user.h" |
15 | #include "os.h" | 16 | #include "os.h" |
16 | #include "user.h" | 17 | #include "user.h" |
@@ -20,6 +21,7 @@ | |||
20 | #include "kern_util.h" | 21 | #include "kern_util.h" |
21 | #include "longjmp.h" | 22 | #include "longjmp.h" |
22 | #include "skas_ptrace.h" | 23 | #include "skas_ptrace.h" |
24 | #include "kern_constants.h" | ||
23 | 25 | ||
24 | #define ARBITRARY_ADDR -1 | 26 | #define ARBITRARY_ADDR -1 |
25 | #define FAILURE_PID -1 | 27 | #define FAILURE_PID -1 |
@@ -187,6 +189,48 @@ int os_unmap_memory(void *addr, int len) | |||
187 | return(0); | 189 | return(0); |
188 | } | 190 | } |
189 | 191 | ||
192 | #ifndef MADV_REMOVE | ||
193 | #define MADV_REMOVE 0x5 /* remove these pages & resources */ | ||
194 | #endif | ||
195 | |||
196 | int os_drop_memory(void *addr, int length) | ||
197 | { | ||
198 | int err; | ||
199 | |||
200 | err = madvise(addr, length, MADV_REMOVE); | ||
201 | if(err < 0) | ||
202 | err = -errno; | ||
203 | return err; | ||
204 | } | ||
205 | |||
206 | int can_drop_memory(void) | ||
207 | { | ||
208 | void *addr; | ||
209 | int fd; | ||
210 | |||
211 | printk("Checking host MADV_REMOVE support..."); | ||
212 | fd = create_mem_file(UM_KERN_PAGE_SIZE); | ||
213 | if(fd < 0){ | ||
214 | printk("Creating test memory file failed, err = %d\n", -fd); | ||
215 | return 0; | ||
216 | } | ||
217 | |||
218 | addr = mmap64(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, | ||
219 | MAP_PRIVATE, fd, 0); | ||
220 | if(addr == MAP_FAILED){ | ||
221 | printk("Mapping test memory file failed, err = %d\n", -errno); | ||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | if(madvise(addr, UM_KERN_PAGE_SIZE, MADV_REMOVE) != 0){ | ||
226 | printk("MADV_REMOVE failed, err = %d\n", -errno); | ||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | printk("OK\n"); | ||
231 | return 1; | ||
232 | } | ||
233 | |||
190 | void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) | 234 | void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) |
191 | { | 235 | { |
192 | int flags = 0, pages; | 236 | int flags = 0, pages; |
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 32753131f8d8..387e26af301a 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c | |||
@@ -470,25 +470,6 @@ int can_do_skas(void) | |||
470 | } | 470 | } |
471 | #endif | 471 | #endif |
472 | 472 | ||
473 | int have_devanon = 0; | ||
474 | |||
475 | /* Runs on boot kernel stack - already safe to use printk. */ | ||
476 | |||
477 | void check_devanon(void) | ||
478 | { | ||
479 | int fd; | ||
480 | |||
481 | printk("Checking for /dev/anon on the host..."); | ||
482 | fd = open("/dev/anon", O_RDWR); | ||
483 | if(fd < 0){ | ||
484 | printk("Not available (open failed with errno %d)\n", errno); | ||
485 | return; | ||
486 | } | ||
487 | |||
488 | printk("OK\n"); | ||
489 | have_devanon = 1; | ||
490 | } | ||
491 | |||
492 | int __init parse_iomem(char *str, int *add) | 473 | int __init parse_iomem(char *str, int *add) |
493 | { | 474 | { |
494 | struct iomem_region *new; | 475 | struct iomem_region *new; |
@@ -664,6 +645,5 @@ void os_check_bugs(void) | |||
664 | { | 645 | { |
665 | check_ptrace(); | 646 | check_ptrace(); |
666 | check_sigio(); | 647 | check_sigio(); |
667 | check_devanon(); | ||
668 | } | 648 | } |
669 | 649 | ||