aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Dike <jdike@addtoit.com>2006-03-31 05:30:08 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-31 15:18:50 -0500
commit02dea0875b0f9b331a65fd6097dfd6115ca4ef24 (patch)
tree100126d72e889296a5d8d8714dd681ab041710cb
parent85b6bce3658a823aa169586fe71ffba0f12ccc71 (diff)
[PATCH] UML: Hotplug memory, take 2
Changes since the first version:
  - added a check for MADV_REMOVE support on the host
  - fixed an error return botch
  - shrunk the sprintf array by one character

This adds hotplug memory support to UML. The mconsole syntax is

    config mem=[+-]n[KMG]

In other words, add or subtract some number of kilobytes, megabytes, or gigabytes.

Unplugged pages are allocated and then released to the host with madvise(MADV_TRUNCATE), which is a currently experimental madvise extension (it is spelled MADV_REMOVE in the code below). These pages are tracked so they can be plugged back in later if the admin decides to give them back.

The first page to be unplugged is used to keep track of about 4M of other pages. A list_head is the first thing on this page. The rest is filled with the addresses of other unplugged pages. This first page is not madvised, obviously. When this page is filled, the next page is used in a similar way and linked onto a list with the first page, and so on.

This whole process reverses when pages are plugged back in. When a tracking page no longer tracks any unplugged pages, it is next in line for plugging, which is done by freeing pages back to the kernel.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/um/drivers/mconsole_kern.c138
-rw-r--r--arch/um/include/mem_user.h1
-rw-r--r--arch/um/include/os.h2
-rw-r--r--arch/um/os-Linux/mem.c27
-rw-r--r--arch/um/os-Linux/process.c44
-rw-r--r--arch/um/os-Linux/start_up.c20
6 files changed, 185 insertions, 47 deletions
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 1488816588ea..d060fce4940e 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -20,6 +20,8 @@
20#include "linux/namei.h" 20#include "linux/namei.h"
21#include "linux/proc_fs.h" 21#include "linux/proc_fs.h"
22#include "linux/syscalls.h" 22#include "linux/syscalls.h"
23#include "linux/list.h"
24#include "linux/mm.h"
23#include "linux/console.h" 25#include "linux/console.h"
24#include "asm/irq.h" 26#include "asm/irq.h"
25#include "asm/uaccess.h" 27#include "asm/uaccess.h"
@@ -347,6 +349,142 @@ static struct mc_device *mconsole_find_dev(char *name)
347 return(NULL); 349 return(NULL);
348} 350}
349 351
352#define UNPLUGGED_PER_PAGE \
353 ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(unsigned long))
354
355struct unplugged_pages {
356 struct list_head list;
357 void *pages[UNPLUGGED_PER_PAGE];
358};
359
360static unsigned long long unplugged_pages_count = 0;
361static struct list_head unplugged_pages = LIST_HEAD_INIT(unplugged_pages);
362static int unplug_index = UNPLUGGED_PER_PAGE;
363
364static int mem_config(char *str)
365{
366 unsigned long long diff;
367 int err = -EINVAL, i, add;
368 char *ret;
369
370 if(str[0] != '=')
371 goto out;
372
373 str++;
374 if(str[0] == '-')
375 add = 0;
376 else if(str[0] == '+'){
377 add = 1;
378 }
379 else goto out;
380
381 str++;
382 diff = memparse(str, &ret);
383 if(*ret != '\0')
384 goto out;
385
386 diff /= PAGE_SIZE;
387
388 for(i = 0; i < diff; i++){
389 struct unplugged_pages *unplugged;
390 void *addr;
391
392 if(add){
393 if(list_empty(&unplugged_pages))
394 break;
395
396 unplugged = list_entry(unplugged_pages.next,
397 struct unplugged_pages, list);
398 if(unplug_index > 0)
399 addr = unplugged->pages[--unplug_index];
400 else {
401 list_del(&unplugged->list);
402 addr = unplugged;
403 unplug_index = UNPLUGGED_PER_PAGE;
404 }
405
406 free_page((unsigned long) addr);
407 unplugged_pages_count--;
408 }
409 else {
410 struct page *page;
411
412 page = alloc_page(GFP_ATOMIC);
413 if(page == NULL)
414 break;
415
416 unplugged = page_address(page);
417 if(unplug_index == UNPLUGGED_PER_PAGE){
418 INIT_LIST_HEAD(&unplugged->list);
419 list_add(&unplugged->list, &unplugged_pages);
420 unplug_index = 0;
421 }
422 else {
423 struct list_head *entry = unplugged_pages.next;
424 addr = unplugged;
425
426 unplugged = list_entry(entry,
427 struct unplugged_pages,
428 list);
429 unplugged->pages[unplug_index++] = addr;
430 err = os_drop_memory(addr, PAGE_SIZE);
431 if(err)
432 printk("Failed to release memory - "
433 "errno = %d\n", err);
434 }
435
436 unplugged_pages_count++;
437 }
438 }
439
440 err = 0;
441out:
442 return err;
443}
444
445static int mem_get_config(char *name, char *str, int size, char **error_out)
446{
447 char buf[sizeof("18446744073709551615")];
448 int len = 0;
449
450 sprintf(buf, "%ld", uml_physmem);
451 CONFIG_CHUNK(str, size, len, buf, 1);
452
453 return len;
454}
455
/*
 * mconsole id handler for the mem device.
 *
 * Always reports the range [0, 0] through start_out/end_out and returns
 * 0; str is not examined.
 */
static int mem_id(char **str, int *start_out, int *end_out)
{
	*end_out = 0;
	*start_out = *end_out;

	return 0;
}
463
/*
 * mconsole remove handler for the mem device: removal is never allowed,
 * so every request is refused with -EBUSY regardless of n.
 */
static int mem_remove(int n)
{
	const int refused = -EBUSY;

	return refused;
}
468
469static struct mc_device mem_mc = {
470 .name = "mem",
471 .config = mem_config,
472 .get_config = mem_get_config,
473 .id = mem_id,
474 .remove = mem_remove,
475};
476
477static int mem_mc_init(void)
478{
479 if(can_drop_memory())
480 mconsole_register_dev(&mem_mc);
481 else printk("Can't release memory to the host - memory hotplug won't "
482 "be supported\n");
483 return 0;
484}
485
486__initcall(mem_mc_init);
487
350#define CONFIG_BUF_SIZE 64 488#define CONFIG_BUF_SIZE 64
351 489
352static void mconsole_get_config(int (*get_config)(char *, char *, int, 490static void mconsole_get_config(int (*get_config)(char *, char *, int,
diff --git a/arch/um/include/mem_user.h b/arch/um/include/mem_user.h
index a1064c5823bf..a54514d2cc3a 100644
--- a/arch/um/include/mem_user.h
+++ b/arch/um/include/mem_user.h
@@ -49,7 +49,6 @@ extern int iomem_size;
49extern unsigned long host_task_size; 49extern unsigned long host_task_size;
50extern unsigned long task_size; 50extern unsigned long task_size;
51 51
52extern void check_devanon(void);
53extern int init_mem_user(void); 52extern int init_mem_user(void);
54extern void setup_memory(void *entry); 53extern void setup_memory(void *entry);
55extern unsigned long find_iomem(char *driver, unsigned long *len_out); 54extern unsigned long find_iomem(char *driver, unsigned long *len_out);
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index d3d1bc6074ef..5fb84e889b2b 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -205,6 +205,8 @@ extern int os_map_memory(void *virt, int fd, unsigned long long off,
205extern int os_protect_memory(void *addr, unsigned long len, 205extern int os_protect_memory(void *addr, unsigned long len,
206 int r, int w, int x); 206 int r, int w, int x);
207extern int os_unmap_memory(void *addr, int len); 207extern int os_unmap_memory(void *addr, int len);
208extern int os_drop_memory(void *addr, int length);
209extern int can_drop_memory(void);
208extern void os_flush_stdout(void); 210extern void os_flush_stdout(void);
209 211
210/* tt.c 212/* tt.c
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 9d7d69a523bb..6ab372da9657 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -121,36 +121,11 @@ int create_tmp_file(unsigned long long len)
121 return(fd); 121 return(fd);
122} 122}
123 123
124static int create_anon_file(unsigned long long len)
125{
126 void *addr;
127 int fd;
128
129 fd = open("/dev/anon", O_RDWR);
130 if(fd < 0) {
131 perror("opening /dev/anon");
132 exit(1);
133 }
134
135 addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
136 if(addr == MAP_FAILED){
137 perror("mapping physmem file");
138 exit(1);
139 }
140 munmap(addr, len);
141
142 return(fd);
143}
144
145extern int have_devanon;
146
147int create_mem_file(unsigned long long len) 124int create_mem_file(unsigned long long len)
148{ 125{
149 int err, fd; 126 int err, fd;
150 127
151 if(have_devanon) 128 fd = create_tmp_file(len);
152 fd = create_anon_file(len);
153 else fd = create_tmp_file(len);
154 129
155 err = os_set_exec_close(fd, 1); 130 err = os_set_exec_close(fd, 1);
156 if(err < 0){ 131 if(err < 0){
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index d261888f39c4..8176b0b52047 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -11,6 +11,7 @@
11#include <linux/unistd.h> 11#include <linux/unistd.h>
12#include <sys/mman.h> 12#include <sys/mman.h>
13#include <sys/wait.h> 13#include <sys/wait.h>
14#include <sys/mman.h>
14#include "ptrace_user.h" 15#include "ptrace_user.h"
15#include "os.h" 16#include "os.h"
16#include "user.h" 17#include "user.h"
@@ -20,6 +21,7 @@
20#include "kern_util.h" 21#include "kern_util.h"
21#include "longjmp.h" 22#include "longjmp.h"
22#include "skas_ptrace.h" 23#include "skas_ptrace.h"
24#include "kern_constants.h"
23 25
24#define ARBITRARY_ADDR -1 26#define ARBITRARY_ADDR -1
25#define FAILURE_PID -1 27#define FAILURE_PID -1
@@ -187,6 +189,48 @@ int os_unmap_memory(void *addr, int len)
187 return(0); 189 return(0);
188} 190}
189 191
/*
 * Fallback for host headers that do not define MADV_REMOVE yet.  The
 * value must match the host kernel's ABI, which is 9 on Linux.
 */
#ifndef MADV_REMOVE
#define MADV_REMOVE 9 /* remove these pages & resources */
#endif

/*
 * Release the pages in [addr, addr + length) to the host with
 * madvise(MADV_REMOVE).
 *
 * Returns 0 on success or -errno if madvise() fails.
 */
int os_drop_memory(void *addr, int length)
{
	int err;

	err = madvise(addr, length, MADV_REMOVE);
	if(err < 0)
		err = -errno;
	return err;
}
205
206int can_drop_memory(void)
207{
208 void *addr;
209 int fd;
210
211 printk("Checking host MADV_REMOVE support...");
212 fd = create_mem_file(UM_KERN_PAGE_SIZE);
213 if(fd < 0){
214 printk("Creating test memory file failed, err = %d\n", -fd);
215 return 0;
216 }
217
218 addr = mmap64(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
219 MAP_PRIVATE, fd, 0);
220 if(addr == MAP_FAILED){
221 printk("Mapping test memory file failed, err = %d\n", -errno);
222 return 0;
223 }
224
225 if(madvise(addr, UM_KERN_PAGE_SIZE, MADV_REMOVE) != 0){
226 printk("MADV_REMOVE failed, err = %d\n", -errno);
227 return 0;
228 }
229
230 printk("OK\n");
231 return 1;
232}
233
190void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) 234void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
191{ 235{
192 int flags = 0, pages; 236 int flags = 0, pages;
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 32753131f8d8..387e26af301a 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -470,25 +470,6 @@ int can_do_skas(void)
470} 470}
471#endif 471#endif
472 472
473int have_devanon = 0;
474
475/* Runs on boot kernel stack - already safe to use printk. */
476
477void check_devanon(void)
478{
479 int fd;
480
481 printk("Checking for /dev/anon on the host...");
482 fd = open("/dev/anon", O_RDWR);
483 if(fd < 0){
484 printk("Not available (open failed with errno %d)\n", errno);
485 return;
486 }
487
488 printk("OK\n");
489 have_devanon = 1;
490}
491
492int __init parse_iomem(char *str, int *add) 473int __init parse_iomem(char *str, int *add)
493{ 474{
494 struct iomem_region *new; 475 struct iomem_region *new;
@@ -664,6 +645,5 @@ void os_check_bugs(void)
664{ 645{
665 check_ptrace(); 646 check_ptrace();
666 check_sigio(); 647 check_sigio();
667 check_devanon();
668} 648}
669 649