author    Heiko Carstens <heiko.carstens@de.ibm.com>	2005-05-01 11:58:57 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>	2005-05-01 11:58:57 -0400
commit    c9e3735359ac2d74ee61c6f1e5724f4a6db570bf
tree      381204a24f4378d3c8d898f8e0210b1b97f5f248 /arch/s390/kernel
parent    4b7e0706620e3947dc1685dfdbc1413404afb545
[PATCH] s390: fix memory holes and cleanup setup_arch
The memory setup didn't take care of memory holes, which makes the memory
management think more memory is available than there really is. That causes
the OOM killer to kill processes even though enough memory is left that
could be written out to swap. The patch fixes this by using
free_area_init_node with an array of memory holes instead of
free_area_init. It also cleans up the code in setup.c by splitting
setup_arch into smaller pieces.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--  arch/s390/kernel/setup.c | 385
1 file changed, 214 insertions(+), 171 deletions(-)
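The holes collected by this patch end up in zholes_size[], which the s390 paging_init() (in arch/s390/mm, outside this diffstat) hands to free_area_init_node() instead of free_area_init(), so that the zone page counts exclude the holes. As a quick illustration of the accounting done by the new add_memory_hole() helper, the standalone program below (not kernel code; the 2 GiB DMA boundary is only an example value standing in for what the kernel derives from MAX_DMA_ADDRESS) shows how an inclusive pfn range is charged to ZONE_DMA, ZONE_NORMAL, or split across the boundary:

/*
 * Standalone sketch of the hole accounting introduced by this patch.
 * A hole given as an inclusive pfn range [start, end] is charged to
 * ZONE_DMA, ZONE_NORMAL, or split across the DMA boundary pfn.
 * The boundary below (2 GiB with 4 KiB pages) is an example value,
 * not s390's actual MAX_DMA_ADDRESS.
 */
#include <stdio.h>

enum { ZONE_DMA, ZONE_NORMAL, MAX_NR_ZONES };

static unsigned long zholes_size[MAX_NR_ZONES];
static const unsigned long dma_pfn = 0x80000000UL >> 12;	/* example boundary */

static void add_memory_hole(unsigned long start, unsigned long end)
{
	if (end <= dma_pfn)
		zholes_size[ZONE_DMA] += end - start + 1;
	else if (start > dma_pfn)
		zholes_size[ZONE_NORMAL] += end - start + 1;
	else {
		zholes_size[ZONE_DMA] += dma_pfn - start + 1;
		zholes_size[ZONE_NORMAL] += end - dma_pfn;
	}
}

int main(void)
{
	add_memory_hole(0x10000, 0x1ffff);		/* entirely below the boundary */
	add_memory_hole(dma_pfn - 4, dma_pfn + 3);	/* straddles it: 5 + 3 pages */
	printf("DMA holes:    %lu pages\n", zholes_size[ZONE_DMA]);
	printf("NORMAL holes: %lu pages\n", zholes_size[ZONE_NORMAL]);
	return 0;
}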
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c879c40aa7a5..f0679be4f96f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -60,6 +60,8 @@ struct {
 #define CHUNK_READ_WRITE 0
 #define CHUNK_READ_ONLY 1
 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
+unsigned long __initdata zholes_size[MAX_NR_ZONES];
+static unsigned long __initdata memory_end;
 
 /*
  * Setup options
@@ -78,11 +80,15 @@ static char command_line[COMMAND_LINE_SIZE] = { 0, };
 
 static struct resource code_resource = {
 	.name = "Kernel code",
+	.start = (unsigned long) &_text,
+	.end = (unsigned long) &_etext - 1,
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
 };
 
 static struct resource data_resource = {
 	.name = "Kernel data",
+	.start = (unsigned long) &_etext,
+	.end = (unsigned long) &_edata - 1,
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
 };
 
@@ -310,90 +316,50 @@ void machine_power_off(void)
 
 EXPORT_SYMBOL(machine_power_off);
 
-/*
- * Setup function called from init/main.c just after the banner
- * was printed.
- */
-extern char _pstart, _pend, _stext;
-
-void __init setup_arch(char **cmdline_p)
+static void __init
+add_memory_hole(unsigned long start, unsigned long end)
 {
-	unsigned long bootmap_size;
-	unsigned long memory_start, memory_end;
-	char c = ' ', cn, *to = command_line, *from = COMMAND_LINE;
-	unsigned long start_pfn, end_pfn;
-	static unsigned int smptrap=0;
-	unsigned long delay = 0;
-	struct _lowcore *lc;
-	int i;
+	unsigned long dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
+
+	if (end <= dma_pfn)
+		zholes_size[ZONE_DMA] += end - start + 1;
+	else if (start > dma_pfn)
+		zholes_size[ZONE_NORMAL] += end - start + 1;
+	else {
+		zholes_size[ZONE_DMA] += dma_pfn - start + 1;
+		zholes_size[ZONE_NORMAL] += end - dma_pfn;
+	}
+}
 
-	if (smptrap)
-		return;
-	smptrap=1;
+static void __init
+parse_cmdline_early(char **cmdline_p)
+{
+	char c = ' ', cn, *to = command_line, *from = COMMAND_LINE;
+	unsigned long delay = 0;
 
-	/*
-	 * print what head.S has found out about the machine
-	 */
-#ifndef CONFIG_ARCH_S390X
-	printk((MACHINE_IS_VM) ?
-	       "We are running under VM (31 bit mode)\n" :
-	       "We are running native (31 bit mode)\n");
-	printk((MACHINE_HAS_IEEE) ?
-	       "This machine has an IEEE fpu\n" :
-	       "This machine has no IEEE fpu\n");
-#else /* CONFIG_ARCH_S390X */
-	printk((MACHINE_IS_VM) ?
-	       "We are running under VM (64 bit mode)\n" :
-	       "We are running native (64 bit mode)\n");
-#endif /* CONFIG_ARCH_S390X */
+	/* Save unparsed command line copy for /proc/cmdline */
+	memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
+	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
 
-	ROOT_DEV = Root_RAM0;
-	memory_start = (unsigned long) &_end; /* fixit if use $CODELO etc*/
-#ifndef CONFIG_ARCH_S390X
-	memory_end = memory_size & ~0x400000UL;  /* align memory end to 4MB */
-	/*
-	 * We need some free virtual space to be able to do vmalloc.
-	 * On a machine with 2GB memory we make sure that we have at
-	 * least 128 MB free space for vmalloc.
-	 */
-	if (memory_end > 1920*1024*1024)
-		memory_end = 1920*1024*1024;
-#else /* CONFIG_ARCH_S390X */
-	memory_end = memory_size & ~0x200000UL;  /* detected in head.s */
-#endif /* CONFIG_ARCH_S390X */
-	init_mm.start_code = PAGE_OFFSET;
-	init_mm.end_code = (unsigned long) &_etext;
-	init_mm.end_data = (unsigned long) &_edata;
-	init_mm.brk = (unsigned long) &_end;
-
-	code_resource.start = (unsigned long) &_text;
-	code_resource.end = (unsigned long) &_etext - 1;
-	data_resource.start = (unsigned long) &_etext;
-	data_resource.end = (unsigned long) &_edata - 1;
-
-	/* Save unparsed command line copy for /proc/cmdline */
-	memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
-	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
-
-	for (;;) {
-		/*
-		 * "mem=XXX[kKmM]" sets memsize
-		 */
-		if (c == ' ' && strncmp(from, "mem=", 4) == 0) {
-			memory_end = simple_strtoul(from+4, &from, 0);
-			if ( *from == 'K' || *from == 'k' ) {
-				memory_end = memory_end << 10;
-				from++;
-			} else if ( *from == 'M' || *from == 'm' ) {
-				memory_end = memory_end << 20;
-				from++;
-			}
-		}
-		/*
-		 * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
-		 */
-		if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) {
-			delay = simple_strtoul(from+9, &from, 0);
+	for (;;) {
+		/*
+		 * "mem=XXX[kKmM]" sets memsize
+		 */
+		if (c == ' ' && strncmp(from, "mem=", 4) == 0) {
+			memory_end = simple_strtoul(from+4, &from, 0);
+			if ( *from == 'K' || *from == 'k' ) {
+				memory_end = memory_end << 10;
+				from++;
+			} else if ( *from == 'M' || *from == 'm' ) {
+				memory_end = memory_end << 20;
+				from++;
+			}
+		}
+		/*
+		 * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
+		 */
+		if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) {
+			delay = simple_strtoul(from+9, &from, 0);
 			if (*from == 's' || *from == 'S') {
 				delay = delay*1000000;
 				from++;
@@ -403,24 +369,110 @@ void __init setup_arch(char **cmdline_p)
 			}
 			/* now wait for the requested amount of time */
 			udelay(delay);
 		}
 		cn = *(from++);
 		if (!cn)
 			break;
 		if (cn == '\n')
 			cn = ' ';  /* replace newlines with space */
 		if (cn == 0x0d)
 			cn = ' ';  /* replace 0x0d with space */
 		if (cn == ' ' && c == ' ')
 			continue;  /* remove additional spaces */
 		c = cn;
 		if (to - command_line >= COMMAND_LINE_SIZE)
 			break;
 		*(to++) = c;
 	}
 	if (c == ' ' && to > command_line) to--;
 	*to = '\0';
 	*cmdline_p = command_line;
+}
+
+static void __init
+setup_lowcore(void)
+{
+	struct _lowcore *lc;
+	int lc_pages;
+
+	/*
+	 * Setup lowcore for boot cpu
+	 */
+	lc_pages = sizeof(void *) == 8 ? 2 : 1;
+	lc = (struct _lowcore *)
+		__alloc_bootmem(lc_pages * PAGE_SIZE, lc_pages * PAGE_SIZE, 0);
+	memset(lc, 0, lc_pages * PAGE_SIZE);
+	lc->restart_psw.mask = PSW_BASE_BITS;
+	lc->restart_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+	lc->external_new_psw.mask = PSW_KERNEL_BITS;
+	lc->external_new_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+	lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
+	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+	lc->program_new_psw.mask = PSW_KERNEL_BITS;
+	lc->program_new_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long)pgm_check_handler;
+	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
+	lc->mcck_new_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+	lc->io_new_psw.mask = PSW_KERNEL_BITS;
+	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+	lc->ipl_device = S390_lowcore.ipl_device;
+	lc->jiffy_timer = -1LL;
+	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
+	lc->async_stack = (unsigned long)
+		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
+#ifdef CONFIG_CHECK_STACK
+	lc->panic_stack = (unsigned long)
+		__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;
+#endif
+	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
+	lc->thread_info = (unsigned long) &init_thread_union;
+#ifdef CONFIG_ARCH_S390X
+	if (MACHINE_HAS_DIAG44)
+		lc->diag44_opcode = 0x83000044;
+	else
+		lc->diag44_opcode = 0x07000700;
+#endif /* CONFIG_ARCH_S390X */
+	set_prefix((u32)(unsigned long) lc);
+}
+
+static void __init
+setup_resources(void)
+{
+	struct resource *res;
+	int i;
+
+	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+		res = alloc_bootmem_low(sizeof(struct resource));
+		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+		switch (memory_chunk[i].type) {
+		case CHUNK_READ_WRITE:
+			res->name = "System RAM";
+			break;
+		case CHUNK_READ_ONLY:
+			res->name = "System ROM";
+			res->flags |= IORESOURCE_READONLY;
+			break;
+		default:
+			res->name = "reserved";
+		}
+		res->start = memory_chunk[i].addr;
+		res->end = memory_chunk[i].addr + memory_chunk[i].size - 1;
+		request_resource(&iomem_resource, res);
+		request_resource(res, &code_resource);
+		request_resource(res, &data_resource);
+	}
+}
+
+static void __init
+setup_memory(void)
+{
+	unsigned long bootmap_size;
+	unsigned long start_pfn, end_pfn;
+	unsigned long last_rw_end;
+	int i;
 
 	/*
 	 * partially used pages are not usable - thus
@@ -437,6 +489,8 @@ void __init setup_arch(char **cmdline_p)
 	/*
 	 * Register RAM areas with the bootmem allocator.
 	 */
+	last_rw_end = start_pfn;
+
 	for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) {
 		unsigned long start_chunk, end_chunk;
 
@@ -450,102 +504,91 @@ void __init setup_arch(char **cmdline_p)
 			start_chunk = start_pfn;
 		if (end_chunk > end_pfn)
 			end_chunk = end_pfn;
-		if (start_chunk < end_chunk)
+		if (start_chunk < end_chunk) {
 			free_bootmem(start_chunk << PAGE_SHIFT,
 				     (end_chunk - start_chunk) << PAGE_SHIFT);
+			if (last_rw_end < start_chunk)
+				add_memory_hole(last_rw_end, start_chunk - 1);
+			last_rw_end = end_chunk;
+		}
 	}
 
-	/*
-	 * Reserve the bootmem bitmap itself as well. We do this in two
-	 * steps (first step was init_bootmem()) because this catches
-	 * the (very unlikely) case of us accidentally initializing the
-	 * bootmem allocator with an invalid RAM area.
-	 */
-	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
+	if (last_rw_end < end_pfn - 1)
+		add_memory_hole(last_rw_end, end_pfn - 1);
+
+	/*
+	 * Reserve the bootmem bitmap itself as well. We do this in two
+	 * steps (first step was init_bootmem()) because this catches
+	 * the (very unlikely) case of us accidentally initializing the
+	 * bootmem allocator with an invalid RAM area.
+	 */
+	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= memory_end) {
 			reserve_bootmem(INITRD_START, INITRD_SIZE);
 			initrd_start = INITRD_START;
 			initrd_end = initrd_start + INITRD_SIZE;
 		} else {
 			printk("initrd extends beyond end of memory "
 			       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
 			       initrd_start + INITRD_SIZE, memory_end);
 			initrd_start = initrd_end = 0;
 		}
 	}
 #endif
+}
 
-	for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) {
-		struct resource *res;
-
-		res = alloc_bootmem_low(sizeof(struct resource));
-		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
-
-		switch (memory_chunk[i].type) {
-		case CHUNK_READ_WRITE:
-			res->name = "System RAM";
-			break;
-		case CHUNK_READ_ONLY:
-			res->name = "System ROM";
-			res->flags |= IORESOURCE_READONLY;
-			break;
-		default:
-			res->name = "reserved";
-		}
-		res->start = memory_chunk[i].addr;
-		res->end = memory_chunk[i].addr + memory_chunk[i].size - 1;
-		request_resource(&iomem_resource, res);
-		request_resource(res, &code_resource);
-		request_resource(res, &data_resource);
-	}
+/*
+ * Setup function called from init/main.c just after the banner
+ * was printed.
+ */
 
+void __init
+setup_arch(char **cmdline_p)
+{
 	/*
-	 * Setup lowcore for boot cpu
+	 * print what head.S has found out about the machine
 	 */
 #ifndef CONFIG_ARCH_S390X
-	lc = (struct _lowcore *) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0);
-	memset(lc, 0, PAGE_SIZE);
+	printk((MACHINE_IS_VM) ?
+	       "We are running under VM (31 bit mode)\n" :
+	       "We are running native (31 bit mode)\n");
+	printk((MACHINE_HAS_IEEE) ?
+	       "This machine has an IEEE fpu\n" :
+	       "This machine has no IEEE fpu\n");
 #else /* CONFIG_ARCH_S390X */
-	lc = (struct _lowcore *) __alloc_bootmem(2*PAGE_SIZE, 2*PAGE_SIZE, 0);
-	memset(lc, 0, 2*PAGE_SIZE);
+	printk((MACHINE_IS_VM) ?
+	       "We are running under VM (64 bit mode)\n" :
+	       "We are running native (64 bit mode)\n");
 #endif /* CONFIG_ARCH_S390X */
-	lc->restart_psw.mask = PSW_BASE_BITS;
-	lc->restart_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
-	lc->external_new_psw.mask = PSW_KERNEL_BITS;
-	lc->external_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
-	lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
-	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
-	lc->program_new_psw.mask = PSW_KERNEL_BITS;
-	lc->program_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long)pgm_check_handler;
-	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
-	lc->mcck_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
-	lc->io_new_psw.mask = PSW_KERNEL_BITS;
-	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
-	lc->ipl_device = S390_lowcore.ipl_device;
-	lc->jiffy_timer = -1LL;
-	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
-	lc->async_stack = (unsigned long)
-		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
-#ifdef CONFIG_CHECK_STACK
-	lc->panic_stack = (unsigned long)
-		__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;
-#endif
-	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
-	lc->thread_info = (unsigned long) &init_thread_union;
-#ifdef CONFIG_ARCH_S390X
-	if (MACHINE_HAS_DIAG44)
-		lc->diag44_opcode = 0x83000044;
-	else
-		lc->diag44_opcode = 0x07000700;
+
+	ROOT_DEV = Root_RAM0;
+#ifndef CONFIG_ARCH_S390X
+	memory_end = memory_size & ~0x400000UL;  /* align memory end to 4MB */
+	/*
+	 * We need some free virtual space to be able to do vmalloc.
+	 * On a machine with 2GB memory we make sure that we have at
+	 * least 128 MB free space for vmalloc.
+	 */
+	if (memory_end > 1920*1024*1024)
+		memory_end = 1920*1024*1024;
+#else /* CONFIG_ARCH_S390X */
+	memory_end = memory_size & ~0x200000UL;  /* detected in head.s */
 #endif /* CONFIG_ARCH_S390X */
-	set_prefix((u32)(unsigned long) lc);
+
+	init_mm.start_code = PAGE_OFFSET;
+	init_mm.end_code = (unsigned long) &_etext;
+	init_mm.end_data = (unsigned long) &_edata;
+	init_mm.brk = (unsigned long) &_end;
+
+	parse_cmdline_early(cmdline_p);
+
+	setup_memory();
+	setup_resources();
+	setup_lowcore();
+
 	cpu_init();
 	__cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
 