aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-04-20 21:40:36 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-04-20 21:40:36 -0400
commit 830ac8524f301da8867400777d5c19e6d01ec525 (patch)
tree 8d8588c66e9d424973304a84e6857d1a87695eaa
parent db93f8b42036bd60d95e8d28ee98b308d5846b9f (diff)
parent 157752d84f5df47e01577970f9c5f61a0b9f4546 (diff)
Merge branch 'x86-kdump-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull kdump fixes from Peter Anvin:

 "The kexec/kdump people have found several problems with the support for loading over 4 GiB that was introduced in this merge cycle. This is partly due to a number of design problems inherent in the way the various pieces of kdump fit together (it is pretty horrifically manual in many places.) After a *lot* of iterations this is the patchset that was agreed upon, but of course it is now very late in the cycle. However, because it changes both the syntax and semantics of the crashkernel option, it would be desirable to avoid a stable release with the broken interfaces."

I'm not happy with the timing, since originally the plan was to release the final 3.9 tomorrow. But apparently I'm doing an -rc8 instead...

* 'x86-kdump-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  kexec: use Crash kernel for Crash kernel low
  x86, kdump: Change crashkernel_high/low= to crashkernel=,high/low
  x86, kdump: Retore crashkernel= to allocate under 896M
  x86, kdump: Set crashkernel_low automatically
-rw-r--r-- Documentation/kernel-parameters.txt 23
-rw-r--r-- arch/x86/kernel/setup.c 45
-rw-r--r-- include/linux/kexec.h 2
-rw-r--r-- include/linux/swiotlb.h 1
-rw-r--r-- kernel/kexec.c 118
-rw-r--r-- lib/swiotlb.c 19
6 files changed, 180 insertions, 28 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d1cc3a9fa14f..8ccbf27aead4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -596,9 +596,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
596 is selected automatically. Check 596 is selected automatically. Check
597 Documentation/kdump/kdump.txt for further details. 597 Documentation/kdump/kdump.txt for further details.
598 598
599 crashkernel_low=size[KMG]
600 [KNL, x86] parts under 4G.
601
602 crashkernel=range1:size1[,range2:size2,...][@offset] 599 crashkernel=range1:size1[,range2:size2,...][@offset]
603 [KNL] Same as above, but depends on the memory 600 [KNL] Same as above, but depends on the memory
604 in the running system. The syntax of range is 601 in the running system. The syntax of range is
@@ -606,6 +603,26 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
606 a memory unit (amount[KMG]). See also 603 a memory unit (amount[KMG]). See also
607 Documentation/kdump/kdump.txt for an example. 604 Documentation/kdump/kdump.txt for an example.
608 605
606 crashkernel=size[KMG],high
607 [KNL, x86_64] The range may be above 4G. Allows the kernel
608 to allocate the physical memory region from the top, so it
609 can be above 4G if the system has more than 4G of RAM installed.
610 Otherwise the memory region will be allocated below 4G, if
611 available.
612 It will be ignored if crashkernel=X is specified.
613 crashkernel=size[KMG],low
614 [KNL, x86_64] Range under 4G. When crashkernel=X,high
615 is passed, the kernel may allocate the physical memory region
616 above 4G, which causes the second kernel to crash on systems
617 that require some amount of low memory, e.g. swiotlb
618 requires at least 64M+32K of low memory. The kernel will
619 try to allocate 72M below 4G automatically.
620 This option lets the user specify their own low range under 4G
621 for the second kernel instead.
622 0: disable the low allocation.
623 It will be ignored when crashkernel=X,high is not used
624 or the reserved memory is below 4G.
625
609 cs89x0_dma= [HW,NET] 626 cs89x0_dma= [HW,NET]
610 Format: <dma> 627 Format: <dma>
611 628
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 90d8cc930f5e..fae9134a2de9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -507,11 +507,14 @@ static void __init memblock_x86_reserve_range_setup_data(void)
507/* 507/*
508 * Keep the crash kernel below this limit. On 32 bits earlier kernels 508 * Keep the crash kernel below this limit. On 32 bits earlier kernels
509 * would limit the kernel to the low 512 MiB due to mapping restrictions. 509 * would limit the kernel to the low 512 MiB due to mapping restrictions.
510 * On 64bit, old kexec-tools need the crash kernel to be under 896MiB.
510 */ 511 */
511#ifdef CONFIG_X86_32 512#ifdef CONFIG_X86_32
512# define CRASH_KERNEL_ADDR_MAX (512 << 20) 513# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20)
514# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20)
513#else 515#else
514# define CRASH_KERNEL_ADDR_MAX MAXMEM 516# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20)
517# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM
515#endif 518#endif
516 519
517static void __init reserve_crashkernel_low(void) 520static void __init reserve_crashkernel_low(void)
@@ -521,19 +524,35 @@ static void __init reserve_crashkernel_low(void)
521 unsigned long long low_base = 0, low_size = 0; 524 unsigned long long low_base = 0, low_size = 0;
522 unsigned long total_low_mem; 525 unsigned long total_low_mem;
523 unsigned long long base; 526 unsigned long long base;
527 bool auto_set = false;
524 int ret; 528 int ret;
525 529
526 total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); 530 total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
531 /* crashkernel=Y,low */
527 ret = parse_crashkernel_low(boot_command_line, total_low_mem, 532 ret = parse_crashkernel_low(boot_command_line, total_low_mem,
528 &low_size, &base); 533 &low_size, &base);
529 if (ret != 0 || low_size <= 0) 534 if (ret != 0) {
530 return; 535 /*
536 * two parts from lib/swiotlb.c:
537 * swiotlb size: user specified with swiotlb= or default.
538 * swiotlb overflow buffer: now is hardcoded to 32k.
539 * We round it to 8M for other buffers that
540 * may need to stay low too.
541 */
542 low_size = swiotlb_size_or_default() + (8UL<<20);
543 auto_set = true;
544 } else {
545 /* passed with crashkernel=0,low ? */
546 if (!low_size)
547 return;
548 }
531 549
532 low_base = memblock_find_in_range(low_size, (1ULL<<32), 550 low_base = memblock_find_in_range(low_size, (1ULL<<32),
533 low_size, alignment); 551 low_size, alignment);
534 552
535 if (!low_base) { 553 if (!low_base) {
536 pr_info("crashkernel low reservation failed - No suitable area found.\n"); 554 if (!auto_set)
555 pr_info("crashkernel low reservation failed - No suitable area found.\n");
537 556
538 return; 557 return;
539 } 558 }
@@ -554,14 +573,22 @@ static void __init reserve_crashkernel(void)
554 const unsigned long long alignment = 16<<20; /* 16M */ 573 const unsigned long long alignment = 16<<20; /* 16M */
555 unsigned long long total_mem; 574 unsigned long long total_mem;
556 unsigned long long crash_size, crash_base; 575 unsigned long long crash_size, crash_base;
576 bool high = false;
557 int ret; 577 int ret;
558 578
559 total_mem = memblock_phys_mem_size(); 579 total_mem = memblock_phys_mem_size();
560 580
581 /* crashkernel=XM */
561 ret = parse_crashkernel(boot_command_line, total_mem, 582 ret = parse_crashkernel(boot_command_line, total_mem,
562 &crash_size, &crash_base); 583 &crash_size, &crash_base);
563 if (ret != 0 || crash_size <= 0) 584 if (ret != 0 || crash_size <= 0) {
564 return; 585 /* crashkernel=X,high */
586 ret = parse_crashkernel_high(boot_command_line, total_mem,
587 &crash_size, &crash_base);
588 if (ret != 0 || crash_size <= 0)
589 return;
590 high = true;
591 }
565 592
566 /* 0 means: find the address automatically */ 593 /* 0 means: find the address automatically */
567 if (crash_base <= 0) { 594 if (crash_base <= 0) {
@@ -569,7 +596,9 @@ static void __init reserve_crashkernel(void)
569 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX 596 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
570 */ 597 */
571 crash_base = memblock_find_in_range(alignment, 598 crash_base = memblock_find_in_range(alignment,
572 CRASH_KERNEL_ADDR_MAX, crash_size, alignment); 599 high ? CRASH_KERNEL_ADDR_HIGH_MAX :
600 CRASH_KERNEL_ADDR_LOW_MAX,
601 crash_size, alignment);
573 602
574 if (!crash_base) { 603 if (!crash_base) {
575 pr_info("crashkernel reservation failed - No suitable area found.\n"); 604 pr_info("crashkernel reservation failed - No suitable area found.\n");
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d2e6927bbaae..d78d28a733b1 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -200,6 +200,8 @@ extern size_t vmcoreinfo_max_size;
200 200
201int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, 201int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
202 unsigned long long *crash_size, unsigned long long *crash_base); 202 unsigned long long *crash_size, unsigned long long *crash_base);
203int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
204 unsigned long long *crash_size, unsigned long long *crash_base);
203int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, 205int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
204 unsigned long long *crash_size, unsigned long long *crash_base); 206 unsigned long long *crash_size, unsigned long long *crash_base);
205int crash_shrink_memory(unsigned long new_size); 207int crash_shrink_memory(unsigned long new_size);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 2de42f9401d2..a5ffd32642fd 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -25,6 +25,7 @@ extern int swiotlb_force;
25extern void swiotlb_init(int verbose); 25extern void swiotlb_init(int verbose);
26int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); 26int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
27extern unsigned long swiotlb_nr_tbl(void); 27extern unsigned long swiotlb_nr_tbl(void);
28unsigned long swiotlb_size_or_default(void);
28extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); 29extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
29 30
30/* 31/*
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bddd3d7a74b6..ffd4e111fd67 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -55,7 +55,7 @@ struct resource crashk_res = {
55 .flags = IORESOURCE_BUSY | IORESOURCE_MEM 55 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
56}; 56};
57struct resource crashk_low_res = { 57struct resource crashk_low_res = {
58 .name = "Crash kernel low", 58 .name = "Crash kernel",
59 .start = 0, 59 .start = 0,
60 .end = 0, 60 .end = 0,
61 .flags = IORESOURCE_BUSY | IORESOURCE_MEM 61 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
@@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline,
1368 return 0; 1368 return 0;
1369} 1369}
1370 1370
1371#define SUFFIX_HIGH 0
1372#define SUFFIX_LOW 1
1373#define SUFFIX_NULL 2
1374static __initdata char *suffix_tbl[] = {
1375 [SUFFIX_HIGH] = ",high",
1376 [SUFFIX_LOW] = ",low",
1377 [SUFFIX_NULL] = NULL,
1378};
1379
1371/* 1380/*
1372 * That function is the entry point for command line parsing and should be 1381 * That function parses "suffix" crashkernel command lines like
1373 * called from the arch-specific code. 1382 *
1383 * crashkernel=size,[high|low]
1384 *
1385 * It returns 0 on success and -EINVAL on failure.
1374 */ 1386 */
1387static int __init parse_crashkernel_suffix(char *cmdline,
1388 unsigned long long *crash_size,
1389 unsigned long long *crash_base,
1390 const char *suffix)
1391{
1392 char *cur = cmdline;
1393
1394 *crash_size = memparse(cmdline, &cur);
1395 if (cmdline == cur) {
1396 pr_warn("crashkernel: memory value expected\n");
1397 return -EINVAL;
1398 }
1399
1400 /* check with suffix */
1401 if (strncmp(cur, suffix, strlen(suffix))) {
1402 pr_warn("crashkernel: unrecognized char\n");
1403 return -EINVAL;
1404 }
1405 cur += strlen(suffix);
1406 if (*cur != ' ' && *cur != '\0') {
1407 pr_warn("crashkernel: unrecognized char\n");
1408 return -EINVAL;
1409 }
1410
1411 return 0;
1412}
1413
1414static __init char *get_last_crashkernel(char *cmdline,
1415 const char *name,
1416 const char *suffix)
1417{
1418 char *p = cmdline, *ck_cmdline = NULL;
1419
1420 /* find crashkernel and use the last one if there are more */
1421 p = strstr(p, name);
1422 while (p) {
1423 char *end_p = strchr(p, ' ');
1424 char *q;
1425
1426 if (!end_p)
1427 end_p = p + strlen(p);
1428
1429 if (!suffix) {
1430 int i;
1431
1432 /* skip the one with any known suffix */
1433 for (i = 0; suffix_tbl[i]; i++) {
1434 q = end_p - strlen(suffix_tbl[i]);
1435 if (!strncmp(q, suffix_tbl[i],
1436 strlen(suffix_tbl[i])))
1437 goto next;
1438 }
1439 ck_cmdline = p;
1440 } else {
1441 q = end_p - strlen(suffix);
1442 if (!strncmp(q, suffix, strlen(suffix)))
1443 ck_cmdline = p;
1444 }
1445next:
1446 p = strstr(p+1, name);
1447 }
1448
1449 if (!ck_cmdline)
1450 return NULL;
1451
1452 return ck_cmdline;
1453}
1454
1375static int __init __parse_crashkernel(char *cmdline, 1455static int __init __parse_crashkernel(char *cmdline,
1376 unsigned long long system_ram, 1456 unsigned long long system_ram,
1377 unsigned long long *crash_size, 1457 unsigned long long *crash_size,
1378 unsigned long long *crash_base, 1458 unsigned long long *crash_base,
1379 const char *name) 1459 const char *name,
1460 const char *suffix)
1380{ 1461{
1381 char *p = cmdline, *ck_cmdline = NULL;
1382 char *first_colon, *first_space; 1462 char *first_colon, *first_space;
1463 char *ck_cmdline;
1383 1464
1384 BUG_ON(!crash_size || !crash_base); 1465 BUG_ON(!crash_size || !crash_base);
1385 *crash_size = 0; 1466 *crash_size = 0;
1386 *crash_base = 0; 1467 *crash_base = 0;
1387 1468
1388 /* find crashkernel and use the last one if there are more */ 1469 ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1389 p = strstr(p, name);
1390 while (p) {
1391 ck_cmdline = p;
1392 p = strstr(p+1, name);
1393 }
1394 1470
1395 if (!ck_cmdline) 1471 if (!ck_cmdline)
1396 return -EINVAL; 1472 return -EINVAL;
1397 1473
1398 ck_cmdline += strlen(name); 1474 ck_cmdline += strlen(name);
1399 1475
1476 if (suffix)
1477 return parse_crashkernel_suffix(ck_cmdline, crash_size,
1478 crash_base, suffix);
1400 /* 1479 /*
1401 * if the commandline contains a ':', then that's the extended 1480 * if the commandline contains a ':', then that's the extended
1402 * syntax -- if not, it must be the classic syntax 1481 * syntax -- if not, it must be the classic syntax
@@ -1413,13 +1492,26 @@ static int __init __parse_crashkernel(char *cmdline,
1413 return 0; 1492 return 0;
1414} 1493}
1415 1494
1495/*
1496 * That function is the entry point for command line parsing and should be
1497 * called from the arch-specific code.
1498 */
1416int __init parse_crashkernel(char *cmdline, 1499int __init parse_crashkernel(char *cmdline,
1417 unsigned long long system_ram, 1500 unsigned long long system_ram,
1418 unsigned long long *crash_size, 1501 unsigned long long *crash_size,
1419 unsigned long long *crash_base) 1502 unsigned long long *crash_base)
1420{ 1503{
1421 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1504 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1422 "crashkernel="); 1505 "crashkernel=", NULL);
1506}
1507
1508int __init parse_crashkernel_high(char *cmdline,
1509 unsigned long long system_ram,
1510 unsigned long long *crash_size,
1511 unsigned long long *crash_base)
1512{
1513 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1514 "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1423} 1515}
1424 1516
1425int __init parse_crashkernel_low(char *cmdline, 1517int __init parse_crashkernel_low(char *cmdline,
@@ -1428,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline,
1428 unsigned long long *crash_base) 1520 unsigned long long *crash_base)
1429{ 1521{
1430 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1522 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1431 "crashkernel_low="); 1523 "crashkernel=", suffix_tbl[SUFFIX_LOW]);
1432} 1524}
1433 1525
1434static void update_vmcoreinfo_note(void) 1526static void update_vmcoreinfo_note(void)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index bfe02b8fc55b..d23762e6652c 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -105,9 +105,9 @@ setup_io_tlb_npages(char *str)
105 if (!strcmp(str, "force")) 105 if (!strcmp(str, "force"))
106 swiotlb_force = 1; 106 swiotlb_force = 1;
107 107
108 return 1; 108 return 0;
109} 109}
110__setup("swiotlb=", setup_io_tlb_npages); 110early_param("swiotlb", setup_io_tlb_npages);
111/* make io_tlb_overflow tunable too? */ 111/* make io_tlb_overflow tunable too? */
112 112
113unsigned long swiotlb_nr_tbl(void) 113unsigned long swiotlb_nr_tbl(void)
@@ -115,6 +115,18 @@ unsigned long swiotlb_nr_tbl(void)
115 return io_tlb_nslabs; 115 return io_tlb_nslabs;
116} 116}
117EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); 117EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
118
119/* default to 64MB */
120#define IO_TLB_DEFAULT_SIZE (64UL<<20)
121unsigned long swiotlb_size_or_default(void)
122{
123 unsigned long size;
124
125 size = io_tlb_nslabs << IO_TLB_SHIFT;
126
127 return size ? size : (IO_TLB_DEFAULT_SIZE);
128}
129
118/* Note that this doesn't work with highmem page */ 130/* Note that this doesn't work with highmem page */
119static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, 131static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
120 volatile void *address) 132 volatile void *address)
@@ -188,8 +200,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
188void __init 200void __init
189swiotlb_init(int verbose) 201swiotlb_init(int verbose)
190{ 202{
191 /* default to 64MB */ 203 size_t default_size = IO_TLB_DEFAULT_SIZE;
192 size_t default_size = 64UL<<20;
193 unsigned char *vstart; 204 unsigned char *vstart;
194 unsigned long bytes; 205 unsigned long bytes;
195 206