diff options
| -rw-r--r-- | Documentation/x86/x86_64/boot-options.txt | 16 | ||||
| -rw-r--r-- | arch/x86/mm/numa_64.c | 160 |
2 files changed, 16 insertions, 160 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 01150c64aa73..7fbbaf85f5b7 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt | |||
| @@ -170,19 +170,9 @@ NUMA | |||
| 170 | If given as a memory unit, fills all system RAM with nodes of | 170 | If given as a memory unit, fills all system RAM with nodes of |
| 171 | size interleaved over physical nodes. | 171 | size interleaved over physical nodes. |
| 172 | 172 | ||
| 173 | numa=fake=CMDLINE | 173 | numa=fake=<N> |
| 174 | If a number, fakes CMDLINE nodes and ignores NUMA setup of the | 174 | If given as an integer, fills all system RAM with N fake nodes |
| 175 | actual machine. Otherwise, system memory is configured | 175 | interleaved over physical nodes. |
| 176 | depending on the sizes and coefficients listed. For example: | ||
| 177 | numa=fake=2*512,1024,4*256,*128 | ||
| 178 | gives two 512M nodes, a 1024M node, four 256M nodes, and the | ||
| 179 | rest split into 128M chunks. If the last character of CMDLINE | ||
| 180 | is a *, the remaining memory is divided up equally among its | ||
| 181 | coefficient: | ||
| 182 | numa=fake=2*512,2* | ||
| 183 | gives two 512M nodes and the rest split into two nodes. | ||
| 184 | Otherwise, the remaining system RAM is allocated to an | ||
| 185 | additional node. | ||
| 186 | 176 | ||
| 187 | ACPI | 177 | ACPI |
| 188 | 178 | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c47c78ba3aca..3307ea8bd43a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
| @@ -598,172 +598,38 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | |||
| 598 | } | 598 | } |
| 599 | 599 | ||
| 600 | /* | 600 | /* |
| 601 | * Splits num_nodes nodes up equally starting at node_start. The return value | ||
| 602 | * is the number of nodes split up and addr is adjusted to be at the end of the | ||
| 603 | * last node allocated. | ||
| 604 | */ | ||
| 605 | static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, | ||
| 606 | int num_nodes) | ||
| 607 | { | ||
| 608 | unsigned int big; | ||
| 609 | u64 size; | ||
| 610 | int i; | ||
| 611 | |||
| 612 | if (num_nodes <= 0) | ||
| 613 | return -1; | ||
| 614 | if (num_nodes > MAX_NUMNODES) | ||
| 615 | num_nodes = MAX_NUMNODES; | ||
| 616 | size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) / | ||
| 617 | num_nodes; | ||
| 618 | /* | ||
| 619 | * Calculate the number of big nodes that can be allocated as a result | ||
| 620 | * of consolidating the leftovers. | ||
| 621 | */ | ||
| 622 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) / | ||
| 623 | FAKE_NODE_MIN_SIZE; | ||
| 624 | |||
| 625 | /* Round down to nearest FAKE_NODE_MIN_SIZE. */ | ||
| 626 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
| 627 | if (!size) { | ||
| 628 | printk(KERN_ERR "Not enough memory for each node. " | ||
| 629 | "NUMA emulation disabled.\n"); | ||
| 630 | return -1; | ||
| 631 | } | ||
| 632 | |||
| 633 | for (i = node_start; i < num_nodes + node_start; i++) { | ||
| 634 | u64 end = *addr + size; | ||
| 635 | |||
| 636 | if (i < big) | ||
| 637 | end += FAKE_NODE_MIN_SIZE; | ||
| 638 | /* | ||
| 639 | * The final node can have the remaining system RAM. Other | ||
| 640 | * nodes receive roughly the same amount of available pages. | ||
| 641 | */ | ||
| 642 | if (i == num_nodes + node_start - 1) | ||
| 643 | end = max_addr; | ||
| 644 | else | ||
| 645 | end = find_end_of_node(*addr, max_addr, size); | ||
| 646 | if (setup_node_range(i, addr, end - *addr, max_addr) < 0) | ||
| 647 | break; | ||
| 648 | } | ||
| 649 | return i - node_start + 1; | ||
| 650 | } | ||
| 651 | |||
| 652 | /* | ||
| 653 | * Splits the remaining system RAM into chunks of size. The remaining memory is | ||
| 654 | * always assigned to a final node and can be asymmetric. Returns the number of | ||
| 655 | * nodes split. | ||
| 656 | */ | ||
| 657 | static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, | ||
| 658 | u64 size) | ||
| 659 | { | ||
| 660 | int i = node_start; | ||
| 661 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
| 662 | while (!setup_node_range(i++, addr, size, max_addr)) | ||
| 663 | ; | ||
| 664 | return i - node_start; | ||
| 665 | } | ||
| 666 | |||
| 667 | /* | ||
| 668 | * Sets up the system RAM area from start_pfn to last_pfn according to the | 601 | * Sets up the system RAM area from start_pfn to last_pfn according to the |
| 669 | * numa=fake command-line option. | 602 | * numa=fake command-line option. |
| 670 | */ | 603 | */ |
| 671 | static int __init numa_emulation(unsigned long start_pfn, | 604 | static int __init numa_emulation(unsigned long start_pfn, |
| 672 | unsigned long last_pfn, int acpi, int k8) | 605 | unsigned long last_pfn, int acpi, int k8) |
| 673 | { | 606 | { |
| 674 | u64 size, addr = start_pfn << PAGE_SHIFT; | 607 | u64 addr = start_pfn << PAGE_SHIFT; |
| 675 | u64 max_addr = last_pfn << PAGE_SHIFT; | 608 | u64 max_addr = last_pfn << PAGE_SHIFT; |
| 676 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | ||
| 677 | int num_phys_nodes; | 609 | int num_phys_nodes; |
| 610 | int num_nodes; | ||
| 611 | int i; | ||
| 678 | 612 | ||
| 679 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); | 613 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); |
| 680 | /* | 614 | /* |
| 681 | * If the numa=fake command-line contains a 'M' or 'G', it represents | 615 | * If the numa=fake command-line contains a 'M' or 'G', it represents |
| 682 | * the fixed node size. | 616 | * the fixed node size. Otherwise, if it is just a single number N, |
| 617 | * split the system RAM into N fake nodes. | ||
| 683 | */ | 618 | */ |
| 684 | if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) { | 619 | if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) { |
| 620 | u64 size; | ||
| 621 | |||
| 685 | size = memparse(cmdline, &cmdline); | 622 | size = memparse(cmdline, &cmdline); |
| 686 | num_nodes = split_nodes_size_interleave(addr, max_addr, size); | 623 | num_nodes = split_nodes_size_interleave(addr, max_addr, size); |
| 687 | if (num_nodes < 0) | 624 | } else { |
| 688 | return num_nodes; | 625 | unsigned long n; |
| 689 | goto out; | ||
| 690 | } | ||
| 691 | 626 | ||
| 692 | /* | 627 | n = simple_strtoul(cmdline, NULL, 0); |
| 693 | * If the numa=fake command-line is just a single number N, split the | 628 | num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n); |
| 694 | * system RAM into N fake nodes. | ||
| 695 | */ | ||
| 696 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | ||
| 697 | long n = simple_strtol(cmdline, NULL, 0); | ||
| 698 | |||
| 699 | num_nodes = split_nodes_interleave(addr, max_addr, | ||
| 700 | num_phys_nodes, n); | ||
| 701 | if (num_nodes < 0) | ||
| 702 | return num_nodes; | ||
| 703 | goto out; | ||
| 704 | } | 629 | } |
| 705 | 630 | ||
| 706 | /* Parse the command line. */ | 631 | if (num_nodes < 0) |
| 707 | for (coeff_flag = 0; ; cmdline++) { | 632 | return num_nodes; |
| 708 | if (*cmdline && isdigit(*cmdline)) { | ||
| 709 | num = num * 10 + *cmdline - '0'; | ||
| 710 | continue; | ||
| 711 | } | ||
| 712 | if (*cmdline == '*') { | ||
| 713 | if (num > 0) | ||
| 714 | coeff = num; | ||
| 715 | coeff_flag = 1; | ||
| 716 | } | ||
| 717 | if (!*cmdline || *cmdline == ',') { | ||
| 718 | if (!coeff_flag) | ||
| 719 | coeff = 1; | ||
| 720 | /* | ||
| 721 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | ||
| 722 | * Command-line coefficients are in megabytes. | ||
| 723 | */ | ||
| 724 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
| 725 | if (size) | ||
| 726 | for (i = 0; i < coeff; i++, num_nodes++) | ||
| 727 | if (setup_node_range(num_nodes, &addr, | ||
| 728 | size, max_addr) < 0) | ||
| 729 | goto done; | ||
| 730 | if (!*cmdline) | ||
| 731 | break; | ||
| 732 | coeff_flag = 0; | ||
| 733 | coeff = -1; | ||
| 734 | } | ||
| 735 | num = 0; | ||
| 736 | } | ||
| 737 | done: | ||
| 738 | if (!num_nodes) | ||
| 739 | return -1; | ||
| 740 | /* Fill remainder of system RAM, if appropriate. */ | ||
| 741 | if (addr < max_addr) { | ||
| 742 | if (coeff_flag && coeff < 0) { | ||
| 743 | /* Split remaining nodes into num-sized chunks */ | ||
| 744 | num_nodes += split_nodes_by_size(&addr, max_addr, | ||
| 745 | num_nodes, num); | ||
| 746 | goto out; | ||
| 747 | } | ||
| 748 | switch (*(cmdline - 1)) { | ||
| 749 | case '*': | ||
| 750 | /* Split remaining nodes into coeff chunks */ | ||
| 751 | if (coeff <= 0) | ||
| 752 | break; | ||
| 753 | num_nodes += split_nodes_equally(&addr, max_addr, | ||
| 754 | num_nodes, coeff); | ||
| 755 | break; | ||
| 756 | case ',': | ||
| 757 | /* Do not allocate remaining system RAM */ | ||
| 758 | break; | ||
| 759 | default: | ||
| 760 | /* Give one final node */ | ||
| 761 | setup_node_range(num_nodes, &addr, max_addr - addr, | ||
| 762 | max_addr); | ||
| 763 | num_nodes++; | ||
| 764 | } | ||
| 765 | } | ||
| 766 | out: | ||
| 767 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); | 633 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); |
| 768 | if (memnode_shift < 0) { | 634 | if (memnode_shift < 0) { |
| 769 | memnode_shift = 0; | 635 | memnode_shift = 0; |
