diff options
author | Hideo AOKI <haoki@redhat.com> | 2006-04-11 01:52:59 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-04-11 09:18:32 -0400 |
commit | cb45b0e966cbe747b6189c15b108901cc7d6c97c (patch) | |
tree | 0402d4809ec175e80b083f7a713ec32c0109baad /mm/page_alloc.c | |
parent | e23ca00bf1b1c6c0f04702cb4d29e275ab8dc330 (diff) |
[PATCH] overcommit: add calculate_totalreserve_pages()
These patches are an enhancement of OVERCOMMIT_GUESS algorithm in
__vm_enough_memory().
- why the kernel needed patching
When the kernel can't allocate anonymous pages in practice, current
OVERCOMMIT_GUESS could return success. This implementation might be
the cause of oom kill in memory pressure situation.
If the Linux runs with page reservation features like
/proc/sys/vm/lowmem_reserve_ratio and without swap region, I think
the oom kill occurs easily.
- the overall design approach in the patch
When the OVERCOMMIT_GUESS algorithm calculates the number of free pages,
the reserved free pages are regarded as non-free pages.
This change helps to avoid the pitfall that the number of free pages
becomes less than the number which the kernel tries to keep free.
- testing results
I tested the patches using my test kernel module.
If the patches aren't applied to the kernel, __vm_enough_memory()
returns success in that situation, but the actual page allocation
fails.
On the other hand, if the patches are applied to the kernel, memory
allocation failure is avoided since __vm_enough_memory() returns
failure in the situation.
I checked that on i386 SMP 16GB memory machine. I haven't tested on
nommu environment currently.
This patch adds totalreserve_pages for __vm_enough_memory().
Calculate_totalreserve_pages() checks maximum lowmem_reserve pages and
pages_high in each zone. Finally, the function stores the sum of each
zone to totalreserve_pages.
The totalreserve_pages is calculated when the VM is initialized.
And the variable is updated when /proc/sys/vm/lowmem_reserve_ratio
or /proc/sys/vm/min_free_kbytes are changed.
Signed-off-by: Hideo Aoki <haoki@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b8165e037dee..97d6827c7d66 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; | |||
51 | EXPORT_SYMBOL(node_possible_map); | 51 | EXPORT_SYMBOL(node_possible_map); |
52 | unsigned long totalram_pages __read_mostly; | 52 | unsigned long totalram_pages __read_mostly; |
53 | unsigned long totalhigh_pages __read_mostly; | 53 | unsigned long totalhigh_pages __read_mostly; |
54 | unsigned long totalreserve_pages __read_mostly; | ||
54 | long nr_swap_pages; | 55 | long nr_swap_pages; |
55 | int percpu_pagelist_fraction; | 56 | int percpu_pagelist_fraction; |
56 | 57 | ||
@@ -2477,6 +2478,38 @@ void __init page_alloc_init(void) | |||
2477 | } | 2478 | } |
2478 | 2479 | ||
2479 | /* | 2480 | /* |
2481 | * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio | ||
2482 | * or min_free_kbytes changes. | ||
2483 | */ | ||
2484 | static void calculate_totalreserve_pages(void) | ||
2485 | { | ||
2486 | struct pglist_data *pgdat; | ||
2487 | unsigned long reserve_pages = 0; | ||
2488 | int i, j; | ||
2489 | |||
2490 | for_each_online_pgdat(pgdat) { | ||
2491 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
2492 | struct zone *zone = pgdat->node_zones + i; | ||
2493 | unsigned long max = 0; | ||
2494 | |||
2495 | /* Find valid and maximum lowmem_reserve in the zone */ | ||
2496 | for (j = i; j < MAX_NR_ZONES; j++) { | ||
2497 | if (zone->lowmem_reserve[j] > max) | ||
2498 | max = zone->lowmem_reserve[j]; | ||
2499 | } | ||
2500 | |||
2501 | /* we treat pages_high as reserved pages. */ | ||
2502 | max += zone->pages_high; | ||
2503 | |||
2504 | if (max > zone->present_pages) | ||
2505 | max = zone->present_pages; | ||
2506 | reserve_pages += max; | ||
2507 | } | ||
2508 | } | ||
2509 | totalreserve_pages = reserve_pages; | ||
2510 | } | ||
2511 | |||
2512 | /* | ||
2480 | * setup_per_zone_lowmem_reserve - called whenever | 2513 | * setup_per_zone_lowmem_reserve - called whenever |
2481 | * sysctl_lower_zone_reserve_ratio changes. Ensures that each zone | 2514 | * sysctl_lower_zone_reserve_ratio changes. Ensures that each zone |
2482 | * has a correct pages reserved value, so an adequate number of | 2515 | * has a correct pages reserved value, so an adequate number of |
@@ -2507,6 +2540,9 @@ static void setup_per_zone_lowmem_reserve(void) | |||
2507 | } | 2540 | } |
2508 | } | 2541 | } |
2509 | } | 2542 | } |
2543 | |||
2544 | /* update totalreserve_pages */ | ||
2545 | calculate_totalreserve_pages(); | ||
2510 | } | 2546 | } |
2511 | 2547 | ||
2512 | /* | 2548 | /* |
@@ -2561,6 +2597,9 @@ void setup_per_zone_pages_min(void) | |||
2561 | zone->pages_high = zone->pages_min + tmp / 2; | 2597 | zone->pages_high = zone->pages_min + tmp / 2; |
2562 | spin_unlock_irqrestore(&zone->lru_lock, flags); | 2598 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
2563 | } | 2599 | } |
2600 | |||
2601 | /* update totalreserve_pages */ | ||
2602 | calculate_totalreserve_pages(); | ||
2564 | } | 2603 | } |
2565 | 2604 | ||
2566 | /* | 2605 | /* |