Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 115
1 file changed, 2 insertions(+), 113 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2b3bf6767d54..de7c6e43b1c9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4977,72 +4977,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 }
 
 /*
- * Helper functions to size the waitqueue hash table.
- * Essentially these want to choose hash table sizes sufficiently
- * large so that collisions trying to wait on pages are rare.
- * But in fact, the number of active page waitqueues on typical
- * systems is ridiculously low, less than 200. So this is even
- * conservative, even though it seems large.
- *
- * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
- * waitqueues, i.e. the size of the waitq table given the number of pages.
- */
-#define PAGES_PER_WAITQUEUE	256
-
-#ifndef CONFIG_MEMORY_HOTPLUG
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
-	unsigned long size = 1;
-
-	pages /= PAGES_PER_WAITQUEUE;
-
-	while (size < pages)
-		size <<= 1;
-
-	/*
-	 * Once we have dozens or even hundreds of threads sleeping
-	 * on IO we've got bigger problems than wait queue collision.
-	 * Limit the size of the wait table to a reasonable size.
-	 */
-	size = min(size, 4096UL);
-
-	return max(size, 4UL);
-}
-#else
-/*
- * A zone's size might be changed by hot-add, so it is not possible to determine
- * a suitable size for its wait_table. So we use the maximum size now.
- *
- * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie:
- *
- *    i386 (preemption config)    : 4096 x 16 = 64Kbyte.
- *    ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
- *    ia64, x86-64 (preemption)   : 4096 x 24 = 96Kbyte.
- *
- * The maximum entries are prepared when a zone's memory is (512K + 256) pages
- * or more by the traditional way. (See above). It equals:
- *
- *    i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte.
- *    ia64(16K page size)                 : = ( 8G + 4M)byte.
- *    powerpc (64K page size)             : = (32G +16M)byte.
- */
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
-	return 4096UL;
-}
-#endif
-
-/*
- * This is an integer logarithm so that shifts can be used later
- * to extract the more random high bits from the multiplicative
- * hash function before the remainder is taken.
- */
-static inline unsigned long wait_table_bits(unsigned long size)
-{
-	return ffz(~size);
-}
-
-/*
  * Initially all pages are reserved - free ones are freed
  * up by free_all_bootmem() once the early boot process is
  * done. Non-atomic initialization, single-pass.
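
The two removed sizing helpers work as a pair: wait_table_hash_nr_entries() rounds pages / PAGES_PER_WAITQUEUE up to the next power of two and clamps it to the range [4, 4096], and wait_table_bits() then recovers log2 of that power-of-two size, since ffz(~size) is the position of the lowest set bit of size. A small stand-alone sketch of the same arithmetic, in plain C outside the kernel and with a hand-rolled stand-in for ffz() rather than the kernel's implementation:

#include <stdio.h>

#define PAGES_PER_WAITQUEUE 256

/* Stand-in for the kernel's ffz(): index of the first zero bit in word. */
static unsigned long ffz_sketch(unsigned long word)
{
	unsigned long bit = 0;

	while (word & 1) {
		word >>= 1;
		bit++;
	}
	return bit;
}

/* Same logic as the removed !CONFIG_MEMORY_HOTPLUG wait_table_hash_nr_entries(). */
static unsigned long table_entries(unsigned long pages)
{
	unsigned long size = 1;

	pages /= PAGES_PER_WAITQUEUE;
	while (size < pages)
		size <<= 1;
	if (size > 4096UL)	/* min(size, 4096UL) */
		size = 4096UL;
	if (size < 4UL)		/* max(size, 4UL) */
		size = 4UL;
	return size;
}

int main(void)
{
	unsigned long pages = 1UL << 20;	/* e.g. a 4 GiB zone with 4 KiB pages */
	unsigned long size = table_entries(pages);

	/* For a power-of-two size, ffz(~size) is exactly log2(size): here 4096 -> 12. */
	printf("%lu pages -> %lu entries, %lu bits\n", pages, size, ffz_sketch(~size));
	return 0;
}

With 2^20 pages the size hits the 4096-entry cap, so the bits value comes back as 12.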
@@ -5304,49 +5238,6 @@ void __init setup_per_cpu_pageset(void)
 		alloc_percpu(struct per_cpu_nodestat);
 }
 
-static noinline __ref
-int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
-{
-	int i;
-	size_t alloc_size;
-
-	/*
-	 * The per-page waitqueue mechanism uses hashed waitqueues
-	 * per zone.
-	 */
-	zone->wait_table_hash_nr_entries =
-		 wait_table_hash_nr_entries(zone_size_pages);
-	zone->wait_table_bits =
-		wait_table_bits(zone->wait_table_hash_nr_entries);
-	alloc_size = zone->wait_table_hash_nr_entries
-					* sizeof(wait_queue_head_t);
-
-	if (!slab_is_available()) {
-		zone->wait_table = (wait_queue_head_t *)
-			memblock_virt_alloc_node_nopanic(
-				alloc_size, zone->zone_pgdat->node_id);
-	} else {
-		/*
-		 * This case means that a zone whose size was 0 gets new memory
-		 * via memory hot-add.
-		 * But it may be the case that a new node was hot-added. In
-		 * this case vmalloc() will not be able to use this new node's
-		 * memory - this wait_table must be initialized to use this new
-		 * node itself as well.
-		 * To use this new node's memory, further consideration will be
-		 * necessary.
-		 */
-		zone->wait_table = vmalloc(alloc_size);
-	}
-	if (!zone->wait_table)
-		return -ENOMEM;
-
-	for (i = 0; i < zone->wait_table_hash_nr_entries; ++i)
-		init_waitqueue_head(zone->wait_table + i);
-
-	return 0;
-}
-
 static __meminit void zone_pcp_init(struct zone *zone)
 {
 	/*
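
The removed zone_wait_table_init() only sizes the table, allocates it (memblock before slab is up, vmalloc for the hot-add case), and initializes each queue head; the lookup that picks one of those wait_queue_head_t entries for a given page lives elsewhere and is not part of this diff. The removed wait_table_bits() comment hints at how that lookup works: take a multiplicative hash of the page pointer and keep the well-mixed high bits. A rough user-space sketch of that kind of indexing, assuming a Fibonacci-style multiplicative constant rather than whatever the kernel's own pointer hash uses:

#include <stdint.h>
#include <stdio.h>

/*
 * Multiplicative hash: multiply by a large odd constant and keep only the
 * top 'bits' bits, which are the best-mixed ones.  This is the "shift to
 * extract the more random high bits" idea from the removed comment; the
 * golden-ratio constant below is illustrative, not the kernel's.
 */
static unsigned long hash_ptr_sketch(const void *ptr, unsigned int bits)
{
	uint64_t val = (uint64_t)(uintptr_t)ptr;

	return (unsigned long)((val * 0x9e3779b97f4a7c15ULL) >> (64 - bits));
}

int main(void)
{
	int fake_pages[8];	/* stand-ins for struct page addresses */
	unsigned int bits = 12;	/* a 4096-entry wait table */
	unsigned int i;

	for (i = 0; i < 8; i++)
		printf("page %u -> queue index %lu\n",
		       i, hash_ptr_sketch(&fake_pages[i], bits));
	return 0;
}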
@@ -5367,10 +5258,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 					unsigned long size)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
-	int ret;
-	ret = zone_wait_table_init(zone, size);
-	if (ret)
-		return ret;
+
 	pgdat->nr_zones = zone_idx(zone) + 1;
 
 	zone->zone_start_pfn = zone_start_pfn;
@@ -5382,6 +5270,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 			zone_start_pfn, (zone_start_pfn + size));
 
 	zone_init_free_lists(zone);
+	zone->initialized = 1;
 
 	return 0;
 }
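
The only functional addition in the patch is the zone->initialized flag, set once the zone's free lists are ready; with the wait_table pointer gone, something else has to record that a zone has been through init_currently_empty_zone(). A toy sketch of that pattern, with struct and helper names that are illustrative rather than the kernel's definitions:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in, not the kernel's struct zone. */
struct toy_zone {
	unsigned long zone_start_pfn;
	int initialized;
};

static bool toy_zone_is_initialized(const struct toy_zone *zone)
{
	return zone->initialized;
}

static void toy_init_empty_zone(struct toy_zone *zone, unsigned long start_pfn)
{
	zone->zone_start_pfn = start_pfn;
	/* ... free-list setup would happen here ... */
	zone->initialized = 1;	/* mirrors the "+" line in the final hunk */
}

int main(void)
{
	struct toy_zone z = { 0 };

	printf("before init: %d\n", toy_zone_is_initialized(&z));
	toy_init_empty_zone(&z, 0x1000);
	printf("after init:  %d\n", toy_zone_is_initialized(&z));
	return 0;
}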