aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/mmzone.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r--include/linux/mmzone.h99
1 files changed, 88 insertions, 11 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 59855b8718a0..e339a7345f25 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -218,13 +218,9 @@ struct zone {
218 * under - it drives the swappiness decision: whether to unmap mapped 218 * under - it drives the swappiness decision: whether to unmap mapped
219 * pages. 219 * pages.
220 * 220 *
221 * temp_priority is used to remember the scanning priority at which 221 * Access to this field is quite racy even on uniprocessor. But
222 * this zone was successfully refilled to free_pages == pages_high.
223 *
224 * Access to both these fields is quite racy even on uniprocessor. But
225 * it is expected to average out OK. 222 * it is expected to average out OK.
226 */ 223 */
227 int temp_priority;
228 int prev_priority; 224 int prev_priority;
229 225
230 226
@@ -282,7 +278,7 @@ struct zone {
282 /* 278 /*
283 * rarely used fields: 279 * rarely used fields:
284 */ 280 */
285 char *name; 281 const char *name;
286} ____cacheline_internodealigned_in_smp; 282} ____cacheline_internodealigned_in_smp;
287 283
288/* 284/*
@@ -292,19 +288,94 @@ struct zone {
292 */ 288 */
293#define DEF_PRIORITY 12 289#define DEF_PRIORITY 12
294 290
291/* Maximum number of zones on a zonelist */
292#define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES)
293
294#ifdef CONFIG_NUMA
295/*
296 * We cache key information from each zonelist for smaller cache
297 * footprint when scanning for free pages in get_page_from_freelist().
298 *
299 * 1) The BITMAP fullzones tracks which zones in a zonelist have come
300 * up short of free memory since the last time (last_full_zap)
301 * we zero'd fullzones.
302 * 2) The array z_to_n[] maps each zone in the zonelist to its node
303 * id, so that we can efficiently evaluate whether that node is
304 * set in the current task's mems_allowed.
305 *
306 * Both fullzones and z_to_n[] are one-to-one with the zonelist,
307 * indexed by a zone's offset in the zonelist zones[] array.
308 *
309 * The get_page_from_freelist() routine does two scans. During the
310 * first scan, we skip zones whose corresponding bit in 'fullzones'
311 * is set or whose corresponding node in current->mems_allowed (which
312 * comes from cpusets) is not set. During the second scan, we bypass
313 * this zonelist_cache, to ensure we look methodically at each zone.
314 *
315 * Once per second, we zero out (zap) fullzones, forcing us to
316 * reconsider nodes that might have regained more free memory.
317 * The field last_full_zap is the time we last zapped fullzones.
318 *
319 * This mechanism reduces the amount of time we waste repeatedly
320 * re-examining zones for free memory when they just came up low on
321 * memory a moment ago.
322 *
323 * The zonelist_cache struct members logically belong in struct
324 * zonelist. However, the mempolicy zonelists constructed for
325 * MPOL_BIND are intentionally variable length (and usually much
326 * shorter). A general purpose mechanism for handling structs with
327 * multiple variable length members is more mechanism than we want
328 * here. We resort to some special case hackery instead.
329 *
330 * The MPOL_BIND zonelists don't need this zonelist_cache (in good
331 * part because they are shorter), so we put the fixed length stuff
332 * at the front of the zonelist struct, ending in a variable length
333 * zones[], as is needed by MPOL_BIND.
334 *
335 * Then we put the optional zonelist cache on the end of the zonelist
336 * struct. This optional stuff is found by a 'zlcache_ptr' pointer in
337 * the fixed length portion at the front of the struct. This pointer
338 * both enables us to find the zonelist cache, and in the case of
339 * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL)
340 * to know that the zonelist cache is not there.
341 *
342 * The end result is that struct zonelists come in two flavors:
343 * 1) The full, fixed length version, shown below, and
344 * 2) The custom zonelists for MPOL_BIND.
345 * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache.
346 *
347 * Even though there may be multiple CPU cores on a node modifying
348 * fullzones or last_full_zap in the same zonelist_cache at the same
349 * time, we don't lock it. This is just hint data - if it is wrong now
350 * and then, the allocator will still function, perhaps a bit slower.
351 */
352
353
354struct zonelist_cache {
355 unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */
356 DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */
357 unsigned long last_full_zap; /* when last zap'd (jiffies) */
358};
359#else
360struct zonelist_cache;
361#endif
362
295/* 363/*
296 * One allocation request operates on a zonelist. A zonelist 364 * One allocation request operates on a zonelist. A zonelist
297 * is a list of zones, the first one is the 'goal' of the 365 * is a list of zones, the first one is the 'goal' of the
298 * allocation, the other zones are fallback zones, in decreasing 366 * allocation, the other zones are fallback zones, in decreasing
299 * priority. 367 * priority.
300 * 368 *
301 * Right now a zonelist takes up less than a cacheline. We never 369 * If zlcache_ptr is not NULL, then it is just the address of zlcache,
302 * modify it apart from boot-up, and only a few indices are used, 370 * as explained above. If zlcache_ptr is NULL, there is no zlcache.
303 * so despite the zonelist table being relatively big, the cache
304 * footprint of this construct is very small.
305 */ 371 */
372
306struct zonelist { 373struct zonelist {
307 struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited 374 struct zonelist_cache *zlcache_ptr; // NULL or &zlcache
375 struct zone *zones[MAX_ZONES_PER_ZONELIST + 1]; // NULL delimited
376#ifdef CONFIG_NUMA
377 struct zonelist_cache zlcache; // optional ...
378#endif
308}; 379};
309 380
310#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 381#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
@@ -674,6 +745,12 @@ void sparse_init(void);
674#define sparse_index_init(_sec, _nid) do {} while (0) 745#define sparse_index_init(_sec, _nid) do {} while (0)
675#endif /* CONFIG_SPARSEMEM */ 746#endif /* CONFIG_SPARSEMEM */
676 747
748#ifdef CONFIG_NODES_SPAN_OTHER_NODES
749#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid))
750#else
751#define early_pfn_in_nid(pfn, nid) (1)
752#endif
753
677#ifndef early_pfn_valid 754#ifndef early_pfn_valid
678#define early_pfn_valid(pfn) (1) 755#define early_pfn_valid(pfn) (1)
679#endif 756#endif