Diffstat (limited to 'security/integrity/ima/ima_policy.c')
 security/integrity/ima/ima_policy.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 49998f90e441..c771a2036691 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -245,6 +245,9 @@ static int ima_lsm_rule_init(struct ima_measure_rule_entry *entry,
 {
 	int result;
 
+	if (entry->lsm[lsm_rule].rule)
+		return -EINVAL;
+
 	entry->lsm[lsm_rule].type = audit_type;
 	result = security_filter_rule_init(entry->lsm[lsm_rule].type,
 					   Audit_equal, args,
@@ -260,6 +263,7 @@ static int ima_parse_rule(char *rule, struct ima_measure_rule_entry *entry)
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_INTEGRITY_RULE);
 
+	entry->uid = -1;
 	entry->action = -1;
 	while ((p = strsep(&rule, " ")) != NULL) {
 		substring_t args[MAX_OPT_ARGS];
@@ -274,14 +278,26 @@ static int ima_parse_rule(char *rule, struct ima_measure_rule_entry *entry)
 		switch (token) {
 		case Opt_measure:
 			audit_log_format(ab, "%s ", "measure");
+
+			if (entry->action != UNKNOWN)
+				result = -EINVAL;
+
 			entry->action = MEASURE;
 			break;
 		case Opt_dont_measure:
 			audit_log_format(ab, "%s ", "dont_measure");
+
+			if (entry->action != UNKNOWN)
+				result = -EINVAL;
+
 			entry->action = DONT_MEASURE;
 			break;
 		case Opt_func:
 			audit_log_format(ab, "func=%s ", args[0].from);
+
+			if (entry->func)
+				result = -EINVAL;
+
 			if (strcmp(args[0].from, "FILE_CHECK") == 0)
 				entry->func = FILE_CHECK;
 			/* PATH_CHECK is for backwards compat */
@@ -298,6 +314,10 @@ static int ima_parse_rule(char *rule, struct ima_measure_rule_entry *entry)
 			break;
 		case Opt_mask:
 			audit_log_format(ab, "mask=%s ", args[0].from);
+
+			if (entry->mask)
+				result = -EINVAL;
+
 			if ((strcmp(args[0].from, "MAY_EXEC")) == 0)
 				entry->mask = MAY_EXEC;
 			else if (strcmp(args[0].from, "MAY_WRITE") == 0)
@@ -313,6 +333,12 @@ static int ima_parse_rule(char *rule, struct ima_measure_rule_entry *entry)
 			break;
 		case Opt_fsmagic:
 			audit_log_format(ab, "fsmagic=%s ", args[0].from);
+
+			if (entry->fsmagic) {
+				result = -EINVAL;
+				break;
+			}
+
 			result = strict_strtoul(args[0].from, 16,
 						&entry->fsmagic);
 			if (!result)
@@ -320,6 +346,12 @@ static int ima_parse_rule(char *rule, struct ima_measure_rule_entry *entry)
 			break;
 		case Opt_uid:
 			audit_log_format(ab, "uid=%s ", args[0].from);
+
+			if (entry->uid != -1) {
+				result = -EINVAL;
+				break;
+			}
+
 			result = strict_strtoul(args[0].from, 10, &lnum);
 			if (!result) {
 				entry->uid = (uid_t) lnum;
@@ -370,7 +402,7 @@ static int ima_parse_rule(char *rule, struct ima_measure_rule_entry *entry)
 			break;
 		}
 	}
-	if (entry->action == UNKNOWN)
+	if (!result && (entry->action == UNKNOWN))
 		result = -EINVAL;
 
 	audit_log_format(ab, "res=%d", !!result);
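The effect of this patch: each option in an IMA policy rule may appear at most once, and the final entry->action == UNKNOWN check is only applied when no earlier error was recorded. A minimal userspace sketch of the check-before-set pattern the patch applies to each token follows; all names in it are illustrative, not from the kernel tree.

/* sketch.c - illustrative only; mirrors the duplicate-option guard above */
#include <stdio.h>
#include <string.h>
#include <errno.h>

struct rule {
	int action;		/* -1 = unset, as entry->action/entry->uid in the patch */
	unsigned long mask;	/* 0 = not seen yet, as entry->mask */
};

static int parse_token(struct rule *r, const char *tok)
{
	if (strcmp(tok, "measure") == 0) {
		if (r->action != -1)
			return -EINVAL;	/* second action token: reject */
		r->action = 1;
	} else if (strncmp(tok, "mask=", 5) == 0) {
		if (r->mask)
			return -EINVAL;	/* second mask= token: reject */
		r->mask = 1;
	}
	return 0;
}

int main(void)
{
	struct rule r = { .action = -1, .mask = 0 };
	const char *toks[] = { "measure", "mask=MAY_EXEC", "mask=MAY_READ" };

	for (unsigned int i = 0; i < sizeof(toks) / sizeof(toks[0]); i++)
		if (parse_token(&r, toks[i]) == -EINVAL)
			printf("token '%s' rejected: option already set\n", toks[i]);
	return 0;
}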
#include <linux/stat.h>
#include <linux/sysctl.h>
#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
#include <linux/syscalls.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <linux/pid_namespace.h>
#include <linux/file.h>
#include <linux/ctype.h>
#include <linux/netdevice.h>
#include <linux/kernel.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL_SYSCALL

struct bin_table;
typedef ssize_t bin_convert_t(struct file *file,
	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen);

static bin_convert_t bin_dir;
static bin_convert_t bin_string;
static bin_convert_t bin_intvec;
static bin_convert_t bin_ulongvec;
static bin_convert_t bin_uuid;
static bin_convert_t bin_dn_node_address;

#define CTL_DIR   bin_dir
#define CTL_STR   bin_string
#define CTL_INT   bin_intvec
#define CTL_ULONG bin_ulongvec
#define CTL_UUID  bin_uuid
#define CTL_DNADR bin_dn_node_address

#define BUFSZ 256

struct bin_table {
	bin_convert_t		*convert;
	int			ctl_name;
	const char		*procname;
	const struct bin_table	*child;
};

static const struct bin_table bin_random_table[] = {
	{ CTL_INT,	RANDOM_POOLSIZE,	"poolsize" },
	{ CTL_INT,	RANDOM_ENTROPY_COUNT,	"entropy_avail" },
	{ CTL_INT,	RANDOM_READ_THRESH,	"read_wakeup_threshold" },
	{ CTL_INT,	RANDOM_WRITE_THRESH,	"write_wakeup_threshold" },
	{ CTL_UUID,	RANDOM_BOOT_ID,		"boot_id" },
	{ CTL_UUID,	RANDOM_UUID,		"uuid" },
	{}
};

static const struct bin_table bin_pty_table[] = {
	{ CTL_INT,	PTY_MAX,	"max" },
	{ CTL_INT,	PTY_NR,		"nr" },
	{}
};

static const struct bin_table bin_kern_table[] = {
	{ CTL_STR,	KERN_OSTYPE,			"ostype" },
	{ CTL_STR,	KERN_OSRELEASE,			"osrelease" },
	/* KERN_OSREV not used */
	{ CTL_STR,	KERN_VERSION,			"version" },
	/* KERN_SECUREMASK not used */
	/* KERN_PROF not used */
	{ CTL_STR,	KERN_NODENAME,			"hostname" },
	{ CTL_STR,	KERN_DOMAINNAME,		"domainname" },

	{ CTL_INT,	KERN_PANIC,			"panic" },
	{ CTL_INT,	KERN_REALROOTDEV,		"real-root-dev" },

	{ CTL_STR,	KERN_SPARC_REBOOT,		"reboot-cmd" },
	{ CTL_INT,	KERN_CTLALTDEL,			"ctrl-alt-del" },
	{ CTL_INT,	KERN_PRINTK,			"printk" },

	/* KERN_NAMETRANS not used */
	/* KERN_PPC_HTABRECLAIM not used */
	/* KERN_PPC_ZEROPAGED not used */
	{ CTL_INT,	KERN_PPC_POWERSAVE_NAP,		"powersave-nap" },

	{ CTL_STR,	KERN_MODPROBE,			"modprobe" },
	{ CTL_INT,	KERN_SG_BIG_BUFF,		"sg-big-buff" },
	{ CTL_INT,	KERN_ACCT,			"acct" },
	/* KERN_PPC_L2CR "l2cr" no longer used */

	/* KERN_RTSIGNR not used */
	/* KERN_RTSIGMAX not used */

	{ CTL_ULONG,	KERN_SHMMAX,			"shmmax" },
	{ CTL_INT,	KERN_MSGMAX,			"msgmax" },
	{ CTL_INT,	KERN_MSGMNB,			"msgmnb" },
	/* KERN_MSGPOOL not used*/
	{ CTL_INT,	KERN_SYSRQ,			"sysrq" },
	{ CTL_INT,	KERN_MAX_THREADS,		"threads-max" },
	{ CTL_DIR,	KERN_RANDOM,			"random",	bin_random_table },
	{ CTL_ULONG,	KERN_SHMALL,			"shmall" },
	{ CTL_INT,	KERN_MSGMNI,			"msgmni" },
	{ CTL_INT,	KERN_SEM,			"sem" },
	{ CTL_INT,	KERN_SPARC_STOP_A,		"stop-a" },
	{ CTL_INT,	KERN_SHMMNI,			"shmmni" },

	{ CTL_INT,	KERN_OVERFLOWUID,		"overflowuid" },
	{ CTL_INT,	KERN_OVERFLOWGID,		"overflowgid" },

	{ CTL_STR,	KERN_HOTPLUG,			"hotplug", },
	{ CTL_INT,	KERN_IEEE_EMULATION_WARNINGS,	"ieee_emulation_warnings" },

	{ CTL_INT,	KERN_S390_USER_DEBUG_LOGGING,	"userprocess_debug" },
	{ CTL_INT,	KERN_CORE_USES_PID,		"core_uses_pid" },
	/* KERN_TAINTED "tainted" no longer used */
	{ CTL_INT,	KERN_CADPID,			"cad_pid" },
	{ CTL_INT,	KERN_PIDMAX,			"pid_max" },
	{ CTL_STR,	KERN_CORE_PATTERN,		"core_pattern" },
	{ CTL_INT,	KERN_PANIC_ON_OOPS,		"panic_on_oops" },
	{ CTL_INT,	KERN_HPPA_PWRSW,		"soft-power" },
	{ CTL_INT,	KERN_HPPA_UNALIGNED,		"unaligned-trap" },

	{ CTL_INT,	KERN_PRINTK_RATELIMIT,		"printk_ratelimit" },
	{ CTL_INT,	KERN_PRINTK_RATELIMIT_BURST,	"printk_ratelimit_burst" },

	{ CTL_DIR,	KERN_PTY,			"pty",		bin_pty_table },
	{ CTL_INT,	KERN_NGROUPS_MAX,		"ngroups_max" },
	{ CTL_INT,	KERN_SPARC_SCONS_PWROFF,	"scons-poweroff" },
	/* KERN_HZ_TIMER "hz_timer" no longer used */
	{ CTL_INT,	KERN_UNKNOWN_NMI_PANIC,		"unknown_nmi_panic" },
	{ CTL_INT,	KERN_BOOTLOADER_TYPE,		"bootloader_type" },
	{ CTL_INT,	KERN_RANDOMIZE,			"randomize_va_space" },

	{ CTL_INT,	KERN_SPIN_RETRY,		"spin_retry" },
	/* KERN_ACPI_VIDEO_FLAGS "acpi_video_flags" no longer used */
	{ CTL_INT,	KERN_IA64_UNALIGNED,		"ignore-unaligned-usertrap" },
	{ CTL_INT,	KERN_COMPAT_LOG,		"compat-log" },
	{ CTL_INT,	KERN_MAX_LOCK_DEPTH,		"max_lock_depth" },
	{ CTL_INT,	KERN_PANIC_ON_NMI,		"panic_on_unrecovered_nmi" },
	{}
};

static const struct bin_table bin_vm_table[] = {
	{ CTL_INT,	VM_OVERCOMMIT_MEMORY,		"overcommit_memory" },
	{ CTL_INT,	VM_PAGE_CLUSTER,		"page-cluster" },
	{ CTL_INT,	VM_DIRTY_BACKGROUND,		"dirty_background_ratio" },
	{ CTL_INT,	VM_DIRTY_RATIO,			"dirty_ratio" },
	/* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */
	/* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */
	{ CTL_INT,	VM_NR_PDFLUSH_THREADS,		"nr_pdflush_threads" },
	{ CTL_INT,	VM_OVERCOMMIT_RATIO,		"overcommit_ratio" },
	/* VM_PAGEBUF unused */
	/* VM_HUGETLB_PAGES "nr_hugepages" no longer used */
	{ CTL_INT,	VM_SWAPPINESS,			"swappiness" },
	{ CTL_INT,	VM_LOWMEM_RESERVE_RATIO,	"lowmem_reserve_ratio" },
	{ CTL_INT,	VM_MIN_FREE_KBYTES,		"min_free_kbytes" },
	{ CTL_INT,	VM_MAX_MAP_COUNT,		"max_map_count" },
	{ CTL_INT,	VM_LAPTOP_MODE,			"laptop_mode" },
	{ CTL_INT,	VM_BLOCK_DUMP,			"block_dump" },
	{ CTL_INT,	VM_HUGETLB_GROUP,		"hugetlb_shm_group" },
	{ CTL_INT,	VM_VFS_CACHE_PRESSURE,	"vfs_cache_pressure" },
	{ CTL_INT,	VM_LEGACY_VA_LAYOUT,		"legacy_va_layout" },
	/* VM_SWAP_TOKEN_TIMEOUT unused */
	{ CTL_INT,	VM_DROP_PAGECACHE,		"drop_caches" },
	{ CTL_INT,	VM_PERCPU_PAGELIST_FRACTION,	"percpu_pagelist_fraction" },
	{ CTL_INT,	VM_ZONE_RECLAIM_MODE,		"zone_reclaim_mode" },
	{ CTL_INT,	VM_MIN_UNMAPPED,		"min_unmapped_ratio" },
	{ CTL_INT,	VM_PANIC_ON_OOM,		"panic_on_oom" },
	{ CTL_INT,	VM_VDSO_ENABLED,		"vdso_enabled" },
	{ CTL_INT,	VM_MIN_SLAB,			"min_slab_ratio" },

	{}
};

static const struct bin_table bin_net_core_table[] = {
	{ CTL_INT,	NET_CORE_WMEM_MAX,	"wmem_max" },
	{ CTL_INT,	NET_CORE_RMEM_MAX,	"rmem_max" },
	{ CTL_INT,	NET_CORE_WMEM_DEFAULT,	"wmem_default" },
	{ CTL_INT,	NET_CORE_RMEM_DEFAULT,	"rmem_default" },
	/* NET_CORE_DESTROY_DELAY unused */
	{ CTL_INT,	NET_CORE_MAX_BACKLOG,	"netdev_max_backlog" },
	/* NET_CORE_FASTROUTE unused */
	{ CTL_INT,	NET_CORE_MSG_COST,	"message_cost" },
	{ CTL_INT,	NET_CORE_MSG_BURST,	"message_burst" },
	{ CTL_INT,	NET_CORE_OPTMEM_MAX,	"optmem_max" },
	/* NET_CORE_HOT_LIST_LENGTH unused */
	/* NET_CORE_DIVERT_VERSION unused */
	/* NET_CORE_NO_CONG_THRESH unused */
	/* NET_CORE_NO_CONG unused */
	/* NET_CORE_LO_CONG unused */
	/* NET_CORE_MOD_CONG unused */
	{ CTL_INT,	NET_CORE_DEV_WEIGHT,	"dev_weight" },
	{ CTL_INT,	NET_CORE_SOMAXCONN,	"somaxconn" },
	{ CTL_INT,	NET_CORE_BUDGET,	"netdev_budget" },
	{ CTL_INT,	NET_CORE_AEVENT_ETIME,	"xfrm_aevent_etime" },
	{ CTL_INT,	NET_CORE_AEVENT_RSEQTH,	"xfrm_aevent_rseqth" },
	{ CTL_INT,	NET_CORE_WARNINGS,	"warnings" },
	{}
};

static const struct bin_table bin_net_unix_table[] = {
	/* NET_UNIX_DESTROY_DELAY unused */
	/* NET_UNIX_DELETE_DELAY unused */
	{ CTL_INT,	NET_UNIX_MAX_DGRAM_QLEN,	"max_dgram_qlen" },
	{}
};

static const struct bin_table bin_net_ipv4_route_table[] = {
	{ CTL_INT,	NET_IPV4_ROUTE_FLUSH,			"flush" },
	/* NET_IPV4_ROUTE_MIN_DELAY "min_delay" no longer used */
	/* NET_IPV4_ROUTE_MAX_DELAY "max_delay" no longer used */
	{ CTL_INT,	NET_IPV4_ROUTE_GC_THRESH,		"gc_thresh" },
	{ CTL_INT,	NET_IPV4_ROUTE_MAX_SIZE,		"max_size" },
	{ CTL_INT,	NET_IPV4_ROUTE_GC_MIN_INTERVAL,		"gc_min_interval" },
	{ CTL_INT,	NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS,	"gc_min_interval_ms" },
	{ CTL_INT,	NET_IPV4_ROUTE_GC_TIMEOUT,		"gc_timeout" },
	/* NET_IPV4_ROUTE_GC_INTERVAL "gc_interval" no longer used */
	{ CTL_INT,	NET_IPV4_ROUTE_REDIRECT_LOAD,		"redirect_load" },
	{ CTL_INT,	NET_IPV4_ROUTE_REDIRECT_NUMBER,		"redirect_number" },
	{ CTL_INT,	NET_IPV4_ROUTE_REDIRECT_SILENCE,	"redirect_silence" },
	{ CTL_INT,	NET_IPV4_ROUTE_ERROR_COST,		"error_cost" },
	{ CTL_INT,	NET_IPV4_ROUTE_ERROR_BURST,		"error_burst" },
	{ CTL_INT,	NET_IPV4_ROUTE_GC_ELASTICITY,		"gc_elasticity" },
	{ CTL_INT,	NET_IPV4_ROUTE_MTU_EXPIRES,		"mtu_expires" },
	{ CTL_INT,	NET_IPV4_ROUTE_MIN_PMTU,		"min_pmtu" },
	{ CTL_INT,	NET_IPV4_ROUTE_MIN_ADVMSS,		"min_adv_mss" },
	{}
};

static const struct bin_table bin_net_ipv4_conf_vars_table[] = {
	{ CTL_INT,	NET_IPV4_CONF_FORWARDING,		"forwarding" },
	{ CTL_INT,	NET_IPV4_CONF_MC_FORWARDING,		"mc_forwarding" },

	{ CTL_INT,	NET_IPV4_CONF_ACCEPT_REDIRECTS,		"accept_redirects" },
	{ CTL_INT,	NET_IPV4_CONF_SECURE_REDIRECTS,		"secure_redirects" },
	{ CTL_INT,	NET_IPV4_CONF_SEND_REDIRECTS,		"send_redirects" },
	{ CTL_INT,	NET_IPV4_CONF_SHARED_MEDIA,		"shared_media" },
	{ CTL_INT,	NET_IPV4_CONF_RP_FILTER,		"rp_filter" },
	{ CTL_INT,	NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,	"accept_source_route" },
	{ CTL_INT,	NET_IPV4_CONF_PROXY_ARP,		"proxy_arp" },
	{ CTL_INT,	NET_IPV4_CONF_MEDIUM_ID,		"medium_id" },
	{ CTL_INT,	NET_IPV4_CONF_BOOTP_RELAY,		"bootp_relay" },
	{ CTL_INT,	NET_IPV4_CONF_LOG_MARTIANS,		"log_martians" },
	{ CTL_INT,	NET_IPV4_CONF_TAG,			"tag" },
	{ CTL_INT,	NET_IPV4_CONF_ARPFILTER,		"arp_filter" },
	{ CTL_INT,	NET_IPV4_CONF_ARP_ANNOUNCE,		"arp_announce" },
	{ CTL_INT,	NET_IPV4_CONF_ARP_IGNORE,		"arp_ignore" },
	{}
};

/* include/linux/mmzone.h (fragment; the head of pg_data_t is truncated) */
	enum zone_type classzone_idx;
} pg_data_t;

#define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
#ifdef CONFIG_FLAT_NODE_MEM_MAP
#define pgdat_page_nr(pgdat, pagenr)	((pgdat)->node_mem_map + (pagenr))
#else
#define pgdat_page_nr(pgdat, pagenr)	pfn_to_page((pgdat)->node_start_pfn + (pagenr))
#endif
#define nid_page_nr(nid, pagenr) 	pgdat_page_nr(NODE_DATA(nid),(pagenr))

#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)

#define node_end_pfn(nid) ({\
	pg_data_t *__pgdat = NODE_DATA(nid);\
	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;\
})

#include <linux/memory_hotplug.h>

extern struct mutex zonelists_mutex;
void build_all_zonelists(void *data);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
		int classzone_idx, int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
		int classzone_idx, int alloc_flags);
enum memmap_context {
	MEMMAP_EARLY,
	MEMMAP_HOTPLUG,
};
extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
				     unsigned long size,
				     enum memmap_context context);

#ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end);
#else
static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
#endif

#ifdef CONFIG_HAVE_MEMORYLESS_NODES
int local_memory_node(int node_id);
#else
static inline int local_memory_node(int node_id) { return node_id; }
#endif

#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
#endif

/*
 * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
 */
#define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)

static inline int populated_zone(struct zone *zone)
{
	return (!!zone->present_pages);
}

extern int movable_zone;

static inline int zone_movable_is_highmem(void)
{
#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP)
	return movable_zone == ZONE_HIGHMEM;
#else
	return 0;
#endif
}

static inline int is_highmem_idx(enum zone_type idx)
{
#ifdef CONFIG_HIGHMEM
	return (idx == ZONE_HIGHMEM ||
		(idx == ZONE_MOVABLE && zone_movable_is_highmem()));
#else
	return 0;
#endif
}

static inline int is_normal_idx(enum zone_type idx)
{
	return (idx == ZONE_NORMAL);
}

/**
 * is_highmem - helper function to quickly check if a struct zone is a 
 *              highmem zone or not.  This is an attempt to keep references
 *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
 * @zone - pointer to struct zone variable
 */
static inline int is_highmem(struct zone *zone)
{
#ifdef CONFIG_HIGHMEM
	int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones;
	return zone_off == ZONE_HIGHMEM * sizeof(*zone) ||
	       (zone_off == ZONE_MOVABLE * sizeof(*zone) &&
		zone_movable_is_highmem());
#else
	return 0;
#endif
}

static inline int is_normal(struct zone *zone)
{
	return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL;
}

static inline int is_dma32(struct zone *zone)
{
#ifdef CONFIG_ZONE_DMA32
	return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
#else
	return 0;
#endif
}

static inline int is_dma(struct zone *zone)
{
#ifdef CONFIG_ZONE_DMA
	return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
#else
	return 0;
#endif
}

/* These two functions are used to setup the per zone pages min values */
struct ctl_table;
int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
					void __user *, size_t *, loff_t *);
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
					void __user *, size_t *, loff_t *);
int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
					void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
			void __user *, size_t *, loff_t *);
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
			void __user *, size_t *, loff_t *);

extern int numa_zonelist_order_handler(struct ctl_table *, int,
			void __user *, size_t *, loff_t *);
extern char numa_zonelist_order[];
#define NUMA_ZONELIST_ORDER_LEN 16	/* string buffer size */

#ifndef CONFIG_NEED_MULTIPLE_NODES

extern struct pglist_data contig_page_data;
#define NODE_DATA(nid)		(&contig_page_data)
#define NODE_MEM_MAP(nid)	mem_map

#else /* CONFIG_NEED_MULTIPLE_NODES */

#include <asm/mmzone.h>

#endif /* !CONFIG_NEED_MULTIPLE_NODES */

extern struct pglist_data *first_online_pgdat(void);
extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
extern struct zone *next_zone(struct zone *zone);

/**
 * for_each_online_pgdat - helper macro to iterate over all online nodes
 * @pgdat - pointer to a pg_data_t variable
 */
#define for_each_online_pgdat(pgdat)			\
	for (pgdat = first_online_pgdat();		\
	     pgdat;					\
	     pgdat = next_online_pgdat(pgdat))
/**
 * for_each_zone - helper macro to iterate over all memory zones
 * @zone - pointer to struct zone variable
 *
 * The user only needs to declare the zone variable, for_each_zone
 * fills it in.
 */
#define for_each_zone(zone)			        \
	for (zone = (first_online_pgdat())->node_zones; \
	     zone;					\
	     zone = next_zone(zone))

#define for_each_populated_zone(zone)		        \
	for (zone = (first_online_pgdat())->node_zones; \
	     zone;					\
	     zone = next_zone(zone))			\
		if (!populated_zone(zone))		\
			; /* do nothing */		\
		else
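
/*
 * Hedged usage sketch (not part of the original header): the caller only
 * declares the cursor variables and the iterators fill them in.  The
 * helper and its printk output are illustrative, not kernel API.
 */
static inline void example_dump_nodes_and_zones(void)
{
	pg_data_t *pgdat;
	struct zone *zone;

	for_each_online_pgdat(pgdat)
		printk(KERN_DEBUG "node %d: %lu present pages\n",
		       pgdat->node_id, pgdat->node_present_pages);

	for_each_populated_zone(zone)
		printk(KERN_DEBUG "zone %s: %lu present pages\n",
		       zone->name, zone->present_pages);
}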

static inline struct zone *zonelist_zone(struct zoneref *zoneref)
{
	return zoneref->zone;
}

static inline int zonelist_zone_idx(struct zoneref *zoneref)
{
	return zoneref->zone_idx;
}

static inline int zonelist_node_idx(struct zoneref *zoneref)
{
#ifdef CONFIG_NUMA
	/* zone_to_nid not available in this context */
	return zoneref->zone->node;
#else
	return 0;
#endif /* CONFIG_NUMA */
}

/**
 * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
 * @z - The cursor used as a starting point for the search
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
 * @zone - The first suitable zone found is returned via this parameter
 *
 * This function returns the next zone at or below a given zone index that is
 * within the allowed nodemask using a cursor as the starting point for the
 * search. The zoneref returned is a cursor that represents the current zone
 * being examined. It should be advanced by one before calling
 * next_zones_zonelist again.
 */
struct zoneref *next_zones_zonelist(struct zoneref *z,
					enum zone_type highest_zoneidx,
					nodemask_t *nodes,
					struct zone **zone);

/**
 * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
 * @zonelist - The zonelist to search for a suitable zone
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
 * @zone - The first suitable zone found is returned via this parameter
 *
 * This function returns the first zone at or below a given zone index that is
 * within the allowed nodemask. The zoneref returned is a cursor that can be
 * used to iterate the zonelist with next_zones_zonelist by advancing it by
 * one before calling.
 */
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
					enum zone_type highest_zoneidx,
					nodemask_t *nodes,
					struct zone **zone)
{
	return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes,
								zone);
}

/**
 * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
 * @zone - The current zone in the iterator
 * @z - The current pointer within zonelist->zones being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 * @nodemask - Nodemask allowed by the allocator
 *
 * This iterator iterates though all zones at or below a given zone index and
 * within a given nodemask
 */
#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
	for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone);	\
		zone;							\
		z = next_zones_zonelist(++z, highidx, nodemask, &zone))

/**
 * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
 * @zone - The current zone in the iterator
 * @z - The current pointer within zonelist->zones being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 *
 * This iterator iterates though all zones at or below a given zone index.
 */
#define for_each_zone_zonelist(zone, z, zlist, highidx) \
	for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
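
/*
 * Hedged usage sketch: walk a zonelist the way the page allocator does.
 * The zoneref cursor "z" is advanced by the iterator itself (note the ++z
 * in for_each_zone_zonelist_nodemask above); callers never touch it
 * directly.  This helper is illustrative only.
 */
static inline unsigned long example_count_usable_zones(struct zonelist *zonelist,
						       enum zone_type highidx,
						       nodemask_t *nodes)
{
	struct zoneref *z;
	struct zone *zone;
	unsigned long n = 0;

	for_each_zone_zonelist_nodemask(zone, z, zonelist, highidx, nodes)
		n++;
	return n;
}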

#ifdef CONFIG_SPARSEMEM
#include <asm/sparsemem.h>
#endif

#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
	!defined(CONFIG_ARCH_POPULATES_NODE_MAP)
static inline unsigned long early_pfn_to_nid(unsigned long pfn)
{
	return 0;
}
#endif

#ifdef CONFIG_FLATMEM
#define pfn_to_nid(pfn)		(0)
#endif

#ifdef CONFIG_SPARSEMEM

/*
 * SECTIONS_SHIFT		#bits space required to store a section #
 *
 * PA_SECTION_SHIFT		physical address to/from section number
 * PFN_SECTION_SHIFT		pfn to/from section number
 */
#define SECTIONS_SHIFT		(MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)

#define PA_SECTION_SHIFT	(SECTION_SIZE_BITS)
#define PFN_SECTION_SHIFT	(SECTION_SIZE_BITS - PAGE_SHIFT)

#define NR_MEM_SECTIONS		(1UL << SECTIONS_SHIFT)

#define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
#define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION-1))

#define SECTION_BLOCKFLAGS_BITS \
	((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)

#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
#error Allocator MAX_ORDER exceeds SECTION_SIZE
#endif

#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)

#define SECTION_ALIGN_UP(pfn)	(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
#define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
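
/*
 * Worked example with assumed values (a typical x86_64 configuration of
 * this era: SECTION_SIZE_BITS = 27, PAGE_SHIFT = 12, MAX_PHYSMEM_BITS = 46):
 *
 *	PFN_SECTION_SHIFT = 27 - 12 = 15
 *	PAGES_PER_SECTION = 1UL << 15 = 32768 pages (128 MiB per section)
 *	SECTIONS_SHIFT    = 46 - 27 = 19, so NR_MEM_SECTIONS = 524288
 *	pfn_to_section_nr(0x12345678) = 0x12345678 >> 15 = 0x2468
 */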

struct page;
struct page_cgroup;
struct mem_section {
	/*
	 * This is, logically, a pointer to an array of struct
	 * pages.  However, it is stored with some other magic.
	 * (see sparse.c::sparse_init_one_section())
	 *
	 * Additionally during early boot we encode node id of
	 * the location of the section here to guide allocation.
	 * (see sparse.c::memory_present())
	 *
	 * Making it a UL at least makes someone do a cast
	 * before using it wrong.
	 */
	unsigned long section_mem_map;

	/* See declaration of similar field in struct zone */
	unsigned long *pageblock_flags;
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
	/*
	 * With SPARSEMEM the page_cgroup pointer is kept here in the
	 * section; without SPARSEMEM it lives in the pgdat instead.
	 * (See memcontrol.h/page_cgroup.h about this.)
	 */
	struct page_cgroup *page_cgroup;
	unsigned long pad;
#endif
};

#ifdef CONFIG_SPARSEMEM_EXTREME
#define SECTIONS_PER_ROOT       (PAGE_SIZE / sizeof (struct mem_section))
#else
#define SECTIONS_PER_ROOT	1
#endif

#define SECTION_NR_TO_ROOT(sec)	((sec) / SECTIONS_PER_ROOT)
#define NR_SECTION_ROOTS	DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT)
#define SECTION_ROOT_MASK	(SECTIONS_PER_ROOT - 1)

#ifdef CONFIG_SPARSEMEM_EXTREME
extern struct mem_section *mem_section[NR_SECTION_ROOTS];
#else
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

static inline struct mem_section *__nr_to_section(unsigned long nr)
{
	if (!mem_section[SECTION_NR_TO_ROOT(nr)])
		return NULL;
	return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
extern int __section_nr(struct mem_section* ms);
extern unsigned long usemap_size(void);

/*
 * We use the lower bits of the mem_map pointer to store
 * a little bit of information.  There should be at least
 * 3 bits here due to 32-bit alignment.
 */
#define	SECTION_MARKED_PRESENT	(1UL<<0)
#define SECTION_HAS_MEM_MAP	(1UL<<1)
#define SECTION_MAP_LAST_BIT	(1UL<<2)
#define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
#define SECTION_NID_SHIFT	2
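
/*
 * Hedged sketch of the early-boot nid encoding described above.  The real
 * helpers live in mm/sparse.c; these illustrative copies only show the bit
 * layout: the node id sits above SECTION_NID_SHIFT while the low bits are
 * reserved for the section flags.
 */
static inline unsigned long example_encode_early_nid(int nid)
{
	return (unsigned long)nid << SECTION_NID_SHIFT;
}

static inline int example_decode_early_nid(struct mem_section *section)
{
	return section->section_mem_map >> SECTION_NID_SHIFT;
}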

static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
	unsigned long map = section->section_mem_map;
	map &= SECTION_MAP_MASK;
	return (struct page *)map;
}

static inline int present_section(struct mem_section *section)
{
	return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
}

static inline int present_section_nr(unsigned long nr)
{
	return present_section(__nr_to_section(nr));
}

static inline int valid_section(struct mem_section *section)
{
	return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
}

static inline int valid_section_nr(unsigned long nr)
{
	return valid_section(__nr_to_section(nr));
}

static inline struct mem_section *__pfn_to_section(unsigned long pfn)
{
	return __nr_to_section(pfn_to_section_nr(pfn));
}

#ifndef CONFIG_HAVE_ARCH_PFN_VALID
static inline int pfn_valid(unsigned long pfn)
{
	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
		return 0;
	return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
}
#endif

static inline int pfn_present(unsigned long pfn)
{
	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
		return 0;
	return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
}

/*
 * These are _only_ used during initialisation, therefore they
 * can use __initdata ...  They could have names to indicate
 * this restriction.
 */
#ifdef CONFIG_NUMA
#define pfn_to_nid(pfn)							\
({									\
	unsigned long __pfn_to_nid_pfn = (pfn);				\
	page_to_nid(pfn_to_page(__pfn_to_nid_pfn));			\
})
#else
#define pfn_to_nid(pfn)		(0)
#endif

#define early_pfn_valid(pfn)	pfn_valid(pfn)
void sparse_init(void);
#else
#define sparse_init()	do {} while (0)
#define sparse_index_init(_sec, _nid)  do {} while (0)
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_NODES_SPAN_OTHER_NODES
bool early_pfn_in_nid(unsigned long pfn, int nid);
#else
#define early_pfn_in_nid(pfn, nid)	(1)
#endif

#ifndef early_pfn_valid
#define early_pfn_valid(pfn)	(1)
#endif

void memory_present(int nid, unsigned long start, unsigned long end);
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);

/*
 * If it is possible to have holes within a MAX_ORDER_NR_PAGES block,
 * then we need to check pfn validity within that block.
 * pfn_valid_within() should be used in this case; we optimise this away
 * when we have no holes within a MAX_ORDER_NR_PAGES block.
 */
#ifdef CONFIG_HOLES_IN_ZONE
#define pfn_valid_within(pfn) pfn_valid(pfn)
#else
#define pfn_valid_within(pfn) (1)
#endif
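
/*
 * Hedged usage sketch: when scanning pfns inside a single MAX_ORDER block,
 * check each one with pfn_valid_within() before touching its struct page;
 * without CONFIG_HOLES_IN_ZONE the check compiles away.  Illustrative only.
 */
static inline unsigned long example_count_valid_pfns(unsigned long start_pfn,
						     unsigned long count)
{
	unsigned long pfn, n = 0;

	for (pfn = start_pfn; pfn < start_pfn + count; pfn++)
		if (pfn_valid_within(pfn))
			n++;
	return n;
}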

#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
/*
 * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
 * associated with it or not. In FLATMEM, it is expected that holes always
 * have valid memmap as long as there is valid PFNs either side of the hole.
 * In SPARSEMEM, it is assumed that a valid section has a memmap for the
 * entire section.
 *
 * However, ARM, and maybe other embedded architectures in the future,
 * free the memmap backing holes to save memory on the assumption the
 * memmap is never used. The page_zone linkages are then broken even though
 * pfn_valid() returns true. A walker of the full memmap must then do this
 * additional check to ensure the memmap it is looking at is sane by making sure
 * the zone and PFN linkages are still valid. This is expensive, but walkers
 * of the full memmap are extremely rare.
 */
int memmap_valid_within(unsigned long pfn,
					struct page *page, struct zone *zone);
#else
static inline int memmap_valid_within(unsigned long pfn,
					struct page *page, struct zone *zone)
{
	return 1;
}
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
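
/*
 * Hedged usage sketch of the double-check described above: a full-memmap
 * walker first does the cheap pfn_valid() test, then verifies the zone/PFN
 * linkage with memmap_valid_within() (cf. the walkers in mm/vmstat.c).
 * The helper itself is illustrative, not kernel API.
 */
static inline int example_page_sane(unsigned long pfn, struct zone *zone)
{
	struct page *page;

	if (!pfn_valid(pfn))
		return 0;
	page = pfn_to_page(pfn);
	return memmap_valid_within(pfn, page, zone);
}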

#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */