From 9a30523066cde73c1442b76224bb540de9f9b0b0 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:25 -0800 Subject: hugetlb: add per node hstate attributes Add the per huge page size control/query attributes to the per node sysdevs: /sys/devices/system/node/node<ID>/hugepages/hugepages-<size>/ nr_hugepages - r/w free_hugepages - r/o surplus_hugepages - r/o The patch attempts to re-use/share as much of the existing global hstate attribute initialization and handling, and the "nodes_allowed" constraint processing as possible. Calling set_max_huge_pages() with no node indicates a change to global hstate parameters. In this case, any non-default task mempolicy will be used to generate the nodes_allowed mask. A valid node id indicates an update to that node's hstate parameters, and the count argument specifies the target count for the specified node. From this info, we compute the target global count for the hstate and construct a nodes_allowed node mask containing only the specified node. Setting the node specific nr_hugepages via the per node attribute effectively ignores any task mempolicy or cpuset constraints. 
With this patch: (me):ls /sys/devices/system/node/node0/hugepages/hugepages-2048kB ./ ../ free_hugepages nr_hugepages surplus_hugepages Starting from: Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 0 Node 2 HugePages_Free: 0 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 vm.nr_hugepages = 0 Allocate 16 persistent huge pages on node 2: (me):echo 16 >/sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages [Note that this is equivalent to: numactl -m 2 hugeadmin --pool-pages-min 2M:+16 ] Yields: Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 16 Node 2 HugePages_Free: 16 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 vm.nr_hugepages = 16 Global controls work as expected--reduce pool to 8 persistent huge pages: (me):echo 8 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 8 Node 2 HugePages_Free: 8 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 Signed-off-by: Lee Schermerhorn Acked-by: Mel Gorman Reviewed-by: Andi Kleen Cc: KAMEZAWA Hiroyuki Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: David Rientjes Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index 1fe5536d404f..f502711d28db 100644 --- 
a/drivers/base/node.c +++ b/drivers/base/node.c @@ -173,6 +173,43 @@ static ssize_t node_read_distance(struct sys_device * dev, } static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); +#ifdef CONFIG_HUGETLBFS +/* + * hugetlbfs per node attributes registration interface: + * When/if hugetlb[fs] subsystem initializes [sometime after this module], + * it will register its per node attributes for all nodes online at that + * time. It will also call register_hugetlbfs_with_node(), below, to + * register its attribute registration functions with this node driver. + * Once these hooks have been initialized, the node driver will call into + * the hugetlb module to [un]register attributes for hot-plugged nodes. + */ +static node_registration_func_t __hugetlb_register_node; +static node_registration_func_t __hugetlb_unregister_node; + +static inline void hugetlb_register_node(struct node *node) +{ + if (__hugetlb_register_node) + __hugetlb_register_node(node); +} + +static inline void hugetlb_unregister_node(struct node *node) +{ + if (__hugetlb_unregister_node) + __hugetlb_unregister_node(node); +} + +void register_hugetlbfs_with_node(node_registration_func_t doregister, + node_registration_func_t unregister) +{ + __hugetlb_register_node = doregister; + __hugetlb_unregister_node = unregister; +} +#else +static inline void hugetlb_register_node(struct node *node) {} + +static inline void hugetlb_unregister_node(struct node *node) {} +#endif + /* * register_node - Setup a sysfs device for a node. 
@@ -196,6 +233,7 @@ int register_node(struct node *node, int num, struct node *parent) sysdev_create_file(&node->sysdev, &attr_distance); scan_unevictable_register_node(node); + hugetlb_register_node(node); } return error; } @@ -216,6 +254,7 @@ void unregister_node(struct node *node) sysdev_remove_file(&node->sysdev, &attr_distance); scan_unevictable_unregister_node(node); + hugetlb_unregister_node(node); sysdev_unregister(&node->sysdev); } -- cgit v1.2.2 From 4faf8d950ec438c49ae4526b897c30f8a2cad741 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:35 -0800 Subject: hugetlb: handle memory hot-plug events Register per node hstate attributes only for nodes with memory. As suggested by David Rientjes. With Memory Hotplug, memory can be added to a memoryless node and a node with memory can become memoryless. Therefore, add a memory on/off-line notifier callback to [un]register a node's attributes on transition to/from memoryless state. N.B., Only tested build, boot, libhugetlbfs regression. i.e., no memory hotplug testing. Signed-off-by: Lee Schermerhorn Reviewed-by: Andi Kleen Acked-by: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Mel Gorman Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index f502711d28db..9e218a6d4a5b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -177,8 +177,8 @@ static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); /* * hugetlbfs per node attributes registration interface: * When/if hugetlb[fs] subsystem initializes [sometime after this module], - * it will register its per node attributes for all nodes online at that - * time. 
It will also call register_hugetlbfs_with_node(), below, to + * it will register its per node attributes for all online nodes with + * memory. It will also call register_hugetlbfs_with_node(), below, to * register its attribute registration functions with this node driver. * Once these hooks have been initialized, the node driver will call into * the hugetlb module to [un]register attributes for hot-plugged nodes. @@ -188,7 +188,8 @@ static node_registration_func_t __hugetlb_unregister_node; static inline void hugetlb_register_node(struct node *node) { - if (__hugetlb_register_node) + if (__hugetlb_register_node && + node_state(node->sysdev.id, N_HIGH_MEMORY)) __hugetlb_register_node(node); } @@ -233,6 +234,7 @@ int register_node(struct node *node, int num, struct node *parent) sysdev_create_file(&node->sysdev, &attr_distance); scan_unevictable_register_node(node); + hugetlb_register_node(node); } return error; @@ -254,7 +256,7 @@ void unregister_node(struct node *node) sysdev_remove_file(&node->sysdev, &attr_distance); scan_unevictable_unregister_node(node); - hugetlb_unregister_node(node); + hugetlb_unregister_node(node); /* no-op, if memoryless node */ sysdev_unregister(&node->sysdev); } @@ -384,8 +386,45 @@ static int link_mem_sections(int nid) } return err; } + +/* + * Handle per node hstate attribute [un]registration on transistions + * to/from memoryless state. 
+ */ + +static int node_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + struct memory_notify *mnb = arg; + int nid = mnb->status_change_nid; + + switch (action) { + case MEM_ONLINE: /* memory successfully brought online */ + if (nid != NUMA_NO_NODE) + hugetlb_register_node(&node_devices[nid]); + break; + case MEM_OFFLINE: /* or offline */ + if (nid != NUMA_NO_NODE) + hugetlb_unregister_node(&node_devices[nid]); + break; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_CANCEL_OFFLINE: + default: + break; + } + + return NOTIFY_OK; +} #else static int link_mem_sections(int nid) { return 0; } + +static inline int node_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + return NOTIFY_OK; +} #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ int register_one_node(int nid) @@ -499,13 +538,17 @@ static int node_states_init(void) return err; } +#define NODE_CALLBACK_PRI 2 /* lower than SLAB */ static int __init register_node_type(void) { int ret; ret = sysdev_class_register(&node_class); - if (!ret) + if (!ret) { ret = node_states_init(); + hotplug_memory_notifier(node_memory_callback, + NODE_CALLBACK_PRI); + } /* * Note: we're not going to unregister the node class if we fail -- cgit v1.2.2 From 39da08cb074cf19cb249832a2a955dfb28837e65 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:36 -0800 Subject: hugetlb: offload per node attribute registrations Offload the registration and unregistration of per node hstate sysfs attributes to a worker thread rather than attempt the allocation/attachment or detachment/freeing of the attributes in the context of the memory hotplug handler. I don't know that this is absolutely required, but the registration can sleep in allocations and other mem hot plug handlers do it this way. If it turns out this is NOT required, we can drop this patch. N.B., Only tested build, boot, libhugetlbfs regression. 
i.e., no memory hotplug testing. Signed-off-by: Lee Schermerhorn Reviewed-by: Andi Kleen Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Mel Gorman Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: David Rientjes Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 57 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 10 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index 9e218a6d4a5b..54e5d8eaf70e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -186,11 +186,14 @@ static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); static node_registration_func_t __hugetlb_register_node; static node_registration_func_t __hugetlb_unregister_node; -static inline void hugetlb_register_node(struct node *node) +static inline bool hugetlb_register_node(struct node *node) { if (__hugetlb_register_node && - node_state(node->sysdev.id, N_HIGH_MEMORY)) + node_state(node->sysdev.id, N_HIGH_MEMORY)) { __hugetlb_register_node(node); + return true; + } + return false; } static inline void hugetlb_unregister_node(struct node *node) @@ -387,10 +390,31 @@ static int link_mem_sections(int nid) return err; } +#ifdef CONFIG_HUGETLBFS /* * Handle per node hstate attribute [un]registration on transistions * to/from memoryless state. */ +static void node_hugetlb_work(struct work_struct *work) +{ + struct node *node = container_of(work, struct node, node_work); + + /* + * We only get here when a node transitions to/from memoryless state. + * We can detect which transition occurred by examining whether the + * node has memory now. hugetlb_register_node() already check this + * so we try to register the attributes. If that fails, then the + * node has transitioned to memoryless, try to unregister the + * attributes. 
+ */ + if (!hugetlb_register_node(node)) + hugetlb_unregister_node(node); +} + +static void init_node_hugetlb_work(int nid) +{ + INIT_WORK(&node_devices[nid].node_work, node_hugetlb_work); +} static int node_memory_callback(struct notifier_block *self, unsigned long action, void *arg) @@ -399,14 +423,16 @@ static int node_memory_callback(struct notifier_block *self, int nid = mnb->status_change_nid; switch (action) { - case MEM_ONLINE: /* memory successfully brought online */ + case MEM_ONLINE: + case MEM_OFFLINE: + /* + * offload per node hstate [un]registration to a work thread + * when transitioning to/from memoryless state. + */ if (nid != NUMA_NO_NODE) - hugetlb_register_node(&node_devices[nid]); - break; - case MEM_OFFLINE: /* or offline */ - if (nid != NUMA_NO_NODE) - hugetlb_unregister_node(&node_devices[nid]); + schedule_work(&node_devices[nid].node_work); break; + case MEM_GOING_ONLINE: case MEM_GOING_OFFLINE: case MEM_CANCEL_ONLINE: @@ -417,15 +443,23 @@ static int node_memory_callback(struct notifier_block *self, return NOTIFY_OK; } -#else +#endif /* CONFIG_HUGETLBFS */ +#else /* !CONFIG_MEMORY_HOTPLUG_SPARSE */ + static int link_mem_sections(int nid) { return 0; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ +#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ + !defined(CONFIG_HUGETLBFS) static inline int node_memory_callback(struct notifier_block *self, unsigned long action, void *arg) { return NOTIFY_OK; } -#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ + +static void init_node_hugetlb_work(int nid) { } + +#endif int register_one_node(int nid) { @@ -449,6 +483,9 @@ int register_one_node(int nid) /* link memory sections under this node */ error = link_mem_sections(nid); + + /* initialize work queue for memory hot plug */ + init_node_hugetlb_work(nid); } return error; -- cgit v1.2.2 From dee5d0d518defd0337a41f1a504428c9acc87be5 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 14 Dec 2009 17:59:05 -0800 Subject: mm: add numa node symlink for memory section 
in sysfs Commit c04fc586c (mm: show node to memory section relationship with symlinks in sysfs) created symlinks from nodes to memory sections, e.g. /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 If you're examining the memory section though and are wondering what node it might belong to, you can find it by grovelling around in sysfs, but it's a little cumbersome. Add a reverse symlink for each memory section that points back to the node to which it belongs. Signed-off-by: Alex Chiang Cc: Gary Hade Cc: Badari Pulavarty Cc: Ingo Molnar Acked-by: David Rientjes Cc: Greg KH Cc: Randy Dunlap Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index 54e5d8eaf70e..44eed11bbdf3 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -312,6 +312,7 @@ static int get_nid_for_pfn(unsigned long pfn) /* register memory section under specified node if it spans that node */ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) { + int ret; unsigned long pfn, sect_start_pfn, sect_end_pfn; if (!mem_blk) @@ -328,9 +329,15 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) continue; if (page_nid != nid) continue; - return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, + ret = sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, &mem_blk->sysdev.kobj, kobject_name(&mem_blk->sysdev.kobj)); + if (ret) + return ret; + + return sysfs_create_link_nowarn(&mem_blk->sysdev.kobj, + &node_devices[nid].sysdev.kobj, + kobject_name(&node_devices[nid].sysdev.kobj)); } /* mem section does not span the specified node */ return 0; @@ -359,6 +366,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) continue; sysfs_remove_link(&node_devices[nid].sysdev.kobj, 
kobject_name(&mem_blk->sysdev.kobj)); + sysfs_remove_link(&mem_blk->sysdev.kobj, + kobject_name(&node_devices[nid].sysdev.kobj)); } return 0; } -- cgit v1.2.2 From f8246f3159dfdf97b8b40f9e03e715bafedd22fc Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 14 Dec 2009 17:59:06 -0800 Subject: mm: refactor register_cpu_under_node() By returning early if the node is not online, we can unindent the interesting code by one level. No functional change. Signed-off-by: Alex Chiang Cc: Gary Hade Cc: Badari Pulavarty Cc: Ingo Molnar Cc: David Rientjes Cc: Greg KH Cc: Randy Dunlap Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index 44eed11bbdf3..eeae035dadc3 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -271,16 +271,18 @@ struct node node_devices[MAX_NUMNODES]; */ int register_cpu_under_node(unsigned int cpu, unsigned int nid) { - if (node_online(nid)) { - struct sys_device *obj = get_cpu_sysdev(cpu); - if (!obj) - return 0; - return sysfs_create_link(&node_devices[nid].sysdev.kobj, - &obj->kobj, - kobject_name(&obj->kobj)); - } + struct sys_device *obj; - return 0; + if (!node_online(nid)) + return 0; + + obj = get_cpu_sysdev(cpu); + if (!obj) + return 0; + + return sysfs_create_link(&node_devices[nid].sysdev.kobj, + &obj->kobj, + kobject_name(&obj->kobj)); } int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) -- cgit v1.2.2 From b9d52dad9447d0db4b52d67d5e9e9d339b5e8302 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 14 Dec 2009 17:59:07 -0800 Subject: mm: refactor unregister_cpu_under_node() By returning early if the node is not online, we can unindent the interesting code by two levels. No functional change. 
Signed-off-by: Alex Chiang Cc: Gary Hade Cc: Badari Pulavarty Cc: Ingo Molnar Cc: David Rientjes Cc: Greg KH Cc: Randy Dunlap Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index eeae035dadc3..9b9acc39a1eb 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -287,12 +287,18 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { - if (node_online(nid)) { - struct sys_device *obj = get_cpu_sysdev(cpu); - if (obj) - sysfs_remove_link(&node_devices[nid].sysdev.kobj, - kobject_name(&obj->kobj)); - } + struct sys_device *obj; + + if (!node_online(nid)) + return 0; + + obj = get_cpu_sysdev(cpu); + if (!obj) + return 0; + + sysfs_remove_link(&node_devices[nid].sysdev.kobj, + kobject_name(&obj->kobj)); + return 0; } -- cgit v1.2.2 From 1830794ae6392ce12d36dbcc5ff52f11298ddab6 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 14 Dec 2009 17:59:08 -0800 Subject: mm: add numa node symlink for cpu devices in sysfs You can discover which CPUs belong to a NUMA node by examining /sys/devices/system/node/node#/ However, it's not convenient to go in the other direction, when looking at /sys/devices/system/cpu/cpu#/ Yes, you can muck about in sysfs, but adding these symlinks makes life a lot more convenient. 
Signed-off-by: Alex Chiang Acked-by: David Rientjes Cc: Gary Hade Cc: Badari Pulavarty Cc: Ingo Molnar Cc: David Rientjes Cc: Greg KH Cc: Randy Dunlap Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index 9b9acc39a1eb..41414113b9f0 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -271,6 +271,7 @@ struct node node_devices[MAX_NUMNODES]; */ int register_cpu_under_node(unsigned int cpu, unsigned int nid) { + int ret; struct sys_device *obj; if (!node_online(nid)) @@ -280,9 +281,15 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) if (!obj) return 0; - return sysfs_create_link(&node_devices[nid].sysdev.kobj, + ret = sysfs_create_link(&node_devices[nid].sysdev.kobj, &obj->kobj, kobject_name(&obj->kobj)); + if (ret) + return ret; + + return sysfs_create_link(&obj->kobj, + &node_devices[nid].sysdev.kobj, + kobject_name(&node_devices[nid].sysdev.kobj)); } int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) @@ -298,6 +305,8 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) sysfs_remove_link(&node_devices[nid].sysdev.kobj, kobject_name(&obj->kobj)); + sysfs_remove_link(&obj->kobj, + kobject_name(&node_devices[nid].sysdev.kobj)); return 0; } -- cgit v1.2.2 From 9ae49fab239fb49de92a657c7426271e0793c4e1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 14 Dec 2009 17:59:46 -0800 Subject: mm: slab-allocate memory section nodemask for large systems Nodemasks should not be allocated on the stack for large systems (when it is larger than 256 bytes) since there is a threat of overflow. This patch causes the unregister_mem_sect_under_nodes() nodemask to be allocated on the stack for smaller systems and be allocated by slab for larger systems. 
GFP_KERNEL is used since remove_memory_block() can block. Cc: Gary Hade Cc: Badari Pulavarty Cc: Alex Chiang Signed-off-by: David Rientjes Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index 41414113b9f0..70122791683d 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -363,12 +363,16 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) /* unregister memory section under all nodes that it spans */ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) { - nodemask_t unlinked_nodes; + NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; - if (!mem_blk) + if (!mem_blk) { + NODEMASK_FREE(unlinked_nodes); return -EFAULT; - nodes_clear(unlinked_nodes); + } + if (!unlinked_nodes) + return -ENOMEM; + nodes_clear(*unlinked_nodes); sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { @@ -379,13 +383,14 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) continue; if (!node_online(nid)) continue; - if (node_test_and_set(nid, unlinked_nodes)) + if (node_test_and_set(nid, *unlinked_nodes)) continue; sysfs_remove_link(&node_devices[nid].sysdev.kobj, kobject_name(&mem_blk->sysdev.kobj)); sysfs_remove_link(&mem_blk->sysdev.kobj, kobject_name(&node_devices[nid].sysdev.kobj)); } + NODEMASK_FREE(unlinked_nodes); return 0; } -- cgit v1.2.2 From 1d531c14d2ed4b24472a4d773f00ed6d1cd34ee7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sun, 13 Dec 2009 20:28:30 +0100 Subject: PM: allow for usage_count > 0 in pm_runtime_get() This patch (as1308c) fixes __pm_runtime_get(). Currently the routine will resume a device if the prior usage count was 0. 
But this isn't right; thanks to pm_runtime_get_noresume() the usage count can be positive even while the device is suspended. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 5a01ecef4af3..40d7720a4b21 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -701,15 +701,15 @@ EXPORT_SYMBOL_GPL(pm_request_resume); * @dev: Device to handle. * @sync: If set and the device is suspended, resume it synchronously. * - * Increment the usage count of the device and if it was zero previously, - * resume it or submit a resume request for it, depending on the value of @sync. + * Increment the usage count of the device and resume it or submit a resume + * request for it, depending on the value of @sync. */ int __pm_runtime_get(struct device *dev, bool sync) { - int retval = 1; + int retval; - if (atomic_add_return(1, &dev->power.usage_count) == 1) - retval = sync ? pm_runtime_resume(dev) : pm_request_resume(dev); + atomic_inc(&dev->power.usage_count); + retval = sync ? pm_runtime_resume(dev) : pm_request_resume(dev); return retval; } -- cgit v1.2.2 From f2511774863487e61b56a97da07ebf8dd61d7836 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 13 Dec 2009 20:29:01 +0100 Subject: PM: Add initcall_debug style timing for suspend/resume In order to diagnose overall suspend/resume times, we need basic instrumentation to break down the total time into per device timing, similar to initcall_debug. This patch adds the basic timing instrumentation, needed for a scripts/bootgraph.pl equivalent or humans. The bootgraph.pl program is still a work in progress, but is far enough along to know that this patch is sufficient. Signed-off-by: Arjan van de Ven Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 8aa2443182d5..30f0ceebd36c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "../base.h" #include "power.h" @@ -172,6 +173,13 @@ static int pm_op(struct device *dev, pm_message_t state) { int error = 0; + ktime_t calltime, delta, rettime; + + if (initcall_debug) { + pr_info("calling %s+ @ %i\n", + dev_name(dev), task_pid_nr(current)); + calltime = ktime_get(); + } switch (state.event) { #ifdef CONFIG_SUSPEND @@ -219,6 +227,14 @@ static int pm_op(struct device *dev, default: error = -EINVAL; } + + if (initcall_debug) { + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev), + error, (unsigned long long)ktime_to_ns(delta) >> 10); + } + return error; } @@ -236,6 +252,13 @@ static int pm_noirq_op(struct device *dev, pm_message_t state) { int error = 0; + ktime_t calltime, delta, rettime; + + if (initcall_debug) { + pr_info("calling %s_i+ @ %i\n", + dev_name(dev), task_pid_nr(current)); + calltime = ktime_get(); + } switch (state.event) { #ifdef CONFIG_SUSPEND @@ -283,6 +306,14 @@ static int pm_noirq_op(struct device *dev, default: error = -EINVAL; } + + if (initcall_debug) { + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + printk("initcall %s_i+ returned %d after %Ld usecs\n", dev_name(dev), + error, (unsigned long long)ktime_to_ns(delta) >> 10); + } + return error; } -- cgit v1.2.2 From 33c3374031facf7599c30a1548dfa4c83da87da3 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 13 Dec 2009 20:31:12 +0100 Subject: PM: Remove unnecessary goto from device_resume_noirq() In device_resume_noirq() there is the 'End' label and the associated goto statement that aren't strictly necessary, so rework the code to get rid of them. Also modify device_suspend_noirq() so that it looks completely analogous to device_resume_noirq(). Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 30f0ceebd36c..df04cb4a3611 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -372,14 +372,11 @@ static int device_resume_noirq(struct device *dev, pm_message_t state) TRACE_DEVICE(dev); TRACE_RESUME(0); - if (!dev->bus) - goto End; - - if (dev->bus->pm) { + if (dev->bus && dev->bus->pm) { pm_dev_dbg(dev, state, "EARLY "); error = pm_noirq_op(dev, dev->bus->pm, state); } - End: + TRACE_RESUME(error); return error; } @@ -615,10 +612,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state) { int error = 0; - if (!dev->bus) - return 0; - - if (dev->bus->pm) { + if (dev->bus && dev->bus->pm) { pm_dev_dbg(dev, state, "LATE "); error = pm_noirq_op(dev, dev->bus->pm, state); } -- cgit v1.2.2 From d8bed5a4f343d1826153ecf8e7932126c757a21d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 13 Dec 2009 20:48:54 +0100 Subject: PM: rwsem.h need not be included into main.c It is not necessary to include <linux/rwsem.h> into drivers/base/power/main.c, so don't do that. Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index df04cb4a3611..1a216c114a0f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include -- cgit v1.2.2 From facb6011f3993947283fa15d039dacb4ad140230 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:20:00 +0100 Subject: HWPOISON: Add soft page offline support This is a simpler, gentler variant of memory_failure() for soft page offlining controlled from user space. It doesn't kill anything, just tries to invalidate and if that doesn't work migrate the page away. This is useful for predictive failure analysis, where a page has a high rate of corrected errors, but hasn't gone bad yet. Instead it can be offlined early and avoided. The offlining is controlled from sysfs, including a new generic entry point for hard page offlining for symmetry too. We use the page isolate facility to prevent re-allocation race. Normally this is only used by memory hotplug. To avoid races with memory allocation I am using lock_system_sleep(). This avoids the situation where memory hotplug is about to isolate a page range and then hwpoison undoes that work. This is a big hammer currently, but the simplest solution currently. When the page is not free or LRU we try to free pages from slab and other caches. The slab freeing is currently quite dumb and does not try to focus on the specific slab cache which might own the page. This could be potentially improved later. Thanks to Fengguang Wu and Haicheng Li for some fixes. 
[Added fix from Andrew Morton to adapt to new migrate_pages prototype] Signed-off-by: Andi Kleen --- drivers/base/memory.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 989429cfed88..c4c8f2e1dd15 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -341,6 +341,64 @@ static inline int memory_probe_init(void) } #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Support for offlining pages of memory + */ + +/* Soft offline a page */ +static ssize_t +store_soft_offline_page(struct class *class, const char *buf, size_t count) +{ + int ret; + u64 pfn; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (strict_strtoull(buf, 0, &pfn) < 0) + return -EINVAL; + pfn >>= PAGE_SHIFT; + if (!pfn_valid(pfn)) + return -ENXIO; + ret = soft_offline_page(pfn_to_page(pfn), 0); + return ret == 0 ? count : ret; +} + +/* Forcibly offline a page, including killing processes. */ +static ssize_t +store_hard_offline_page(struct class *class, const char *buf, size_t count) +{ + int ret; + u64 pfn; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (strict_strtoull(buf, 0, &pfn) < 0) + return -EINVAL; + pfn >>= PAGE_SHIFT; + ret = __memory_failure(pfn, 0, 0); + return ret ? ret : count; +} + +static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page); +static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page); + +static __init int memory_fail_init(void) +{ + int err; + + err = sysfs_create_file(&memory_sysdev_class.kset.kobj, + &class_attr_soft_offline_page.attr); + if (!err) + err = sysfs_create_file(&memory_sysdev_class.kset.kobj, + &class_attr_hard_offline_page.attr); + return err; +} +#else +static inline int memory_fail_init(void) +{ + return 0; +} +#endif + /* * Note that phys_device is optional. 
It is here to allow for * differentiation between which *physical* devices each @@ -471,6 +529,9 @@ int __init memory_dev_init(void) } err = memory_probe_init(); + if (!ret) + ret = err; + err = memory_fail_init(); if (!ret) ret = err; err = block_size_init(); -- cgit v1.2.2 From 875ab0b74e85d6801a49392447d26e0b28688d86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 18 Dec 2009 01:57:31 +0100 Subject: PM: Make the initcall_debug style timing for suspend/resume complete Commit f2511774863487e61b56a97da07ebf8dd61d7836 (PM: Add initcall_debug style timing for suspend/resume) introduced basic timing instrumentation, needed for a scripts/bootgraph.pl equivalent or humans, but it missed the fact that bus types and device classes which haven't been switched to using struct dev_pm_ops objects yet need special handling. As a result, the suspend/resume timing information is only available for devices whose bus types or device classes use struct dev_pm_ops objects, so the majority of devices are not covered. Fix this by adding basic suspend/resume timing instrumentation for devices whose bus types and device classes still don't use struct dev_pm_ops objects for power management. To reduce code duplication move the timing code to helper functions. Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 97 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 20 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1a216c114a0f..c448d5972a0b 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -161,6 +161,32 @@ void device_pm_move_last(struct device *dev) list_move_tail(&dev->power.entry, &dpm_list); } +static ktime_t initcall_debug_start(struct device *dev) +{ + ktime_t calltime = ktime_set(0, 0); + + if (initcall_debug) { + pr_info("calling %s+ @ %i\n", + dev_name(dev), task_pid_nr(current)); + calltime = ktime_get(); + } + + return calltime; +} + +static void initcall_debug_report(struct device *dev, ktime_t calltime, + int error) +{ + ktime_t delta, rettime; + + if (initcall_debug) { + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev), + error, (unsigned long long)ktime_to_ns(delta) >> 10); + } +} + /** * pm_op - Execute the PM operation appropriate for given PM event. * @dev: Device to handle. 
@@ -172,13 +198,9 @@ static int pm_op(struct device *dev, pm_message_t state) { int error = 0; - ktime_t calltime, delta, rettime; + ktime_t calltime; - if (initcall_debug) { - pr_info("calling %s+ @ %i\n", - dev_name(dev), task_pid_nr(current)); - calltime = ktime_get(); - } + calltime = initcall_debug_start(dev); switch (state.event) { #ifdef CONFIG_SUSPEND @@ -227,12 +249,7 @@ static int pm_op(struct device *dev, error = -EINVAL; } - if (initcall_debug) { - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev), - error, (unsigned long long)ktime_to_ns(delta) >> 10); - } + initcall_debug_report(dev, calltime, error); return error; } @@ -309,8 +326,9 @@ static int pm_noirq_op(struct device *dev, if (initcall_debug) { rettime = ktime_get(); delta = ktime_sub(rettime, calltime); - printk("initcall %s_i+ returned %d after %Ld usecs\n", dev_name(dev), - error, (unsigned long long)ktime_to_ns(delta) >> 10); + printk("initcall %s_i+ returned %d after %Ld usecs\n", + dev_name(dev), error, + (unsigned long long)ktime_to_ns(delta) >> 10); } return error; @@ -407,6 +425,26 @@ void dpm_resume_noirq(pm_message_t state) } EXPORT_SYMBOL_GPL(dpm_resume_noirq); +/** + * legacy_resume - Execute a legacy (bus or class) resume callback for device. + * dev: Device to resume. + * cb: Resume callback to execute. + */ +static int legacy_resume(struct device *dev, int (*cb)(struct device *dev)) +{ + int error; + ktime_t calltime; + + calltime = initcall_debug_start(dev); + + error = cb(dev); + suspend_report_result(cb, error); + + initcall_debug_report(dev, calltime, error); + + return error; +} + /** * device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. 
@@ -427,7 +465,7 @@ static int device_resume(struct device *dev, pm_message_t state) error = pm_op(dev, dev->bus->pm, state); } else if (dev->bus->resume) { pm_dev_dbg(dev, state, "legacy "); - error = dev->bus->resume(dev); + error = legacy_resume(dev, dev->bus->resume); } if (error) goto End; @@ -448,7 +486,7 @@ static int device_resume(struct device *dev, pm_message_t state) error = pm_op(dev, dev->class->pm, state); } else if (dev->class->resume) { pm_dev_dbg(dev, state, "legacy class "); - error = dev->class->resume(dev); + error = legacy_resume(dev, dev->class->resume); } } End: @@ -647,6 +685,27 @@ int dpm_suspend_noirq(pm_message_t state) } EXPORT_SYMBOL_GPL(dpm_suspend_noirq); +/** + * legacy_suspend - Execute a legacy (bus or class) suspend callback for device. + * dev: Device to suspend. + * cb: Suspend callback to execute. + */ +static int legacy_suspend(struct device *dev, pm_message_t state, + int (*cb)(struct device *dev, pm_message_t state)) +{ + int error; + ktime_t calltime; + + calltime = initcall_debug_start(dev); + + error = cb(dev, state); + suspend_report_result(cb, error); + + initcall_debug_report(dev, calltime, error); + + return error; +} + /** * device_suspend - Execute "suspend" callbacks for given device. * @dev: Device to handle. 
@@ -664,8 +723,7 @@ static int device_suspend(struct device *dev, pm_message_t state) error = pm_op(dev, dev->class->pm, state); } else if (dev->class->suspend) { pm_dev_dbg(dev, state, "legacy class "); - error = dev->class->suspend(dev, state); - suspend_report_result(dev->class->suspend, error); + error = legacy_suspend(dev, state, dev->class->suspend); } if (error) goto End; @@ -686,8 +744,7 @@ static int device_suspend(struct device *dev, pm_message_t state) error = pm_op(dev, dev->bus->pm, state); } else if (dev->bus->suspend) { pm_dev_dbg(dev, state, "legacy "); - error = dev->bus->suspend(dev, state); - suspend_report_result(dev->bus->suspend, error); + error = legacy_suspend(dev, state, dev->bus->suspend); } } End: -- cgit v1.2.2 From ecf762b2581e12ac761d12a6e4e297c2224aa899 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 18 Dec 2009 01:57:47 +0100 Subject: PM: Measure device suspend and resume times Measure and print the time of suspending and resuming all devices. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c448d5972a0b..8052dafc0ba9 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -372,6 +372,23 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info, kobject_name(&dev->kobj), pm_verb(state.event), info, error); } +static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info) +{ + ktime_t calltime; + s64 usecs64; + int usecs; + + calltime = ktime_get(); + usecs64 = ktime_to_ns(ktime_sub(calltime, starttime)); + do_div(usecs64, NSEC_PER_USEC); + usecs = usecs64; + if (usecs == 0) + usecs = 1; + pr_info("PM: %s%s%s of devices complete after %ld.%03ld msecs\n", + info ?: "", info ? 
" " : "", pm_verb(state.event), + usecs / USEC_PER_MSEC, usecs % USEC_PER_MSEC); +} + /*------------------------- Resume routines -------------------------*/ /** @@ -408,6 +425,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state) void dpm_resume_noirq(pm_message_t state) { struct device *dev; + ktime_t starttime = ktime_get(); mutex_lock(&dpm_list_mtx); transition_started = false; @@ -421,6 +439,7 @@ void dpm_resume_noirq(pm_message_t state) pm_dev_err(dev, state, " early", error); } mutex_unlock(&dpm_list_mtx); + dpm_show_time(starttime, state, "early"); resume_device_irqs(); } EXPORT_SYMBOL_GPL(dpm_resume_noirq); @@ -506,6 +525,7 @@ static int device_resume(struct device *dev, pm_message_t state) static void dpm_resume(pm_message_t state) { struct list_head list; + ktime_t starttime = ktime_get(); INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); @@ -534,6 +554,7 @@ static void dpm_resume(pm_message_t state) } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); + dpm_show_time(starttime, state, NULL); } /** @@ -666,6 +687,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state) int dpm_suspend_noirq(pm_message_t state) { struct device *dev; + ktime_t starttime = ktime_get(); int error = 0; suspend_device_irqs(); @@ -681,6 +703,8 @@ int dpm_suspend_noirq(pm_message_t state) mutex_unlock(&dpm_list_mtx); if (error) dpm_resume_noirq(resume_event(state)); + else + dpm_show_time(starttime, state, "late"); return error; } EXPORT_SYMBOL_GPL(dpm_suspend_noirq); @@ -760,6 +784,7 @@ static int device_suspend(struct device *dev, pm_message_t state) static int dpm_suspend(pm_message_t state) { struct list_head list; + ktime_t starttime = ktime_get(); int error = 0; INIT_LIST_HEAD(&list); @@ -785,6 +810,8 @@ static int dpm_suspend(pm_message_t state) } list_splice(&list, dpm_list.prev); mutex_unlock(&dpm_list_mtx); + if (!error) + dpm_show_time(starttime, state, NULL); return error; } -- cgit v1.2.2 From 
925cc71e512a29e2594bcc17dc58d0a0e9c4d524 Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Thu, 17 Dec 2009 14:44:38 +0000 Subject: mm: Add notifier in pageblock isolation for balloon drivers Memory balloon drivers can allocate a large amount of memory which is not movable but could be freed to accommodate memory hotplug remove. Prior to calling the memory hotplug notifier chain the memory in the pageblock is isolated. Currently, if the migrate type is not MIGRATE_MOVABLE the isolation will not proceed, causing the memory removal for that page range to fail. Rather than failing pageblock isolation if the migratetype is not MIGRATE_MOVABLE, this patch checks if all of the pages in the pageblock, and not on the LRU, are owned by a registered balloon driver (or other entity) using a notifier chain. If all of the non-movable pages are owned by a balloon, they can be freed later through the memory notifier chain and the range can still be isolated in set_migratetype_isolate(). Signed-off-by: Robert Jennings Cc: Mel Gorman Cc: Ingo Molnar Cc: Brian King Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Gerald Schaefer Cc: KAMEZAWA Hiroyuki Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- drivers/base/memory.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c4c8f2e1dd15..d7d77d4a402c 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_memory_notifier); +static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain); + +int register_memory_isolate_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&memory_isolate_chain, nb); +} +EXPORT_SYMBOL(register_memory_isolate_notifier); + +void unregister_memory_isolate_notifier(struct notifier_block *nb) +{ + 
atomic_notifier_chain_unregister(&memory_isolate_chain, nb); +} +EXPORT_SYMBOL(unregister_memory_isolate_notifier); + /* * register_memory - Setup a sysfs device for a memory block */ @@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v) return blocking_notifier_call_chain(&memory_chain, val, v); } +int memory_isolate_notify(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&memory_isolate_chain, val, v); +} + /* * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is * OK to have direct references to sparsemem variables in here. -- cgit v1.2.2 From aa0baaef97c89de2ef216fcc017215ee01662a10 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 21 Dec 2009 02:46:11 +0100 Subject: PM: Use pm_runtime_put_sync in system resume This patch (as1317) fixes a bug in the PM core. When a device is resumed following a system sleep, the core decrements the device's runtime PM usage counter but doesn't issue an idle notification if the counter reaches 0. This could prevent an otherwise unused device from being runtime-suspended again after the system sleep. The fix is to call pm_runtime_put_sync() instead of pm_runtime_put_noidle(). Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 8052dafc0ba9..48adf80926a0 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -607,7 +607,7 @@ static void dpm_complete(pm_message_t state) mutex_unlock(&dpm_list_mtx); device_complete(dev, state); - pm_runtime_put_noidle(dev); + pm_runtime_put_sync(dev); mutex_lock(&dpm_list_mtx); } @@ -880,7 +880,7 @@ static int dpm_prepare(pm_message_t state) pm_runtime_get_noresume(dev); if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) { /* Wake-up requested during system sleep transition. 
*/ - pm_runtime_put_noidle(dev); + pm_runtime_put_sync(dev); error = -EBUSY; } else { error = device_prepare(dev, state); -- cgit v1.2.2 From a6ab7aa9f432f722808c6fea5a8b7f5f229de031 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Dec 2009 20:43:17 +0100 Subject: PM / Runtime: Use device type and device class callbacks The power management of some devices is handled through device types and device classes rather than through bus types. Since these devices may also benefit from using the run-time power management core, extend it so that the device type and device class run-time PM callbacks can be taken into consideration by it if the bus type callback is not defined. Update the run-time PM core documentation to reflect this change. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 40d7720a4b21..f8b044e8aef7 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -84,6 +84,19 @@ static int __pm_runtime_idle(struct device *dev) dev->bus->pm->runtime_idle(dev); + spin_lock_irq(&dev->power.lock); + } else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle) { + spin_unlock_irq(&dev->power.lock); + + dev->type->pm->runtime_idle(dev); + + spin_lock_irq(&dev->power.lock); + } else if (dev->class && dev->class->pm + && dev->class->pm->runtime_idle) { + spin_unlock_irq(&dev->power.lock); + + dev->class->pm->runtime_idle(dev); + spin_lock_irq(&dev->power.lock); } @@ -192,6 +205,22 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq) retval = dev->bus->pm->runtime_suspend(dev); + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else if (dev->type && dev->type->pm + && dev->type->pm->runtime_suspend) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->type->pm->runtime_suspend(dev); + + 
spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else if (dev->class && dev->class->pm + && dev->class->pm->runtime_suspend) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->class->pm->runtime_suspend(dev); + spin_lock_irq(&dev->power.lock); dev->power.runtime_error = retval; } else { @@ -357,6 +386,22 @@ int __pm_runtime_resume(struct device *dev, bool from_wq) retval = dev->bus->pm->runtime_resume(dev); + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else if (dev->type && dev->type->pm + && dev->type->pm->runtime_resume) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->type->pm->runtime_resume(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else if (dev->class && dev->class->pm + && dev->class->pm->runtime_resume) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->class->pm->runtime_resume(dev); + spin_lock_irq(&dev->power.lock); dev->power.runtime_error = retval; } else { -- cgit v1.2.2 From f1f76f865b5f66db5b5c7f2d19874f2bb9b43b8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Dec 2009 21:31:33 +0000 Subject: devtmpfs: Convert dirlock to a mutex devtmpfs has a rw_lock dirlock which serializes delete_path and create_path. This code was obviously never tested with the usual set of debugging facilities enabled. In the dirlock held sections the code calls: - vfs functions which take mutexes - kmalloc(, GFP_KERNEL) In both code pathes the might sleep warning triggers and spams dmesg. Convert the rw_lock to a mutex. There is no reason why this needs to be a rwlock. 
Signed-off-by: Thomas Gleixner Cc: Kay Sievers Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/base/devtmpfs.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 50375bb8e51d..278371c7bf5a 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -32,7 +32,7 @@ static int dev_mount = 1; static int dev_mount; #endif -static rwlock_t dirlock; +static DEFINE_MUTEX(dirlock); static int __init mount_param(char *str) { @@ -93,7 +93,7 @@ static int create_path(const char *nodepath) { int err; - read_lock(&dirlock); + mutex_lock(&dirlock); err = dev_mkdir(nodepath, 0755); if (err == -ENOENT) { char *path; @@ -117,7 +117,7 @@ static int create_path(const char *nodepath) } kfree(path); } - read_unlock(&dirlock); + mutex_unlock(&dirlock); return err; } @@ -229,7 +229,7 @@ static int delete_path(const char *nodepath) if (!path) return -ENOMEM; - write_lock(&dirlock); + mutex_lock(&dirlock); for (;;) { char *base; @@ -241,7 +241,7 @@ static int delete_path(const char *nodepath) if (err) break; } - write_unlock(&dirlock); + mutex_unlock(&dirlock); kfree(path); return err; @@ -352,8 +352,6 @@ int __init devtmpfs_init(void) int err; struct vfsmount *mnt; - rwlock_init(&dirlock); - err = register_filesystem(&dev_fs_type); if (err) { printk(KERN_ERR "devtmpfs: unable to register devtmpfs " -- cgit v1.2.2 From 26579ab70aa0e0ea434e6e100279d2f67c094431 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Fri, 18 Dec 2009 15:34:19 +0200 Subject: Driver core: device_attribute parameters can often be const* Most device_attributes are const, and are begging to be put in a ro section. However, the create and remove file interfaces were failing to propagate the const promise which the only functions they call offer. 
Signed-off-by: Phil Carmody Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/core.c b/drivers/base/core.c index f1290cbd1350..2fd9e611f8a6 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -446,7 +446,8 @@ struct kset *devices_kset; * @dev: device. * @attr: device attribute descriptor. */ -int device_create_file(struct device *dev, struct device_attribute *attr) +int device_create_file(struct device *dev, + const struct device_attribute *attr) { int error = 0; if (dev) @@ -459,7 +460,8 @@ int device_create_file(struct device *dev, struct device_attribute *attr) * @dev: device. * @attr: device attribute descriptor. */ -void device_remove_file(struct device *dev, struct device_attribute *attr) +void device_remove_file(struct device *dev, + const struct device_attribute *attr) { if (dev) sysfs_remove_file(&dev->kobj, &attr->attr); -- cgit v1.2.2 From 66ecb92be9eb579df93add22d19843e7869f168e Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Fri, 18 Dec 2009 15:34:20 +0200 Subject: Driver core: bin_attribute parameters can often be const* Many struct bin_attribute descriptors are purely read-only structures, and there's no need to change them. Therefore make the promise not to, which will let those descriptors be put in a ro section. Signed-off-by: Phil Carmody Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/core.c b/drivers/base/core.c index 2fd9e611f8a6..83afc8b8f27b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -472,7 +472,8 @@ void device_remove_file(struct device *dev, * @dev: device. * @attr: device binary attribute descriptor. 
*/ -int device_create_bin_file(struct device *dev, struct bin_attribute *attr) +int device_create_bin_file(struct device *dev, + const struct bin_attribute *attr) { int error = -EINVAL; if (dev) @@ -486,7 +487,8 @@ EXPORT_SYMBOL_GPL(device_create_bin_file); * @dev: device. * @attr: device binary attribute descriptor. */ -void device_remove_bin_file(struct device *dev, struct bin_attribute *attr) +void device_remove_bin_file(struct device *dev, + const struct bin_attribute *attr) { if (dev) sysfs_remove_bin_file(&dev->kobj, attr); -- cgit v1.2.2 From 099c2f21d8cf0724b85abb2c589d6276953781b7 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Fri, 18 Dec 2009 15:34:21 +0200 Subject: Driver core: driver_attribute parameters can often be const* Many struct driver_attribute descriptors are purely read-only structures, and there's no need to change them. Therefore make the promise not to, which will let those descriptors be put in a ro section. Signed-off-by: Phil Carmody Signed-off-by: Greg Kroah-Hartman --- drivers/base/driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/driver.c b/drivers/base/driver.c index f367885a7646..90c9fff09ead 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(driver_find_device); * @attr: driver attribute descriptor. */ int driver_create_file(struct device_driver *drv, - struct driver_attribute *attr) + const struct driver_attribute *attr) { int error; if (drv) @@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(driver_create_file); * @attr: driver attribute descriptor. 
*/ void driver_remove_file(struct device_driver *drv, - struct driver_attribute *attr) + const struct driver_attribute *attr) { if (drv) sysfs_remove_file(&drv->p->kobj, &attr->attr); -- cgit v1.2.2 From e6309e7568d4b9d62298a887b10de42df11cb8c1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 19:32:49 +0000 Subject: Driver-core: Fix bogus 0 error return in device_add() If device_add() is called with a device which does not have dev->p set up, then device_private_init() is called. If that succeeds, then the error variable is set to 0. Now if the dev_name(dev) check further down fails, then device_add() correctly terminates, but returns 0. That of course lets the driver progress. If later another driver uses this half set up device as parent then device_add() of the child device explodes and renders sysfs completely unusable. Set the error to -EINVAL if dev_name() check fails. Signed-off-by: Thomas Gleixner Cc: Kay Sievers Cc: "Hans J. Koch" Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/core.c b/drivers/base/core.c index 83afc8b8f27b..282025770429 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -909,8 +909,10 @@ int device_add(struct device *dev) dev->init_name = NULL; } - if (!dev_name(dev)) + if (!dev_name(dev)) { + error = -EINVAL; goto name_error; + } pr_debug("device: '%s': %s\n", dev_name(dev), __func__); -- cgit v1.2.2 From 99b28f1b4126582f87ce454d4affb823bddf2cd8 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Mon, 14 Dec 2009 20:28:12 +0200 Subject: driver core: Prevent reference to freed memory on error path priv is drv->p. So only free drv->p after we've finished using priv. 
Found using a static code analysis tool Signed-off-by: Phil Carmody Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 63c143e54a57..c0c5a43d9fb3 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -703,9 +703,9 @@ int bus_add_driver(struct device_driver *drv) return 0; out_unregister: + kobject_put(&priv->kobj); kfree(drv->p); drv->p = NULL; - kobject_put(&priv->kobj); out_put_bus: bus_put(bus); return error; -- cgit v1.2.2 From 0787fdf70ba4c41a3350096ebaa347a17e900385 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 21 Dec 2009 11:41:08 -0500 Subject: Driver core: export platform_device_register_data as a GPL symbol This allows MFD's to register/bind drivers for their sub devices while still being compiled as a module. Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/base') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9d2ee25deaf5..58efaf2f1259 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -441,6 +441,7 @@ error: platform_device_put(pdev); return ERR_PTR(retval); } +EXPORT_SYMBOL_GPL(platform_device_register_data); static int platform_drv_probe(struct device *_dev) { -- cgit v1.2.2 From 8042273801059884da2d53bbca34575d090b6f4e Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 22 Dec 2009 22:25:16 +0100 Subject: devtmpfs: unlock mutex in case of string allocation error Reported-by: Kirill A. 
Shutemov Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/base/devtmpfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 278371c7bf5a..090dd4851301 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -101,8 +101,10 @@ static int create_path(const char *nodepath) /* parent directories do not exist, create them */ path = kstrdup(nodepath, GFP_KERNEL); - if (!path) - return -ENOMEM; + if (!path) { + err = -ENOMEM; + goto out; + } s = path; for (;;) { s = strchr(s, '/'); @@ -117,6 +119,7 @@ static int create_path(const char *nodepath) } kfree(path); } +out: mutex_unlock(&dirlock); return err; } -- cgit v1.2.2 From 0a88422312f5bf7b9e3450e27d8ddc385af38789 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 8 Jan 2010 14:42:57 -0800 Subject: power: fix kernel-doc notation Warning(drivers/base/power/main.c:453): No description found for parameter 'dev' Warning(drivers/base/power/main.c:453): No description found for parameter 'cb' Warning(drivers/base/power/main.c:719): No description found for parameter 'dev' Warning(drivers/base/power/main.c:719): No description found for parameter 'state' Warning(drivers/base/power/main.c:719): No description found for parameter 'cb' Signed-off-by: Randy Dunlap Cc: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/power/main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 48adf80926a0..a5142bddef41 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -446,8 +446,8 @@ EXPORT_SYMBOL_GPL(dpm_resume_noirq); /** * legacy_resume - Execute a legacy (bus or class) resume callback for device. - * dev: Device to resume. - * cb: Resume callback to execute. + * @dev: Device to resume. 
+ * @cb: Resume callback to execute. */ static int legacy_resume(struct device *dev, int (*cb)(struct device *dev)) { @@ -711,8 +711,9 @@ EXPORT_SYMBOL_GPL(dpm_suspend_noirq); /** * legacy_suspend - Execute a legacy (bus or class) suspend callback for device. - * dev: Device to suspend. - * cb: Suspend callback to execute. + * @dev: Device to suspend. + * @state: PM transition of the system being carried out. + * @cb: Suspend callback to execute. */ static int legacy_suspend(struct device *dev, pm_message_t state, int (*cb)(struct device *dev, pm_message_t state)) -- cgit v1.2.2 From ba168fc37dea145deeb8fa9e7e71c748d2e00d74 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 15 Jan 2010 17:01:31 -0800 Subject: memory-hotplug: add 0x prefix to HEX block_size_bytes Signed-off-by: Wu Fengguang Cc: Andi Kleen Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index d7d77d4a402c..bd025059711f 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -311,7 +311,7 @@ static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL); static ssize_t print_block_size(struct class *class, char *buf) { - return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); + return sprintf(buf, "%#lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); } static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); -- cgit v1.2.2 From 8ff410daa009c4b44be445ded5b0cec00abc0426 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 15 Jan 2010 17:01:32 -0800 Subject: sysdev: fix prototype for memory_sysdev_class show/store functions The function prototype mismatches in call stack: [] print_block_size+0x58/0x60 [] sysdev_class_show+0x1f/0x30 [] sysfs_read_file+0xcb/0x1f0 [] vfs_read+0xc8/0x180 Due to prototype mismatch, print_block_size() will sprintf() into *attribute instead of 
*buf, hence user space will read the initial zeros from *buf: $ hexdump /sys/devices/system/memory/block_size_bytes 0000000 0000 0000 0000 0000 0000008 After patch: cat /sys/devices/system/memory/block_size_bytes 0x8000000 This complements commits c29af9636 and 4a0b2b4dbe. Signed-off-by: Wu Fengguang Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: "Zheng, Shaohui" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index bd025059711f..ae6b6c43cff9 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -309,17 +309,19 @@ static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL); * Block size attribute stuff */ static ssize_t -print_block_size(struct class *class, char *buf) +print_block_size(struct sysdev_class *class, + struct sysdev_class_attribute *class_attr, + char *buf) { return sprintf(buf, "%#lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); } -static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); +static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); static int block_size_init(void) { return sysfs_create_file(&memory_sysdev_class.kset.kobj, - &class_attr_block_size_bytes.attr); + &attr_block_size_bytes.attr); } /* @@ -330,7 +332,9 @@ static int block_size_init(void) */ #ifdef CONFIG_ARCH_MEMORY_PROBE static ssize_t -memory_probe_store(struct class *class, const char *buf, size_t count) +memory_probe_store(struct sysdev_class *class, + struct sysdev_class_attribute *class_attr, + const char *buf, size_t count) { u64 phys_addr; int nid; @@ -346,12 +350,12 @@ memory_probe_store(struct class *class, const char *buf, size_t count) return count; } -static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store); +static SYSDEV_CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store); static int memory_probe_init(void) { 
return sysfs_create_file(&memory_sysdev_class.kset.kobj, - &class_attr_probe.attr); + &attr_probe.attr); } #else static inline int memory_probe_init(void) @@ -367,7 +371,9 @@ static inline int memory_probe_init(void) /* Soft offline a page */ static ssize_t -store_soft_offline_page(struct class *class, const char *buf, size_t count) +store_soft_offline_page(struct sysdev_class *class, + struct sysdev_class_attribute *class_attr, + const char *buf, size_t count) { int ret; u64 pfn; @@ -384,7 +390,9 @@ store_soft_offline_page(struct class *class, const char *buf, size_t count) /* Forcibly offline a page, including killing processes. */ static ssize_t -store_hard_offline_page(struct class *class, const char *buf, size_t count) +store_hard_offline_page(struct sysdev_class *class, + struct sysdev_class_attribute *class_attr, + const char *buf, size_t count) { int ret; u64 pfn; @@ -397,18 +405,18 @@ store_hard_offline_page(struct class *class, const char *buf, size_t count) return ret ? ret : count; } -static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page); -static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page); +static SYSDEV_CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page); +static SYSDEV_CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page); static __init int memory_fail_init(void) { int err; err = sysfs_create_file(&memory_sysdev_class.kset.kobj, - &class_attr_soft_offline_page.attr); + &attr_soft_offline_page.attr); if (!err) err = sysfs_create_file(&memory_sysdev_class.kset.kobj, - &class_attr_hard_offline_page.attr); + &attr_hard_offline_page.attr); return err; } #else -- cgit v1.2.2 From f776c5ec4690b21b3668ad5956774a22c86f541a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 18 Jan 2010 14:36:12 +0100 Subject: driver-core: fix devtmpfs crash on s390 On Mon, Jan 18, 2010 at 05:26:20PM +0530, Sachin Sant wrote: > Hello Heiko, > > Today while trying to boot next-20100118 i came across > 
the following Oops : > > Brought up 4 CPUs > Unable to handle kernel pointer dereference at virtual kernel address 0000000000 > 543000 > Oops: 0004 #1 SMP > Modules linked in: > CPU: 0 Not tainted 2.6.33-rc4-autotest-next-20100118-5-default #1 > Process swapper (pid: 1, task: 00000000fd792038, ksp: 00000000fd797a30) > Krnl PSW : 0704200180000000 00000000001eb0b8 (shmem_parse_options+0xc0/0x328) > R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:2 PM:0 EA:3 > Krnl GPRS: 000000000054388a 000000000000003d 0000000000543836 000000000000003d > 0000000000000000 0000000000483f28 0000000000536112 00000000fd797d00 > 00000000fd4ba100 0000000000000100 0000000000483978 0000000000543832 > 0000000000000000 0000000000465958 00000000001eb0b0 00000000fd797c58 > Krnl Code: 00000000001eb0aa: c0e5000994f1 brasl %r14,31da8c > 00000000001eb0b0: b9020022 ltgr %r2,%r2 > 00000000001eb0b4: a784010b brc 8,1eb2ca > >00000000001eb0b8: 92002000 mvi 0(%r2),0 > 00000000001eb0bc: a7080000 lhi %r0,0 > 00000000001eb0c0: 41902001 la %r9,1(%r2) > 00000000001eb0c4: b9040016 lgr %r1,%r6 > 00000000001eb0c8: b904002b lgr %r2,%r11 > Call Trace: > (<00000000fd797c50> 0xfd797c50) > <00000000001eb5da> shmem_fill_super+0x13a/0x25c > <0000000000228cfa> get_sb_single+0xbe/0xdc > <000000000034ffc0> dev_get_sb+0x2c/0x38 > <000000000066c602> devtmpfs_init+0x46/0xc0 > <000000000066c53e> driver_init+0x22/0x60 > <000000000064d40a> kernel_init+0x24e/0x3d0 > <000000000010a7ea> kernel_thread_starter+0x6/0xc > <000000000010a7e4> kernel_thread_starter+0x0/0xc > > I never tried to boot a kernel with DEVTMPFS enabled on a s390 box. > So am wondering if this is supported or not ? If you think this > is supported i will send a mail to community on this. There is nothing arch specific to devtmpfs. This part crashes because the kernel tries to modify the data read-only section which is write protected on s390. 
Signed-off-by: Heiko Carstens Acked-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/base/devtmpfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 090dd4851301..42ae452b36b0 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -354,6 +354,7 @@ int __init devtmpfs_init(void) { int err; struct vfsmount *mnt; + char options[] = "mode=0755"; err = register_filesystem(&dev_fs_type); if (err) { @@ -362,7 +363,7 @@ int __init devtmpfs_init(void) return err; } - mnt = kern_mount_data(&dev_fs_type, "mode=0755"); + mnt = kern_mount_data(&dev_fs_type, options); if (IS_ERR(mnt)) { err = PTR_ERR(mnt); printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err); -- cgit v1.2.2 From bd796671f093d5b1841d383674d5650f5ec6c9c6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Jan 2010 13:08:16 -0800 Subject: Revert "sysdev: fix prototype for memory_sysdev_class show/store functions" This reverts commit 8ff410daa009c4b44be445ded5b0cec00abc0426 It should not have been sent to Linus's tree yet, as it depends on changes that are queued up in my driver-core for the .34 kernel merge. 
Cc: Wu Fengguang Cc: Andi Kleen Cc: "Zheng, Shaohui" Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index ae6b6c43cff9..bd025059711f 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -309,19 +309,17 @@ static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL); * Block size attribute stuff */ static ssize_t -print_block_size(struct sysdev_class *class, - struct sysdev_class_attribute *class_attr, - char *buf) +print_block_size(struct class *class, char *buf) { return sprintf(buf, "%#lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); } -static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); +static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); static int block_size_init(void) { return sysfs_create_file(&memory_sysdev_class.kset.kobj, - &attr_block_size_bytes.attr); + &class_attr_block_size_bytes.attr); } /* @@ -332,9 +330,7 @@ static int block_size_init(void) */ #ifdef CONFIG_ARCH_MEMORY_PROBE static ssize_t -memory_probe_store(struct sysdev_class *class, - struct sysdev_class_attribute *class_attr, - const char *buf, size_t count) +memory_probe_store(struct class *class, const char *buf, size_t count) { u64 phys_addr; int nid; @@ -350,12 +346,12 @@ memory_probe_store(struct sysdev_class *class, return count; } -static SYSDEV_CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store); +static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store); static int memory_probe_init(void) { return sysfs_create_file(&memory_sysdev_class.kset.kobj, - &attr_probe.attr); + &class_attr_probe.attr); } #else static inline int memory_probe_init(void) @@ -371,9 +367,7 @@ static inline int memory_probe_init(void) /* Soft offline a page */ static ssize_t -store_soft_offline_page(struct sysdev_class *class, - struct 
sysdev_class_attribute *class_attr, - const char *buf, size_t count) +store_soft_offline_page(struct class *class, const char *buf, size_t count) { int ret; u64 pfn; @@ -390,9 +384,7 @@ store_soft_offline_page(struct sysdev_class *class, /* Forcibly offline a page, including killing processes. */ static ssize_t -store_hard_offline_page(struct sysdev_class *class, - struct sysdev_class_attribute *class_attr, - const char *buf, size_t count) +store_hard_offline_page(struct class *class, const char *buf, size_t count) { int ret; u64 pfn; @@ -405,18 +397,18 @@ store_hard_offline_page(struct sysdev_class *class, return ret ? ret : count; } -static SYSDEV_CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page); -static SYSDEV_CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page); +static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page); +static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page); static __init int memory_fail_init(void) { int err; err = sysfs_create_file(&memory_sysdev_class.kset.kobj, - &attr_soft_offline_page.attr); + &class_attr_soft_offline_page.attr); if (!err) err = sysfs_create_file(&memory_sysdev_class.kset.kobj, - &attr_hard_offline_page.attr); + &class_attr_hard_offline_page.attr); return err; } #else -- cgit v1.2.2