From 9ac023989e6dd1b97140b47fb942a7940d0b2af2 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 27 Jun 2006 02:53:27 -0700
Subject: [PATCH] acpi memory hotplug cannot manage _CRS with plural resoureces

Current acpi memory hotplug just looks into the first entry of resources in
_CRS.  But, _CRS can contain plural resources.  So, if _CRS contains plural
resoureces, acpi memory hot add cannot add all memory.

With this patch, acpi memory hotplug can deal with Memory Device, whose
_CRS contains plural resources.

Tested on ia64 memory hotplug test envrionment (not emulation, uses alpha
version firmware which supports dynamic reconfiguration of NUMA.)

Note: Microsoft's Windows Server 2003 requires big (>4G)resoureces to be
      divided into small (<4G) resources. looks crazy, but not invalid.
      (See http://www.microsoft.com/whdc/system/pnppwr/hotadd/hotaddmem.mspx)
      For this reason, a firmware vendor who supports Windows writes plural
      resources in a _CRS even if they are contiguous.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/acpi/acpi_memhotplug.c | 112 ++++++++++++++++++++++++++++-------------
 1 file changed, 77 insertions(+), 35 deletions(-)

(limited to 'drivers/acpi')

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index e0a95ba72371..1486e03bb41a 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -68,45 +68,75 @@ static struct acpi_driver acpi_memory_device_driver = {
 		},
 };
 
+struct acpi_memory_info {
+	struct list_head list;
+	u64 start_addr;		/* Memory Range start physical addr */
+	u64 length;		/* Memory Range length */
+	unsigned short caching;	/* memory cache attribute */
+	unsigned short write_protect;	/* memory read/write attribute */
+	unsigned int enabled:1;
+};
+
 struct acpi_memory_device {
 	acpi_handle handle;
 	unsigned int state;	/* State of the memory device */
-	unsigned short caching;	/* memory cache attribute */
-	unsigned short write_protect;	/* memory read/write attribute */
-	u64 start_addr;		/* Memory Range start physical addr */
-	u64 length;		/* Memory Range length */
+	struct list_head res_list;
 };
 
+static acpi_status
+acpi_memory_get_resource(struct acpi_resource *resource, void *context)
+{
+	struct acpi_memory_device *mem_device = context;
+	struct acpi_resource_address64 address64;
+	struct acpi_memory_info *info, *new;
+	acpi_status status;
+
+	status = acpi_resource_to_address64(resource, &address64);
+	if (ACPI_FAILURE(status) ||
+	    (address64.resource_type != ACPI_MEMORY_RANGE))
+		return AE_OK;
+
+	list_for_each_entry(info, &mem_device->res_list, list) {
+		/* Can we combine the resource range information? */
+		if ((info->caching == address64.info.mem.caching) &&
+		    (info->write_protect == address64.info.mem.write_protect) &&
+		    (info->start_addr + info->length == address64.minimum)) {
+			info->length += address64.address_length;
+			return AE_OK;
+		}
+	}
+
+	new = kzalloc(sizeof(struct acpi_memory_info), GFP_KERNEL);
+	if (!new)
+		return AE_ERROR;
+
+	INIT_LIST_HEAD(&new->list);
+	new->caching = address64.info.mem.caching;
+	new->write_protect = address64.info.mem.write_protect;
+	new->start_addr = address64.minimum;
+	new->length = address64.address_length;
+	list_add_tail(&new->list, &mem_device->res_list);
+
+	return AE_OK;
+}
+
 static int
 acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
 {
 	acpi_status status;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_resource *resource = NULL;
-	struct acpi_resource_address64 address64;
+	struct acpi_memory_info *info, *n;
 
 	ACPI_FUNCTION_TRACE("acpi_memory_get_device_resources");
 
-	/* Get the range from the _CRS */
-	status = acpi_get_current_resources(mem_device->handle, &buffer);
-	if (ACPI_FAILURE(status))
-		return_VALUE(-EINVAL);
-
-	resource = (struct acpi_resource *)buffer.pointer;
-	status = acpi_resource_to_address64(resource, &address64);
-	if (ACPI_SUCCESS(status)) {
-		if (address64.resource_type == ACPI_MEMORY_RANGE) {
-			/* Populate the structure */
-			mem_device->caching = address64.info.mem.caching;
-			mem_device->write_protect =
-			    address64.info.mem.write_protect;
-			mem_device->start_addr = address64.minimum;
-			mem_device->length = address64.address_length;
-		}
+	status = acpi_walk_resources(mem_device->handle, METHOD_NAME__CRS,
+				     acpi_memory_get_resource, mem_device);
+	if (ACPI_FAILURE(status)) {
+		list_for_each_entry_safe(info, n, &mem_device->res_list, list)
+			kfree(info);
+		return -EINVAL;
 	}
 
-	acpi_os_free(buffer.pointer);
-	return_VALUE(0);
+	return 0;
 }
 
 static int
@@ -181,7 +211,8 @@ static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
 
 static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 {
-	int result;
+	int result, num_enabled = 0;
+	struct acpi_memory_info *info;
 
 	ACPI_FUNCTION_TRACE("acpi_memory_enable_device");
 
@@ -197,12 +228,20 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 	/*
 	 * Tell the VM there is more memory here...
 	 * Note: Assume that this function returns zero on success
+	 * We don't have memory-hot-add rollback function,now.
+	 * (i.e. memory-hot-remove function)
 	 */
-	result = add_memory(mem_device->start_addr, mem_device->length);
-	if (result) {
+	list_for_each_entry(info, &mem_device->res_list, list) {
+		result = add_memory(info->start_addr, info->length);
+		if (result)
+			continue;
+		info->enabled = 1;
+		num_enabled++;
+	}
+	if (!num_enabled) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "\nadd_memory failed\n"));
 		mem_device->state = MEMORY_INVALID_STATE;
-		return result;
+		return -EINVAL;
 	}
 
 	return result;
@@ -246,8 +285,7 @@ static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device)
 static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
 {
 	int result;
-	u64 start = mem_device->start_addr;
-	u64 len = mem_device->length;
+	struct acpi_memory_info *info, *n;
 
 	ACPI_FUNCTION_TRACE("acpi_memory_disable_device");
 
@@ -255,10 +293,13 @@ static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
 	 * Ask the VM to offline this memory range.
 	 * Note: Assume that this function returns zero on success
 	 */
-	result = remove_memory(start, len);
-	if (result) {
-		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Hot-Remove failed.\n"));
-		return_VALUE(result);
+	list_for_each_entry_safe(info, n, &mem_device->res_list, list) {
+		if (info->enabled) {
+			result = remove_memory(info->start_addr, info->length);
+			if (result)
+				return result;
+		}
+		kfree(info);
 	}
 
 	/* Power-off and eject the device */
@@ -356,6 +397,7 @@ static int acpi_memory_device_add(struct acpi_device *device)
 		return_VALUE(-ENOMEM);
 	memset(mem_device, 0, sizeof(struct acpi_memory_device));
 
+	INIT_LIST_HEAD(&mem_device->res_list);
 	mem_device->handle = device->handle;
 	sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
 	sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
-- 
cgit v1.2.2


From 1f425994f96d85540d47eee98daabc1e211b454e Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Tue, 27 Jun 2006 02:53:28 -0700
Subject: [PATCH] Catch notification of memory add event of ACPI via container
 driver. (register start func for memory device)

This is a patch to call add_memroy() when notify reaches for new node's add
event.

When new node is added, notify of ACPI reaches container device which means
the node.

Container device driver calls acpi_bus_scan() to find and add belonging
devices (which means cpu, memory and so on).  Its function calls add and
start function of belonging devices's driver.

Howevever, current memory hotplug driver just register add function to
create sysfs file for its memory.  But, acpi_memory_enable_device() is not
called because it is considered just the case that notify reaches memory
device directly.  So, if notify reaches container device nothing can call
add_memory().

This is a patch to create start function which calls add_memory().
add_memory() can be called by this when notify reaches container device.

[akpm@osdl.org: coding cleanups]
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: "Brown, Len" <len.brown@intel.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/acpi/acpi_memhotplug.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'drivers/acpi')

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 1486e03bb41a..3721f8dd0de3 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -57,6 +57,7 @@ MODULE_LICENSE("GPL");
 
 static int acpi_memory_device_add(struct acpi_device *device);
 static int acpi_memory_device_remove(struct acpi_device *device, int type);
+static int acpi_memory_device_start(struct acpi_device *device);
 
 static struct acpi_driver acpi_memory_device_driver = {
 	.name = ACPI_MEMORY_DEVICE_DRIVER_NAME,
@@ -65,6 +66,7 @@ static struct acpi_driver acpi_memory_device_driver = {
 	.ops = {
 		.add = acpi_memory_device_add,
 		.remove = acpi_memory_device_remove,
+		.start = acpi_memory_device_start,
 		},
 };
 
@@ -433,6 +435,25 @@ static int acpi_memory_device_remove(struct acpi_device *device, int type)
 	return_VALUE(0);
 }
 
+static int acpi_memory_device_start (struct acpi_device *device)
+{
+	struct acpi_memory_device *mem_device;
+	int result = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_memory_device_start");
+
+	mem_device = acpi_driver_data(device);
+
+	if (!acpi_memory_check_device(mem_device)) {
+		/* call add_memory func */
+		result = acpi_memory_enable_device(mem_device);
+		if (result)
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				"Error in acpi_memory_enable_device\n"));
+	}
+	return_VALUE(result);
+}
+
 /*
  * Helper function to check for memory device
  */
-- 
cgit v1.2.2


From dd56a8e36f91f63c0a31e8a118d87b7cf01526b8 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Tue, 27 Jun 2006 02:53:29 -0700
Subject: [PATCH] Catch notification of memory add event of ACPI via container
 driver. (avoid redundant call add_memory)

When acpi_memory_device_init() is called at boottime to register struct
memory acpi_memory_device, acpi_bus_add() are called via
acpi_driver_attach().

But it also calls ops->start() function.  It is called even if the memory
blocks are initialized at early boottime.  In this case add_memory() return
-EEXIST, and the memory blocks becomes INVALID state even if it is normal.

This is patch to avoid calling add_memory() for already available memory.

[akpm@osdl.org: coding cleanups]
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: "Brown, Len" <len.brown@intel.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/acpi/acpi_memhotplug.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'drivers/acpi')

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 3721f8dd0de3..5652569b3762 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -234,6 +234,17 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 	 * (i.e. memory-hot-remove function)
 	 */
 	list_for_each_entry(info, &mem_device->res_list, list) {
+		u64 start_pfn, end_pfn;
+
+		start_pfn = info->start_addr >> PAGE_SHIFT;
+		end_pfn = (info->start_addr + info->length - 1) >> PAGE_SHIFT;
+
+		if (pfn_valid(start_pfn) || pfn_valid(end_pfn)) {
+			/* already enabled. try next area */
+			num_enabled++;
+			continue;
+		}
+
 		result = add_memory(info->start_addr, info->length);
 		if (result)
 			continue;
-- 
cgit v1.2.2


From bc02af93dd2bbddce1b55e0a493f833a1b7cf140 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Tue, 27 Jun 2006 02:53:30 -0700
Subject: [PATCH] pgdat allocation for new node add (specify node id)

Change the name of old add_memory() to arch_add_memory.  And use node id to
get pgdat for the node at NODE_DATA().

Note: Powerpc's old add_memory() is defined as __devinit. However,
      add_memory() is usually called only after bootup.
      I suppose it may be redundant. But, I'm not well known about powerpc.
      So, I keep it. (But, __meminit is better at least.)

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/acpi/Kconfig           | 2 +-
 drivers/acpi/acpi_memhotplug.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/acpi')

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 94b8d820c512..610d2cc02cf8 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -328,7 +328,7 @@ config ACPI_CONTAINER
 config ACPI_HOTPLUG_MEMORY
 	tristate "Memory Hotplug"
 	depends on ACPI
-	depends on MEMORY_HOTPLUG || X86_64
+	depends on MEMORY_HOTPLUG
 	default n
 	help
 	  This driver adds supports for ACPI Memory Hotplug.  This driver
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 5652569b3762..0424326eae15 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -215,6 +215,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 {
 	int result, num_enabled = 0;
 	struct acpi_memory_info *info;
+	int node = 0;
 
 	ACPI_FUNCTION_TRACE("acpi_memory_enable_device");
 
@@ -245,7 +246,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 			continue;
 		}
 
-		result = add_memory(info->start_addr, info->length);
+		result = add_memory(node, info->start_addr, info->length);
 		if (result)
 			continue;
 		info->enabled = 1;
-- 
cgit v1.2.2


From 1e3590e2e4a38e8390fdac5bda23330bf2801838 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Tue, 27 Jun 2006 02:53:31 -0700
Subject: [PATCH] pgdat allocation for new node add (get node id by acpi)

This is to find node id from acpi's handle of memory_device in DSDT.  _PXM for
the new node can be found by acpi_get_pxm() by using new memory's handle.  So,
node id can be found by pxm_to_nid_map[].

  This patch becomes simpler than v2 of node hot-add patch.
  Because old add_memory() function doesn't have node id parameter.
  So, kernel must find its handle by physical address via DSDT again.
  But, v3 just give node id to add_memory() now.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/acpi/acpi_memhotplug.c |  3 ++-
 drivers/acpi/numa.c            | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'drivers/acpi')

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 0424326eae15..1012284ff4f7 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -215,7 +215,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 {
 	int result, num_enabled = 0;
 	struct acpi_memory_info *info;
-	int node = 0;
+	int node;
 
 	ACPI_FUNCTION_TRACE("acpi_memory_enable_device");
 
@@ -228,6 +228,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 		return result;
 	}
 
+	node = acpi_get_node(mem_device->handle);
 	/*
 	 * Tell the VM there is more memory here...
 	 * Note: Assume that this function returns zero on success
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index e2c1a16078c9..13d6d5bdea26 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -254,5 +254,18 @@ int acpi_get_pxm(acpi_handle h)
 	} while (ACPI_SUCCESS(status));
 	return -1;
 }
-
 EXPORT_SYMBOL(acpi_get_pxm);
+
+int acpi_get_node(acpi_handle *handle)
+{
+	int pxm, node = -1;
+
+	ACPI_FUNCTION_TRACE("acpi_get_node");
+
+	pxm = acpi_get_pxm(handle);
+	if (pxm >= 0)
+		node = acpi_map_pxm_to_node(pxm);
+
+	return_VALUE(node);
+}
+EXPORT_SYMBOL(acpi_get_node);
-- 
cgit v1.2.2