diff options
Diffstat (limited to 'drivers')
128 files changed, 4677 insertions, 1850 deletions
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 7a0f4aa4fa1e..9a62224cc278 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c | |||
| @@ -38,6 +38,9 @@ | |||
| 38 | 38 | ||
| 39 | #define _COMPONENT ACPI_MEMORY_DEVICE_COMPONENT | 39 | #define _COMPONENT ACPI_MEMORY_DEVICE_COMPONENT |
| 40 | 40 | ||
| 41 | #undef PREFIX | ||
| 42 | #define PREFIX "ACPI:memory_hp:" | ||
| 43 | |||
| 41 | ACPI_MODULE_NAME("acpi_memhotplug"); | 44 | ACPI_MODULE_NAME("acpi_memhotplug"); |
| 42 | MODULE_AUTHOR("Naveen B S <naveen.b.s@intel.com>"); | 45 | MODULE_AUTHOR("Naveen B S <naveen.b.s@intel.com>"); |
| 43 | MODULE_DESCRIPTION("Hotplug Mem Driver"); | 46 | MODULE_DESCRIPTION("Hotplug Mem Driver"); |
| @@ -153,6 +156,7 @@ acpi_memory_get_device(acpi_handle handle, | |||
| 153 | acpi_handle phandle; | 156 | acpi_handle phandle; |
| 154 | struct acpi_device *device = NULL; | 157 | struct acpi_device *device = NULL; |
| 155 | struct acpi_device *pdevice = NULL; | 158 | struct acpi_device *pdevice = NULL; |
| 159 | int result; | ||
| 156 | 160 | ||
| 157 | 161 | ||
| 158 | if (!acpi_bus_get_device(handle, &device) && device) | 162 | if (!acpi_bus_get_device(handle, &device) && device) |
| @@ -165,9 +169,9 @@ acpi_memory_get_device(acpi_handle handle, | |||
| 165 | } | 169 | } |
| 166 | 170 | ||
| 167 | /* Get the parent device */ | 171 | /* Get the parent device */ |
| 168 | status = acpi_bus_get_device(phandle, &pdevice); | 172 | result = acpi_bus_get_device(phandle, &pdevice); |
| 169 | if (ACPI_FAILURE(status)) { | 173 | if (result) { |
| 170 | ACPI_EXCEPTION((AE_INFO, status, "Cannot get acpi bus device")); | 174 | printk(KERN_WARNING PREFIX "Cannot get acpi bus device"); |
| 171 | return -EINVAL; | 175 | return -EINVAL; |
| 172 | } | 176 | } |
| 173 | 177 | ||
| @@ -175,9 +179,9 @@ acpi_memory_get_device(acpi_handle handle, | |||
| 175 | * Now add the notified device. This creates the acpi_device | 179 | * Now add the notified device. This creates the acpi_device |
| 176 | * and invokes .add function | 180 | * and invokes .add function |
| 177 | */ | 181 | */ |
| 178 | status = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE); | 182 | result = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE); |
| 179 | if (ACPI_FAILURE(status)) { | 183 | if (result) { |
| 180 | ACPI_EXCEPTION((AE_INFO, status, "Cannot add acpi bus")); | 184 | printk(KERN_WARNING PREFIX "Cannot add acpi bus"); |
| 181 | return -EINVAL; | 185 | return -EINVAL; |
| 182 | } | 186 | } |
| 183 | 187 | ||
| @@ -238,7 +242,12 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) | |||
| 238 | num_enabled++; | 242 | num_enabled++; |
| 239 | continue; | 243 | continue; |
| 240 | } | 244 | } |
| 241 | 245 | /* | |
| 246 | * If the memory block size is zero, ignore it. | ||
| 247 | * Don't run the memory hotplug steps below for it. | ||
| 248 | */ | ||
| 249 | if (!info->length) | ||
| 250 | continue; | ||
| 242 | if (node < 0) | 251 | if (node < 0) |
| 243 | node = memory_add_physaddr_to_nid(info->start_addr); | 252 | node = memory_add_physaddr_to_nid(info->start_addr); |
| 244 | 253 | ||
| @@ -253,8 +262,15 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) | |||
| 253 | mem_device->state = MEMORY_INVALID_STATE; | 262 | mem_device->state = MEMORY_INVALID_STATE; |
| 254 | return -EINVAL; | 263 | return -EINVAL; |
| 255 | } | 264 | } |
| 256 | 265 | /* | |
| 257 | return result; | 266 | * Sometimes the memory device will contain several memory blocks. |
| 267 | * When one memory block is hot-added to the system memory, it will | ||
| 268 | * be regarded as a success. | ||
| 269 | * Otherwise, if the last memory block can't be hot-added to the system | ||
| 270 | * memory, it will be a failure and the memory device can't be bound to | ||
| 271 | * the driver. | ||
| 272 | */ | ||
| 273 | return 0; | ||
| 258 | } | 274 | } |
| 259 | 275 | ||
| 260 | static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device) | 276 | static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device) |
diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h index 544dcf834922..eb6f038b03d9 100644 --- a/drivers/acpi/acpica/acobject.h +++ b/drivers/acpi/acpica/acobject.h | |||
| @@ -97,6 +97,7 @@ | |||
| 97 | #define AOPOBJ_OBJECT_INITIALIZED 0x08 | 97 | #define AOPOBJ_OBJECT_INITIALIZED 0x08 |
| 98 | #define AOPOBJ_SETUP_COMPLETE 0x10 | 98 | #define AOPOBJ_SETUP_COMPLETE 0x10 |
| 99 | #define AOPOBJ_SINGLE_DATUM 0x20 | 99 | #define AOPOBJ_SINGLE_DATUM 0x20 |
| 100 | #define AOPOBJ_INVALID 0x40 /* Used if host OS won't allow an op_region address */ | ||
| 100 | 101 | ||
| 101 | /****************************************************************************** | 102 | /****************************************************************************** |
| 102 | * | 103 | * |
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c index 584d766e6f12..b79978f7bc71 100644 --- a/drivers/acpi/acpica/dsopcode.c +++ b/drivers/acpi/acpica/dsopcode.c | |||
| @@ -397,6 +397,30 @@ acpi_status acpi_ds_get_region_arguments(union acpi_operand_object *obj_desc) | |||
| 397 | status = acpi_ds_execute_arguments(node, acpi_ns_get_parent_node(node), | 397 | status = acpi_ds_execute_arguments(node, acpi_ns_get_parent_node(node), |
| 398 | extra_desc->extra.aml_length, | 398 | extra_desc->extra.aml_length, |
| 399 | extra_desc->extra.aml_start); | 399 | extra_desc->extra.aml_start); |
| 400 | if (ACPI_FAILURE(status)) { | ||
| 401 | return_ACPI_STATUS(status); | ||
| 402 | } | ||
| 403 | |||
| 404 | /* Validate the region address/length via the host OS */ | ||
| 405 | |||
| 406 | status = acpi_os_validate_address(obj_desc->region.space_id, | ||
| 407 | obj_desc->region.address, | ||
| 408 | (acpi_size) obj_desc->region.length, | ||
| 409 | acpi_ut_get_node_name(node)); | ||
| 410 | |||
| 411 | if (ACPI_FAILURE(status)) { | ||
| 412 | /* | ||
| 413 | * Invalid address/length. We will emit an error message and mark | ||
| 414 | * the region as invalid, so that it will cause an additional error if | ||
| 415 | * it is ever used. Then return AE_OK. | ||
| 416 | */ | ||
| 417 | ACPI_EXCEPTION((AE_INFO, status, | ||
| 418 | "During address validation of OpRegion [%4.4s]", | ||
| 419 | node->name.ascii)); | ||
| 420 | obj_desc->common.flags |= AOPOBJ_INVALID; | ||
| 421 | status = AE_OK; | ||
| 422 | } | ||
| 423 | |||
| 400 | return_ACPI_STATUS(status); | 424 | return_ACPI_STATUS(status); |
| 401 | } | 425 | } |
| 402 | 426 | ||
diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index d4075b821021..6687be167f5f 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c | |||
| @@ -113,6 +113,12 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, | |||
| 113 | } | 113 | } |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | /* Exit if Address/Length have been disallowed by the host OS */ | ||
| 117 | |||
| 118 | if (rgn_desc->common.flags & AOPOBJ_INVALID) { | ||
| 119 | return_ACPI_STATUS(AE_AML_ILLEGAL_ADDRESS); | ||
| 120 | } | ||
| 121 | |||
| 116 | /* | 122 | /* |
| 117 | * Exit now for SMBus address space, it has a non-linear address space | 123 | * Exit now for SMBus address space, it has a non-linear address space |
| 118 | * and the request cannot be directly validated | 124 | * and the request cannot be directly validated |
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 71670719d61a..5691f165a952 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c | |||
| @@ -189,11 +189,36 @@ acpi_status __init acpi_os_initialize(void) | |||
| 189 | return AE_OK; | 189 | return AE_OK; |
| 190 | } | 190 | } |
| 191 | 191 | ||
| 192 | static void bind_to_cpu0(struct work_struct *work) | ||
| 193 | { | ||
| 194 | set_cpus_allowed(current, cpumask_of_cpu(0)); | ||
| 195 | kfree(work); | ||
| 196 | } | ||
| 197 | |||
| 198 | static void bind_workqueue(struct workqueue_struct *wq) | ||
| 199 | { | ||
| 200 | struct work_struct *work; | ||
| 201 | |||
| 202 | work = kzalloc(sizeof(struct work_struct), GFP_KERNEL); | ||
| 203 | INIT_WORK(work, bind_to_cpu0); | ||
| 204 | queue_work(wq, work); | ||
| 205 | } | ||
| 206 | |||
| 192 | acpi_status acpi_os_initialize1(void) | 207 | acpi_status acpi_os_initialize1(void) |
| 193 | { | 208 | { |
| 209 | /* | ||
| 210 | * On some machines, a software-initiated SMI causes corruption unless | ||
| 211 | * the SMI runs on CPU 0. An SMI can be initiated by any AML, but | ||
| 212 | * typically it's done in GPE-related methods that are run via | ||
| 213 | * workqueues, so we can avoid the known corruption cases by binding | ||
| 214 | * the workqueues to CPU 0. | ||
| 215 | */ | ||
| 194 | kacpid_wq = create_singlethread_workqueue("kacpid"); | 216 | kacpid_wq = create_singlethread_workqueue("kacpid"); |
| 217 | bind_workqueue(kacpid_wq); | ||
| 195 | kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify"); | 218 | kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify"); |
| 219 | bind_workqueue(kacpi_notify_wq); | ||
| 196 | kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug"); | 220 | kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug"); |
| 221 | bind_workqueue(kacpi_hotplug_wq); | ||
| 197 | BUG_ON(!kacpid_wq); | 222 | BUG_ON(!kacpid_wq); |
| 198 | BUG_ON(!kacpi_notify_wq); | 223 | BUG_ON(!kacpi_notify_wq); |
| 199 | BUG_ON(!kacpi_hotplug_wq); | 224 | BUG_ON(!kacpi_hotplug_wq); |
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 0944daec064f..9c61ab2177cf 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c | |||
| @@ -121,7 +121,7 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr, | |||
| 121 | table_attr->attr.size = 0; | 121 | table_attr->attr.size = 0; |
| 122 | table_attr->attr.read = acpi_table_show; | 122 | table_attr->attr.read = acpi_table_show; |
| 123 | table_attr->attr.attr.name = table_attr->name; | 123 | table_attr->attr.attr.name = table_attr->name; |
| 124 | table_attr->attr.attr.mode = 0444; | 124 | table_attr->attr.attr.mode = 0400; |
| 125 | 125 | ||
| 126 | return; | 126 | return; |
| 127 | } | 127 | } |
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index f703f5478246..6d7fbaa92248 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c | |||
| @@ -36,7 +36,6 @@ | |||
| 36 | 36 | ||
| 37 | /* Register offsets */ | 37 | /* Register offsets */ |
| 38 | #define MG_BUFF_OFFSET 0x8000 | 38 | #define MG_BUFF_OFFSET 0x8000 |
| 39 | #define MG_STORAGE_BUFFER_SIZE 0x200 | ||
| 40 | #define MG_REG_OFFSET 0xC000 | 39 | #define MG_REG_OFFSET 0xC000 |
| 41 | #define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ | 40 | #define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ |
| 42 | #define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ | 41 | #define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ |
| @@ -219,6 +218,16 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) | |||
| 219 | host->error = MG_ERR_NONE; | 218 | host->error = MG_ERR_NONE; |
| 220 | expire = jiffies + msecs_to_jiffies(msec); | 219 | expire = jiffies + msecs_to_jiffies(msec); |
| 221 | 220 | ||
| 221 | /* These two dummy status reads prevent reading an invalid | ||
| 222 | * status. A very short time (3 periods of the mflash operating clk) | ||
| 223 | * is required before the busy bit is set. Use dummy reads instead of | ||
| 224 | * a busy wait, because mflash's PLL is machine dependent. | ||
| 225 | */ | ||
| 226 | if (prv_data->use_polling) { | ||
| 227 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); | ||
| 228 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); | ||
| 229 | } | ||
| 230 | |||
| 222 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); | 231 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); |
| 223 | 232 | ||
| 224 | do { | 233 | do { |
| @@ -245,8 +254,6 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) | |||
| 245 | mg_dump_status("not ready", status, host); | 254 | mg_dump_status("not ready", status, host); |
| 246 | return MG_ERR_INV_STAT; | 255 | return MG_ERR_INV_STAT; |
| 247 | } | 256 | } |
| 248 | if (prv_data->use_polling) | ||
| 249 | msleep(1); | ||
| 250 | 257 | ||
| 251 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); | 258 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); |
| 252 | } while (time_before(cur_jiffies, expire)); | 259 | } while (time_before(cur_jiffies, expire)); |
| @@ -469,9 +476,18 @@ static unsigned int mg_out(struct mg_host *host, | |||
| 469 | return MG_ERR_NONE; | 476 | return MG_ERR_NONE; |
| 470 | } | 477 | } |
| 471 | 478 | ||
| 479 | static void mg_read_one(struct mg_host *host, struct request *req) | ||
| 480 | { | ||
| 481 | u16 *buff = (u16 *)req->buffer; | ||
| 482 | u32 i; | ||
| 483 | |||
| 484 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | ||
| 485 | *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + | ||
| 486 | (i << 1)); | ||
| 487 | } | ||
| 488 | |||
| 472 | static void mg_read(struct request *req) | 489 | static void mg_read(struct request *req) |
| 473 | { | 490 | { |
| 474 | u32 j; | ||
| 475 | struct mg_host *host = req->rq_disk->private_data; | 491 | struct mg_host *host = req->rq_disk->private_data; |
| 476 | 492 | ||
| 477 | if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), | 493 | if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), |
| @@ -482,49 +498,65 @@ static void mg_read(struct request *req) | |||
| 482 | blk_rq_sectors(req), blk_rq_pos(req), req->buffer); | 498 | blk_rq_sectors(req), blk_rq_pos(req), req->buffer); |
| 483 | 499 | ||
| 484 | do { | 500 | do { |
| 485 | u16 *buff = (u16 *)req->buffer; | ||
| 486 | |||
| 487 | if (mg_wait(host, ATA_DRQ, | 501 | if (mg_wait(host, ATA_DRQ, |
| 488 | MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { | 502 | MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { |
| 489 | mg_bad_rw_intr(host); | 503 | mg_bad_rw_intr(host); |
| 490 | return; | 504 | return; |
| 491 | } | 505 | } |
| 492 | for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) | 506 | |
| 493 | *buff++ = inw((unsigned long)host->dev_base + | 507 | mg_read_one(host, req); |
| 494 | MG_BUFF_OFFSET + (j << 1)); | ||
| 495 | 508 | ||
| 496 | outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + | 509 | outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + |
| 497 | MG_REG_COMMAND); | 510 | MG_REG_COMMAND); |
| 498 | } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); | 511 | } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); |
| 499 | } | 512 | } |
| 500 | 513 | ||
| 514 | static void mg_write_one(struct mg_host *host, struct request *req) | ||
| 515 | { | ||
| 516 | u16 *buff = (u16 *)req->buffer; | ||
| 517 | u32 i; | ||
| 518 | |||
| 519 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | ||
| 520 | outw(*buff++, (unsigned long)host->dev_base + MG_BUFF_OFFSET + | ||
| 521 | (i << 1)); | ||
| 522 | } | ||
| 523 | |||
| 501 | static void mg_write(struct request *req) | 524 | static void mg_write(struct request *req) |
| 502 | { | 525 | { |
| 503 | u32 j; | ||
| 504 | struct mg_host *host = req->rq_disk->private_data; | 526 | struct mg_host *host = req->rq_disk->private_data; |
| 527 | unsigned int rem = blk_rq_sectors(req); | ||
| 505 | 528 | ||
| 506 | if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), | 529 | if (mg_out(host, blk_rq_pos(req), rem, |
| 507 | MG_CMD_WR, NULL) != MG_ERR_NONE) { | 530 | MG_CMD_WR, NULL) != MG_ERR_NONE) { |
| 508 | mg_bad_rw_intr(host); | 531 | mg_bad_rw_intr(host); |
| 509 | return; | 532 | return; |
| 510 | } | 533 | } |
| 511 | 534 | ||
| 512 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", | 535 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", |
| 513 | blk_rq_sectors(req), blk_rq_pos(req), req->buffer); | 536 | rem, blk_rq_pos(req), req->buffer); |
| 537 | |||
| 538 | if (mg_wait(host, ATA_DRQ, | ||
| 539 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | ||
| 540 | mg_bad_rw_intr(host); | ||
| 541 | return; | ||
| 542 | } | ||
| 514 | 543 | ||
| 515 | do { | 544 | do { |
| 516 | u16 *buff = (u16 *)req->buffer; | 545 | mg_write_one(host, req); |
| 517 | 546 | ||
| 518 | if (mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | 547 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + |
| 548 | MG_REG_COMMAND); | ||
| 549 | |||
| 550 | rem--; | ||
| 551 | if (rem > 1 && mg_wait(host, ATA_DRQ, | ||
| 552 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | ||
| 553 | mg_bad_rw_intr(host); | ||
| 554 | return; | ||
| 555 | } else if (mg_wait(host, MG_STAT_READY, | ||
| 556 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | ||
| 519 | mg_bad_rw_intr(host); | 557 | mg_bad_rw_intr(host); |
| 520 | return; | 558 | return; |
| 521 | } | 559 | } |
| 522 | for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) | ||
| 523 | outw(*buff++, (unsigned long)host->dev_base + | ||
| 524 | MG_BUFF_OFFSET + (j << 1)); | ||
| 525 | |||
| 526 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + | ||
| 527 | MG_REG_COMMAND); | ||
| 528 | } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); | 560 | } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); |
| 529 | } | 561 | } |
| 530 | 562 | ||
| @@ -532,7 +564,6 @@ static void mg_read_intr(struct mg_host *host) | |||
| 532 | { | 564 | { |
| 533 | struct request *req = host->req; | 565 | struct request *req = host->req; |
| 534 | u32 i; | 566 | u32 i; |
| 535 | u16 *buff; | ||
| 536 | 567 | ||
| 537 | /* check status */ | 568 | /* check status */ |
| 538 | do { | 569 | do { |
| @@ -550,13 +581,7 @@ static void mg_read_intr(struct mg_host *host) | |||
| 550 | return; | 581 | return; |
| 551 | 582 | ||
| 552 | ok_to_read: | 583 | ok_to_read: |
| 553 | /* get current segment of request */ | 584 | mg_read_one(host, req); |
| 554 | buff = (u16 *)req->buffer; | ||
| 555 | |||
| 556 | /* read 1 sector */ | ||
| 557 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | ||
| 558 | *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + | ||
| 559 | (i << 1)); | ||
| 560 | 585 | ||
| 561 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", | 586 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", |
| 562 | blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); | 587 | blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); |
| @@ -575,8 +600,7 @@ ok_to_read: | |||
| 575 | static void mg_write_intr(struct mg_host *host) | 600 | static void mg_write_intr(struct mg_host *host) |
| 576 | { | 601 | { |
| 577 | struct request *req = host->req; | 602 | struct request *req = host->req; |
| 578 | u32 i, j; | 603 | u32 i; |
| 579 | u16 *buff; | ||
| 580 | bool rem; | 604 | bool rem; |
| 581 | 605 | ||
| 582 | /* check status */ | 606 | /* check status */ |
| @@ -597,12 +621,7 @@ static void mg_write_intr(struct mg_host *host) | |||
| 597 | ok_to_write: | 621 | ok_to_write: |
| 598 | if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) { | 622 | if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) { |
| 599 | /* write 1 sector and set handler if remains */ | 623 | /* write 1 sector and set handler if remains */ |
| 600 | buff = (u16 *)req->buffer; | 624 | mg_write_one(host, req); |
| 601 | for (j = 0; j < MG_STORAGE_BUFFER_SIZE >> 1; j++) { | ||
| 602 | outw(*buff, (unsigned long)host->dev_base + | ||
| 603 | MG_BUFF_OFFSET + (j << 1)); | ||
| 604 | buff++; | ||
| 605 | } | ||
| 606 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", | 625 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", |
| 607 | blk_rq_pos(req), blk_rq_sectors(req), req->buffer); | 626 | blk_rq_pos(req), blk_rq_sectors(req), req->buffer); |
| 608 | host->mg_do_intr = mg_write_intr; | 627 | host->mg_do_intr = mg_write_intr; |
| @@ -667,9 +686,6 @@ static unsigned int mg_issue_req(struct request *req, | |||
| 667 | unsigned int sect_num, | 686 | unsigned int sect_num, |
| 668 | unsigned int sect_cnt) | 687 | unsigned int sect_cnt) |
| 669 | { | 688 | { |
| 670 | u16 *buff; | ||
| 671 | u32 i; | ||
| 672 | |||
| 673 | switch (rq_data_dir(req)) { | 689 | switch (rq_data_dir(req)) { |
| 674 | case READ: | 690 | case READ: |
| 675 | if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) | 691 | if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) |
| @@ -693,12 +709,7 @@ static unsigned int mg_issue_req(struct request *req, | |||
| 693 | mg_bad_rw_intr(host); | 709 | mg_bad_rw_intr(host); |
| 694 | return host->error; | 710 | return host->error; |
| 695 | } | 711 | } |
| 696 | buff = (u16 *)req->buffer; | 712 | mg_write_one(host, req); |
| 697 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) { | ||
| 698 | outw(*buff, (unsigned long)host->dev_base + | ||
| 699 | MG_BUFF_OFFSET + (i << 1)); | ||
| 700 | buff++; | ||
| 701 | } | ||
| 702 | mod_timer(&host->timer, jiffies + 3 * HZ); | 713 | mod_timer(&host->timer, jiffies + 3 * HZ); |
| 703 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + | 714 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + |
| 704 | MG_REG_COMMAND); | 715 | MG_REG_COMMAND); |
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index f4bb43fb8016..e077701ae3d9 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c | |||
| @@ -225,7 +225,7 @@ static const struct agp_bridge_driver parisc_agp_driver = { | |||
| 225 | .configure = parisc_agp_configure, | 225 | .configure = parisc_agp_configure, |
| 226 | .fetch_size = parisc_agp_fetch_size, | 226 | .fetch_size = parisc_agp_fetch_size, |
| 227 | .tlb_flush = parisc_agp_tlbflush, | 227 | .tlb_flush = parisc_agp_tlbflush, |
| 228 | .mask_memory = parisc_agp_mask_memory, | 228 | .mask_memory = parisc_agp_page_mask_memory, |
| 229 | .masks = parisc_agp_masks, | 229 | .masks = parisc_agp_masks, |
| 230 | .agp_enable = parisc_agp_enable, | 230 | .agp_enable = parisc_agp_enable, |
| 231 | .cache_flush = global_cache_flush, | 231 | .cache_flush = global_cache_flush, |
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index acd76b767d4c..1733d3439ad2 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c | |||
| @@ -48,6 +48,41 @@ static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); | |||
| 48 | /* Line disc dispatch table */ | 48 | /* Line disc dispatch table */ |
| 49 | static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS]; | 49 | static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS]; |
| 50 | 50 | ||
| 51 | static inline struct tty_ldisc *get_ldisc(struct tty_ldisc *ld) | ||
| 52 | { | ||
| 53 | if (ld) | ||
| 54 | atomic_inc(&ld->users); | ||
| 55 | return ld; | ||
| 56 | } | ||
| 57 | |||
| 58 | static void put_ldisc(struct tty_ldisc *ld) | ||
| 59 | { | ||
| 60 | unsigned long flags; | ||
| 61 | |||
| 62 | if (WARN_ON_ONCE(!ld)) | ||
| 63 | return; | ||
| 64 | |||
| 65 | /* | ||
| 66 | * If this is the last user, free the ldisc, and | ||
| 67 | * release the ldisc ops. | ||
| 68 | * | ||
| 69 | * We really want an "atomic_dec_and_lock_irqsave()", | ||
| 70 | * but we don't have it, so this does it by hand. | ||
| 71 | */ | ||
| 72 | local_irq_save(flags); | ||
| 73 | if (atomic_dec_and_lock(&ld->users, &tty_ldisc_lock)) { | ||
| 74 | struct tty_ldisc_ops *ldo = ld->ops; | ||
| 75 | |||
| 76 | ldo->refcount--; | ||
| 77 | module_put(ldo->owner); | ||
| 78 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
| 79 | |||
| 80 | kfree(ld); | ||
| 81 | return; | ||
| 82 | } | ||
| 83 | local_irq_restore(flags); | ||
| 84 | } | ||
| 85 | |||
| 51 | /** | 86 | /** |
| 52 | * tty_register_ldisc - install a line discipline | 87 | * tty_register_ldisc - install a line discipline |
| 53 | * @disc: ldisc number | 88 | * @disc: ldisc number |
| @@ -142,7 +177,7 @@ static struct tty_ldisc *tty_ldisc_try_get(int disc) | |||
| 142 | /* lock it */ | 177 | /* lock it */ |
| 143 | ldops->refcount++; | 178 | ldops->refcount++; |
| 144 | ld->ops = ldops; | 179 | ld->ops = ldops; |
| 145 | ld->refcount = 0; | 180 | atomic_set(&ld->users, 1); |
| 146 | err = 0; | 181 | err = 0; |
| 147 | } | 182 | } |
| 148 | } | 183 | } |
| @@ -181,35 +216,6 @@ static struct tty_ldisc *tty_ldisc_get(int disc) | |||
| 181 | return ld; | 216 | return ld; |
| 182 | } | 217 | } |
| 183 | 218 | ||
| 184 | /** | ||
| 185 | * tty_ldisc_put - drop ldisc reference | ||
| 186 | * @ld: ldisc | ||
| 187 | * | ||
| 188 | * Drop a reference to a line discipline. Manage refcounts and | ||
| 189 | * module usage counts. Free the ldisc once the recount hits zero. | ||
| 190 | * | ||
| 191 | * Locking: | ||
| 192 | * takes tty_ldisc_lock to guard against ldisc races | ||
| 193 | */ | ||
| 194 | |||
| 195 | static void tty_ldisc_put(struct tty_ldisc *ld) | ||
| 196 | { | ||
| 197 | unsigned long flags; | ||
| 198 | int disc = ld->ops->num; | ||
| 199 | struct tty_ldisc_ops *ldo; | ||
| 200 | |||
| 201 | BUG_ON(disc < N_TTY || disc >= NR_LDISCS); | ||
| 202 | |||
| 203 | spin_lock_irqsave(&tty_ldisc_lock, flags); | ||
| 204 | ldo = tty_ldiscs[disc]; | ||
| 205 | BUG_ON(ldo->refcount == 0); | ||
| 206 | ldo->refcount--; | ||
| 207 | module_put(ldo->owner); | ||
| 208 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
| 209 | WARN_ON(ld->refcount); | ||
| 210 | kfree(ld); | ||
| 211 | } | ||
| 212 | |||
| 213 | static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) | 219 | static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) |
| 214 | { | 220 | { |
| 215 | return (*pos < NR_LDISCS) ? pos : NULL; | 221 | return (*pos < NR_LDISCS) ? pos : NULL; |
| @@ -234,7 +240,7 @@ static int tty_ldiscs_seq_show(struct seq_file *m, void *v) | |||
| 234 | if (IS_ERR(ld)) | 240 | if (IS_ERR(ld)) |
| 235 | return 0; | 241 | return 0; |
| 236 | seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i); | 242 | seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i); |
| 237 | tty_ldisc_put(ld); | 243 | put_ldisc(ld); |
| 238 | return 0; | 244 | return 0; |
| 239 | } | 245 | } |
| 240 | 246 | ||
| @@ -288,20 +294,17 @@ static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) | |||
| 288 | * Locking: takes tty_ldisc_lock | 294 | * Locking: takes tty_ldisc_lock |
| 289 | */ | 295 | */ |
| 290 | 296 | ||
| 291 | static int tty_ldisc_try(struct tty_struct *tty) | 297 | static struct tty_ldisc *tty_ldisc_try(struct tty_struct *tty) |
| 292 | { | 298 | { |
| 293 | unsigned long flags; | 299 | unsigned long flags; |
| 294 | struct tty_ldisc *ld; | 300 | struct tty_ldisc *ld; |
| 295 | int ret = 0; | ||
| 296 | 301 | ||
| 297 | spin_lock_irqsave(&tty_ldisc_lock, flags); | 302 | spin_lock_irqsave(&tty_ldisc_lock, flags); |
| 298 | ld = tty->ldisc; | 303 | ld = NULL; |
| 299 | if (test_bit(TTY_LDISC, &tty->flags)) { | 304 | if (test_bit(TTY_LDISC, &tty->flags)) |
| 300 | ld->refcount++; | 305 | ld = get_ldisc(tty->ldisc); |
| 301 | ret = 1; | ||
| 302 | } | ||
| 303 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | 306 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); |
| 304 | return ret; | 307 | return ld; |
| 305 | } | 308 | } |
| 306 | 309 | ||
| 307 | /** | 310 | /** |
| @@ -322,10 +325,11 @@ static int tty_ldisc_try(struct tty_struct *tty) | |||
| 322 | 325 | ||
| 323 | struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) | 326 | struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) |
| 324 | { | 327 | { |
| 328 | struct tty_ldisc *ld; | ||
| 329 | |||
| 325 | /* wait_event is a macro */ | 330 | /* wait_event is a macro */ |
| 326 | wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); | 331 | wait_event(tty_ldisc_wait, (ld = tty_ldisc_try(tty)) != NULL); |
| 327 | WARN_ON(tty->ldisc->refcount == 0); | 332 | return ld; |
| 328 | return tty->ldisc; | ||
| 329 | } | 333 | } |
| 330 | EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); | 334 | EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); |
| 331 | 335 | ||
| @@ -342,9 +346,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); | |||
| 342 | 346 | ||
| 343 | struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) | 347 | struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) |
| 344 | { | 348 | { |
| 345 | if (tty_ldisc_try(tty)) | 349 | return tty_ldisc_try(tty); |
| 346 | return tty->ldisc; | ||
| 347 | return NULL; | ||
| 348 | } | 350 | } |
| 349 | EXPORT_SYMBOL_GPL(tty_ldisc_ref); | 351 | EXPORT_SYMBOL_GPL(tty_ldisc_ref); |
| 350 | 352 | ||
| @@ -360,21 +362,15 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref); | |||
| 360 | 362 | ||
| 361 | void tty_ldisc_deref(struct tty_ldisc *ld) | 363 | void tty_ldisc_deref(struct tty_ldisc *ld) |
| 362 | { | 364 | { |
| 363 | unsigned long flags; | 365 | put_ldisc(ld); |
| 364 | |||
| 365 | BUG_ON(ld == NULL); | ||
| 366 | |||
| 367 | spin_lock_irqsave(&tty_ldisc_lock, flags); | ||
| 368 | if (ld->refcount == 0) | ||
| 369 | printk(KERN_ERR "tty_ldisc_deref: no references.\n"); | ||
| 370 | else | ||
| 371 | ld->refcount--; | ||
| 372 | if (ld->refcount == 0) | ||
| 373 | wake_up(&tty_ldisc_wait); | ||
| 374 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
| 375 | } | 366 | } |
| 376 | EXPORT_SYMBOL_GPL(tty_ldisc_deref); | 367 | EXPORT_SYMBOL_GPL(tty_ldisc_deref); |
| 377 | 368 | ||
| 369 | static inline void tty_ldisc_put(struct tty_ldisc *ld) | ||
| 370 | { | ||
| 371 | put_ldisc(ld); | ||
| 372 | } | ||
| 373 | |||
| 378 | /** | 374 | /** |
| 379 | * tty_ldisc_enable - allow ldisc use | 375 | * tty_ldisc_enable - allow ldisc use |
| 380 | * @tty: terminal to activate ldisc on | 376 | * @tty: terminal to activate ldisc on |
| @@ -523,31 +519,6 @@ static int tty_ldisc_halt(struct tty_struct *tty) | |||
| 523 | } | 519 | } |
| 524 | 520 | ||
| 525 | /** | 521 | /** |
| 526 | * tty_ldisc_wait_idle - wait for the ldisc to become idle | ||
| 527 | * @tty: tty to wait for | ||
| 528 | * | ||
| 529 | * Wait for the line discipline to become idle. The discipline must | ||
| 530 | * have been halted for this to guarantee it remains idle. | ||
| 531 | * | ||
| 532 | * tty_ldisc_lock protects the ref counts currently. | ||
| 533 | */ | ||
| 534 | |||
| 535 | static int tty_ldisc_wait_idle(struct tty_struct *tty) | ||
| 536 | { | ||
| 537 | unsigned long flags; | ||
| 538 | spin_lock_irqsave(&tty_ldisc_lock, flags); | ||
| 539 | while (tty->ldisc->refcount) { | ||
| 540 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
| 541 | if (wait_event_timeout(tty_ldisc_wait, | ||
| 542 | tty->ldisc->refcount == 0, 5 * HZ) == 0) | ||
| 543 | return -EBUSY; | ||
| 544 | spin_lock_irqsave(&tty_ldisc_lock, flags); | ||
| 545 | } | ||
| 546 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
| 547 | return 0; | ||
| 548 | } | ||
| 549 | |||
| 550 | /** | ||
| 551 | * tty_set_ldisc - set line discipline | 522 | * tty_set_ldisc - set line discipline |
| 552 | * @tty: the terminal to set | 523 | * @tty: the terminal to set |
| 553 | * @ldisc: the line discipline | 524 | * @ldisc: the line discipline |
| @@ -642,14 +613,6 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc) | |||
| 642 | 613 | ||
| 643 | flush_scheduled_work(); | 614 | flush_scheduled_work(); |
| 644 | 615 | ||
| 645 | /* Let any existing reference holders finish */ | ||
| 646 | retval = tty_ldisc_wait_idle(tty); | ||
| 647 | if (retval < 0) { | ||
| 648 | clear_bit(TTY_LDISC_CHANGING, &tty->flags); | ||
| 649 | tty_ldisc_put(new_ldisc); | ||
| 650 | return retval; | ||
| 651 | } | ||
| 652 | |||
| 653 | mutex_lock(&tty->ldisc_mutex); | 616 | mutex_lock(&tty->ldisc_mutex); |
| 654 | if (test_bit(TTY_HUPPED, &tty->flags)) { | 617 | if (test_bit(TTY_HUPPED, &tty->flags)) { |
| 655 | /* We were raced by the hangup method. It will have stomped | 618 | /* We were raced by the hangup method. It will have stomped |
| @@ -795,7 +758,6 @@ void tty_ldisc_hangup(struct tty_struct *tty) | |||
| 795 | if (tty->ldisc) { /* Not yet closed */ | 758 | if (tty->ldisc) { /* Not yet closed */ |
| 796 | /* Switch back to N_TTY */ | 759 | /* Switch back to N_TTY */ |
| 797 | tty_ldisc_halt(tty); | 760 | tty_ldisc_halt(tty); |
| 798 | tty_ldisc_wait_idle(tty); | ||
| 799 | tty_ldisc_reinit(tty); | 761 | tty_ldisc_reinit(tty); |
| 800 | /* At this point we have a closed ldisc and we want to | 762 | /* At this point we have a closed ldisc and we want to |
| 801 | reopen it. We could defer this to the next open but | 763 | reopen it. We could defer this to the next open but |
| @@ -860,14 +822,6 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) | |||
| 860 | tty_ldisc_halt(tty); | 822 | tty_ldisc_halt(tty); |
| 861 | flush_scheduled_work(); | 823 | flush_scheduled_work(); |
| 862 | 824 | ||
| 863 | /* | ||
| 864 | * Wait for any short term users (we know they are just driver | ||
| 865 | * side waiters as the file is closing so user count on the file | ||
| 866 | * side is zero. | ||
| 867 | */ | ||
| 868 | |||
| 869 | tty_ldisc_wait_idle(tty); | ||
| 870 | |||
| 871 | mutex_lock(&tty->ldisc_mutex); | 825 | mutex_lock(&tty->ldisc_mutex); |
| 872 | /* | 826 | /* |
| 873 | * Now kill off the ldisc | 827 | * Now kill off the ldisc |
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b90eda8b3440..fd69086d08d5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c | |||
| @@ -858,6 +858,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) | |||
| 858 | 858 | ||
| 859 | /* Check for existing affected CPUs. | 859 | /* Check for existing affected CPUs. |
| 860 | * They may not be aware of it due to CPU Hotplug. | 860 | * They may not be aware of it due to CPU Hotplug. |
| 861 | * cpufreq_cpu_put is called when the device is removed | ||
| 862 | * in __cpufreq_remove_dev() | ||
| 861 | */ | 863 | */ |
| 862 | managed_policy = cpufreq_cpu_get(j); | 864 | managed_policy = cpufreq_cpu_get(j); |
| 863 | if (unlikely(managed_policy)) { | 865 | if (unlikely(managed_policy)) { |
| @@ -884,7 +886,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) | |||
| 884 | ret = sysfs_create_link(&sys_dev->kobj, | 886 | ret = sysfs_create_link(&sys_dev->kobj, |
| 885 | &managed_policy->kobj, | 887 | &managed_policy->kobj, |
| 886 | "cpufreq"); | 888 | "cpufreq"); |
| 887 | if (!ret) | 889 | if (ret) |
| 888 | cpufreq_cpu_put(managed_policy); | 890 | cpufreq_cpu_put(managed_policy); |
| 889 | /* | 891 | /* |
| 890 | * Success. We only needed to be added to the mask. | 892 | * Success. We only needed to be added to the mask. |
| @@ -924,6 +926,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) | |||
| 924 | 926 | ||
| 925 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 927 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
| 926 | for_each_cpu(j, policy->cpus) { | 928 | for_each_cpu(j, policy->cpus) { |
| 929 | if (!cpu_online(j)) | ||
| 930 | continue; | ||
| 927 | per_cpu(cpufreq_cpu_data, j) = policy; | 931 | per_cpu(cpufreq_cpu_data, j) = policy; |
| 928 | per_cpu(policy_cpu, j) = policy->cpu; | 932 | per_cpu(policy_cpu, j) = policy->cpu; |
| 929 | } | 933 | } |
| @@ -1244,13 +1248,22 @@ EXPORT_SYMBOL(cpufreq_get); | |||
| 1244 | 1248 | ||
| 1245 | static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) | 1249 | static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) |
| 1246 | { | 1250 | { |
| 1247 | int cpu = sysdev->id; | ||
| 1248 | int ret = 0; | 1251 | int ret = 0; |
| 1252 | |||
| 1253 | #ifdef __powerpc__ | ||
| 1254 | int cpu = sysdev->id; | ||
| 1249 | unsigned int cur_freq = 0; | 1255 | unsigned int cur_freq = 0; |
| 1250 | struct cpufreq_policy *cpu_policy; | 1256 | struct cpufreq_policy *cpu_policy; |
| 1251 | 1257 | ||
| 1252 | dprintk("suspending cpu %u\n", cpu); | 1258 | dprintk("suspending cpu %u\n", cpu); |
| 1253 | 1259 | ||
| 1260 | /* | ||
| 1261 | * This whole bogosity is here because Powerbooks are made of fail. | ||
| 1262 | * No sane platform should need any of the code below to be run. | ||
| 1263 | * (it's entirely the wrong thing to do, as driver->get may | ||
| 1264 | * reenable interrupts on some architectures). | ||
| 1265 | */ | ||
| 1266 | |||
| 1254 | if (!cpu_online(cpu)) | 1267 | if (!cpu_online(cpu)) |
| 1255 | return 0; | 1268 | return 0; |
| 1256 | 1269 | ||
| @@ -1309,6 +1322,7 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) | |||
| 1309 | 1322 | ||
| 1310 | out: | 1323 | out: |
| 1311 | cpufreq_cpu_put(cpu_policy); | 1324 | cpufreq_cpu_put(cpu_policy); |
| 1325 | #endif /* __powerpc__ */ | ||
| 1312 | return ret; | 1326 | return ret; |
| 1313 | } | 1327 | } |
| 1314 | 1328 | ||
| @@ -1322,12 +1336,18 @@ out: | |||
| 1322 | */ | 1336 | */ |
| 1323 | static int cpufreq_resume(struct sys_device *sysdev) | 1337 | static int cpufreq_resume(struct sys_device *sysdev) |
| 1324 | { | 1338 | { |
| 1325 | int cpu = sysdev->id; | ||
| 1326 | int ret = 0; | 1339 | int ret = 0; |
| 1340 | |||
| 1341 | #ifdef __powerpc__ | ||
| 1342 | int cpu = sysdev->id; | ||
| 1327 | struct cpufreq_policy *cpu_policy; | 1343 | struct cpufreq_policy *cpu_policy; |
| 1328 | 1344 | ||
| 1329 | dprintk("resuming cpu %u\n", cpu); | 1345 | dprintk("resuming cpu %u\n", cpu); |
| 1330 | 1346 | ||
| 1347 | /* As with the ->suspend method, all the code below is | ||
| 1348 | * only necessary because Powerbooks suck. | ||
| 1349 | * See commit 42d4dc3f4e1e for jokes. */ | ||
| 1350 | |||
| 1331 | if (!cpu_online(cpu)) | 1351 | if (!cpu_online(cpu)) |
| 1332 | return 0; | 1352 | return 0; |
| 1333 | 1353 | ||
| @@ -1391,6 +1411,7 @@ out: | |||
| 1391 | schedule_work(&cpu_policy->update); | 1411 | schedule_work(&cpu_policy->update); |
| 1392 | fail: | 1412 | fail: |
| 1393 | cpufreq_cpu_put(cpu_policy); | 1413 | cpufreq_cpu_put(cpu_policy); |
| 1414 | #endif /* __powerpc__ */ | ||
| 1394 | return ret; | 1415 | return ret; |
| 1395 | } | 1416 | } |
| 1396 | 1417 | ||
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 57490502b21c..bdea7e2f94ba 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c | |||
| @@ -63,6 +63,7 @@ struct cpu_dbs_info_s { | |||
| 63 | unsigned int down_skip; | 63 | unsigned int down_skip; |
| 64 | unsigned int requested_freq; | 64 | unsigned int requested_freq; |
| 65 | int cpu; | 65 | int cpu; |
| 66 | unsigned int enable:1; | ||
| 66 | /* | 67 | /* |
| 67 | * percpu mutex that serializes governor limit change with | 68 | * percpu mutex that serializes governor limit change with |
| 68 | * do_dbs_timer invocation. We do not want do_dbs_timer to run | 69 | * do_dbs_timer invocation. We do not want do_dbs_timer to run |
| @@ -141,6 +142,9 @@ dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
| 141 | 142 | ||
| 142 | struct cpufreq_policy *policy; | 143 | struct cpufreq_policy *policy; |
| 143 | 144 | ||
| 145 | if (!this_dbs_info->enable) | ||
| 146 | return 0; | ||
| 147 | |||
| 144 | policy = this_dbs_info->cur_policy; | 148 | policy = this_dbs_info->cur_policy; |
| 145 | 149 | ||
| 146 | /* | 150 | /* |
| @@ -497,6 +501,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) | |||
| 497 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); | 501 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); |
| 498 | delay -= jiffies % delay; | 502 | delay -= jiffies % delay; |
| 499 | 503 | ||
| 504 | dbs_info->enable = 1; | ||
| 500 | INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); | 505 | INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); |
| 501 | queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, | 506 | queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, |
| 502 | delay); | 507 | delay); |
| @@ -504,6 +509,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) | |||
| 504 | 509 | ||
| 505 | static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) | 510 | static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) |
| 506 | { | 511 | { |
| 512 | dbs_info->enable = 0; | ||
| 507 | cancel_delayed_work_sync(&dbs_info->work); | 513 | cancel_delayed_work_sync(&dbs_info->work); |
| 508 | } | 514 | } |
| 509 | 515 | ||
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 070357aaedbc..81e1020fb514 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | menuconfig DMADEVICES | 5 | menuconfig DMADEVICES |
| 6 | bool "DMA Engine support" | 6 | bool "DMA Engine support" |
| 7 | depends on !HIGHMEM64G && HAS_DMA | 7 | depends on HAS_DMA |
| 8 | help | 8 | help |
| 9 | DMA engines can do asynchronous data transfers without | 9 | DMA engines can do asynchronous data transfers without |
| 10 | involving the host CPU. Currently, this framework can be | 10 | involving the host CPU. Currently, this framework can be |
| @@ -46,6 +46,14 @@ config DW_DMAC | |||
| 46 | Support the Synopsys DesignWare AHB DMA controller. This | 46 | Support the Synopsys DesignWare AHB DMA controller. This |
| 47 | can be integrated in chips such as the Atmel AT32ap7000. | 47 | can be integrated in chips such as the Atmel AT32ap7000. |
| 48 | 48 | ||
| 49 | config AT_HDMAC | ||
| 50 | tristate "Atmel AHB DMA support" | ||
| 51 | depends on ARCH_AT91SAM9RL | ||
| 52 | select DMA_ENGINE | ||
| 53 | help | ||
| 54 | Support the Atmel AHB DMA controller. This can be integrated in | ||
| 55 | chips such as the Atmel AT91SAM9RL. | ||
| 56 | |||
| 49 | config FSL_DMA | 57 | config FSL_DMA |
| 50 | tristate "Freescale Elo and Elo Plus DMA support" | 58 | tristate "Freescale Elo and Elo Plus DMA support" |
| 51 | depends on FSL_SOC | 59 | depends on FSL_SOC |
| @@ -108,7 +116,7 @@ config NET_DMA | |||
| 108 | 116 | ||
| 109 | config ASYNC_TX_DMA | 117 | config ASYNC_TX_DMA |
| 110 | bool "Async_tx: Offload support for the async_tx api" | 118 | bool "Async_tx: Offload support for the async_tx api" |
| 111 | depends on DMA_ENGINE | 119 | depends on DMA_ENGINE && !HIGHMEM64G |
| 112 | help | 120 | help |
| 113 | This allows the async_tx api to take advantage of offload engines for | 121 | This allows the async_tx api to take advantage of offload engines for |
| 114 | memcpy, memset, xor, and raid6 p+q operations. If your platform has | 122 | memcpy, memset, xor, and raid6 p+q operations. If your platform has |
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index a0b6564800c4..40e1e0083571 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile | |||
| @@ -7,5 +7,6 @@ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o | |||
| 7 | obj-$(CONFIG_FSL_DMA) += fsldma.o | 7 | obj-$(CONFIG_FSL_DMA) += fsldma.o |
| 8 | obj-$(CONFIG_MV_XOR) += mv_xor.o | 8 | obj-$(CONFIG_MV_XOR) += mv_xor.o |
| 9 | obj-$(CONFIG_DW_DMAC) += dw_dmac.o | 9 | obj-$(CONFIG_DW_DMAC) += dw_dmac.o |
| 10 | obj-$(CONFIG_AT_HDMAC) += at_hdmac.o | ||
| 10 | obj-$(CONFIG_MX3_IPU) += ipu/ | 11 | obj-$(CONFIG_MX3_IPU) += ipu/ |
| 11 | obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o | 12 | obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o |
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c new file mode 100644 index 000000000000..9a1e5fb412ed --- /dev/null +++ b/drivers/dma/at_hdmac.c | |||
| @@ -0,0 +1,1213 @@ | |||
| 1 | /* | ||
| 2 | * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems) | ||
| 3 | * | ||
| 4 | * Copyright (C) 2008 Atmel Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * | ||
| 12 | * This supports the Atmel AHB DMA Controller. | ||
| 13 | * | ||
| 14 | * The driver has currently been tested with the Atmel AT91SAM9RL | ||
| 15 | * and AT91SAM9G45 series. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/clk.h> | ||
| 19 | #include <linux/dmaengine.h> | ||
| 20 | #include <linux/dma-mapping.h> | ||
| 21 | #include <linux/dmapool.h> | ||
| 22 | #include <linux/interrupt.h> | ||
| 23 | #include <linux/module.h> | ||
| 24 | #include <linux/platform_device.h> | ||
| 25 | |||
| 26 | #include "at_hdmac_regs.h" | ||
| 27 | |||
| 28 | /* | ||
| 29 | * Glossary | ||
| 30 | * -------- | ||
| 31 | * | ||
| 32 | * at_hdmac : Name of the ATmel AHB DMA Controller | ||
| 33 | * at_dma_ / atdma : ATmel DMA controller entity related | ||
| 34 | * atc_ / atchan : ATmel DMA Channel entity related | ||
| 35 | */ | ||
| 36 | |||
| 37 | #define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) | ||
| 38 | #define ATC_DEFAULT_CTRLA (0) | ||
| 39 | #define ATC_DEFAULT_CTRLB (ATC_SIF(0) \ | ||
| 40 | |ATC_DIF(1)) | ||
| 41 | |||
| 42 | /* | ||
| 43 | * Initial number of descriptors to allocate for each channel. This could | ||
| 44 | * be increased during dma usage. | ||
| 45 | */ | ||
| 46 | static unsigned int init_nr_desc_per_channel = 64; | ||
| 47 | module_param(init_nr_desc_per_channel, uint, 0644); | ||
| 48 | MODULE_PARM_DESC(init_nr_desc_per_channel, | ||
| 49 | "initial descriptors per channel (default: 64)"); | ||
| 50 | |||
| 51 | |||
| 52 | /* prototypes */ | ||
| 53 | static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx); | ||
| 54 | |||
| 55 | |||
| 56 | /*----------------------------------------------------------------------*/ | ||
| 57 | |||
| 58 | static struct at_desc *atc_first_active(struct at_dma_chan *atchan) | ||
| 59 | { | ||
| 60 | return list_first_entry(&atchan->active_list, | ||
| 61 | struct at_desc, desc_node); | ||
| 62 | } | ||
| 63 | |||
| 64 | static struct at_desc *atc_first_queued(struct at_dma_chan *atchan) | ||
| 65 | { | ||
| 66 | return list_first_entry(&atchan->queue, | ||
| 67 | struct at_desc, desc_node); | ||
| 68 | } | ||
| 69 | |||
| 70 | /** | ||
| 71 | * atc_alloc_descriptor - allocate and return an initialized descriptor | ||
| 72 | * @chan: the channel to allocate descriptors for | ||
| 73 | * @gfp_flags: GFP allocation flags | ||
| 74 | * | ||
| 75 | * Note: The ack-bit is positioned in the descriptor flag at creation time | ||
| 76 | * to make initial allocation more convenient. This bit will be cleared | ||
| 77 | * and control will be given to client at usage time (during | ||
| 78 | * preparation functions). | ||
| 79 | */ | ||
| 80 | static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, | ||
| 81 | gfp_t gfp_flags) | ||
| 82 | { | ||
| 83 | struct at_desc *desc = NULL; | ||
| 84 | struct at_dma *atdma = to_at_dma(chan->device); | ||
| 85 | dma_addr_t phys; | ||
| 86 | |||
| 87 | desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); | ||
| 88 | if (desc) { | ||
| 89 | memset(desc, 0, sizeof(struct at_desc)); | ||
| 90 | dma_async_tx_descriptor_init(&desc->txd, chan); | ||
| 91 | /* txd.flags will be overwritten in prep functions */ | ||
| 92 | desc->txd.flags = DMA_CTRL_ACK; | ||
| 93 | desc->txd.tx_submit = atc_tx_submit; | ||
| 94 | desc->txd.phys = phys; | ||
| 95 | } | ||
| 96 | |||
| 97 | return desc; | ||
| 98 | } | ||
| 99 | |||
| 100 | /** | ||
| 101 | * atc_desc_get - get an unused descriptor from free_list | ||
| 102 | * @atchan: channel we want a new descriptor for | ||
| 103 | */ | ||
| 104 | static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) | ||
| 105 | { | ||
| 106 | struct at_desc *desc, *_desc; | ||
| 107 | struct at_desc *ret = NULL; | ||
| 108 | unsigned int i = 0; | ||
| 109 | LIST_HEAD(tmp_list); | ||
| 110 | |||
| 111 | spin_lock_bh(&atchan->lock); | ||
| 112 | list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { | ||
| 113 | i++; | ||
| 114 | if (async_tx_test_ack(&desc->txd)) { | ||
| 115 | list_del(&desc->desc_node); | ||
| 116 | ret = desc; | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | dev_dbg(chan2dev(&atchan->chan_common), | ||
| 120 | "desc %p not ACKed\n", desc); | ||
| 121 | } | ||
| 122 | spin_unlock_bh(&atchan->lock); | ||
| 123 | dev_vdbg(chan2dev(&atchan->chan_common), | ||
| 124 | "scanned %u descriptors on freelist\n", i); | ||
| 125 | |||
| 126 | /* no more descriptor available in initial pool: create one more */ | ||
| 127 | if (!ret) { | ||
| 128 | ret = atc_alloc_descriptor(&atchan->chan_common, GFP_ATOMIC); | ||
| 129 | if (ret) { | ||
| 130 | spin_lock_bh(&atchan->lock); | ||
| 131 | atchan->descs_allocated++; | ||
| 132 | spin_unlock_bh(&atchan->lock); | ||
| 133 | } else { | ||
| 134 | dev_err(chan2dev(&atchan->chan_common), | ||
| 135 | "not enough descriptors available\n"); | ||
| 136 | } | ||
| 137 | } | ||
| 138 | |||
| 139 | return ret; | ||
| 140 | } | ||
| 141 | |||
| 142 | /** | ||
| 143 | * atc_desc_put - move a descriptor, including any children, to the free list | ||
| 144 | * @atchan: channel we work on | ||
| 145 | * @desc: descriptor, at the head of a chain, to move to free list | ||
| 146 | */ | ||
| 147 | static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) | ||
| 148 | { | ||
| 149 | if (desc) { | ||
| 150 | struct at_desc *child; | ||
| 151 | |||
| 152 | spin_lock_bh(&atchan->lock); | ||
| 153 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | ||
| 154 | dev_vdbg(chan2dev(&atchan->chan_common), | ||
| 155 | "moving child desc %p to freelist\n", | ||
| 156 | child); | ||
| 157 | list_splice_init(&desc->txd.tx_list, &atchan->free_list); | ||
| 158 | dev_vdbg(chan2dev(&atchan->chan_common), | ||
| 159 | "moving desc %p to freelist\n", desc); | ||
| 160 | list_add(&desc->desc_node, &atchan->free_list); | ||
| 161 | spin_unlock_bh(&atchan->lock); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | /** | ||
| 166 | * atc_assign_cookie - compute and assign new cookie | ||
| 167 | * @atchan: channel we work on | ||
| 168 | * @desc: descriptor to assign cookie for | ||
| 169 | * | ||
| 170 | * Called with atchan->lock held and bh disabled | ||
| 171 | */ | ||
| 172 | static dma_cookie_t | ||
| 173 | atc_assign_cookie(struct at_dma_chan *atchan, struct at_desc *desc) | ||
| 174 | { | ||
| 175 | dma_cookie_t cookie = atchan->chan_common.cookie; | ||
| 176 | |||
| 177 | if (++cookie < 0) | ||
| 178 | cookie = 1; | ||
| 179 | |||
| 180 | atchan->chan_common.cookie = cookie; | ||
| 181 | desc->txd.cookie = cookie; | ||
| 182 | |||
| 183 | return cookie; | ||
| 184 | } | ||
| 185 | |||
| 186 | /** | ||
| 187 | * atc_dostart - starts the DMA engine for real | ||
| 188 | * @atchan: the channel we want to start | ||
| 189 | * @first: first descriptor in the list we want to begin with | ||
| 190 | * | ||
| 191 | * Called with atchan->lock held and bh disabled | ||
| 192 | */ | ||
| 193 | static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) | ||
| 194 | { | ||
| 195 | struct at_dma *atdma = to_at_dma(atchan->chan_common.device); | ||
| 196 | |||
| 197 | /* ASSERT: channel is idle */ | ||
| 198 | if (atc_chan_is_enabled(atchan)) { | ||
| 199 | dev_err(chan2dev(&atchan->chan_common), | ||
| 200 | "BUG: Attempted to start non-idle channel\n"); | ||
| 201 | dev_err(chan2dev(&atchan->chan_common), | ||
| 202 | " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", | ||
| 203 | channel_readl(atchan, SADDR), | ||
| 204 | channel_readl(atchan, DADDR), | ||
| 205 | channel_readl(atchan, CTRLA), | ||
| 206 | channel_readl(atchan, CTRLB), | ||
| 207 | channel_readl(atchan, DSCR)); | ||
| 208 | |||
| 209 | /* The tasklet will hopefully advance the queue... */ | ||
| 210 | return; | ||
| 211 | } | ||
| 212 | |||
| 213 | vdbg_dump_regs(atchan); | ||
| 214 | |||
| 215 | /* clear any pending interrupt */ | ||
| 216 | while (dma_readl(atdma, EBCISR)) | ||
| 217 | cpu_relax(); | ||
| 218 | |||
| 219 | channel_writel(atchan, SADDR, 0); | ||
| 220 | channel_writel(atchan, DADDR, 0); | ||
| 221 | channel_writel(atchan, CTRLA, 0); | ||
| 222 | channel_writel(atchan, CTRLB, 0); | ||
| 223 | channel_writel(atchan, DSCR, first->txd.phys); | ||
| 224 | dma_writel(atdma, CHER, atchan->mask); | ||
| 225 | |||
| 226 | vdbg_dump_regs(atchan); | ||
| 227 | } | ||
| 228 | |||
| 229 | /** | ||
| 230 | * atc_chain_complete - finish work for one transaction chain | ||
| 231 | * @atchan: channel we work on | ||
| 232 | * @desc: descriptor at the head of the chain we want to complete | ||
| 233 | * | ||
| 234 | * Called with atchan->lock held and bh disabled */ | ||
| 235 | static void | ||
| 236 | atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) | ||
| 237 | { | ||
| 238 | dma_async_tx_callback callback; | ||
| 239 | void *param; | ||
| 240 | struct dma_async_tx_descriptor *txd = &desc->txd; | ||
| 241 | |||
| 242 | dev_vdbg(chan2dev(&atchan->chan_common), | ||
| 243 | "descriptor %u complete\n", txd->cookie); | ||
| 244 | |||
| 245 | atchan->completed_cookie = txd->cookie; | ||
| 246 | callback = txd->callback; | ||
| 247 | param = txd->callback_param; | ||
| 248 | |||
| 249 | /* move children to free_list */ | ||
| 250 | list_splice_init(&txd->tx_list, &atchan->free_list); | ||
| 251 | /* move myself to free_list */ | ||
| 252 | list_move(&desc->desc_node, &atchan->free_list); | ||
| 253 | |||
| 254 | /* unmap dma addresses */ | ||
| 255 | if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | ||
| 256 | if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) | ||
| 257 | dma_unmap_single(chan2parent(&atchan->chan_common), | ||
| 258 | desc->lli.daddr, | ||
| 259 | desc->len, DMA_FROM_DEVICE); | ||
| 260 | else | ||
| 261 | dma_unmap_page(chan2parent(&atchan->chan_common), | ||
| 262 | desc->lli.daddr, | ||
| 263 | desc->len, DMA_FROM_DEVICE); | ||
| 264 | } | ||
| 265 | if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
| 266 | if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) | ||
| 267 | dma_unmap_single(chan2parent(&atchan->chan_common), | ||
| 268 | desc->lli.saddr, | ||
| 269 | desc->len, DMA_TO_DEVICE); | ||
| 270 | else | ||
| 271 | dma_unmap_page(chan2parent(&atchan->chan_common), | ||
| 272 | desc->lli.saddr, | ||
| 273 | desc->len, DMA_TO_DEVICE); | ||
| 274 | } | ||
| 275 | |||
| 276 | /* | ||
| 277 | * The API requires that no submissions are done from a | ||
| 278 | * callback, so we don't need to drop the lock here | ||
| 279 | */ | ||
| 280 | if (callback) | ||
| 281 | callback(param); | ||
| 282 | |||
| 283 | dma_run_dependencies(txd); | ||
| 284 | } | ||
| 285 | |||
| 286 | /** | ||
| 287 | * atc_complete_all - finish work for all transactions | ||
| 288 | * @atchan: channel to complete transactions for | ||
| 289 | * | ||
| 290 | * Eventually submit queued descriptors if any | ||
| 291 | * | ||
| 292 | * Assume channel is idle while calling this function | ||
| 293 | * Called with atchan->lock held and bh disabled | ||
| 294 | */ | ||
| 295 | static void atc_complete_all(struct at_dma_chan *atchan) | ||
| 296 | { | ||
| 297 | struct at_desc *desc, *_desc; | ||
| 298 | LIST_HEAD(list); | ||
| 299 | |||
| 300 | dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); | ||
| 301 | |||
| 302 | BUG_ON(atc_chan_is_enabled(atchan)); | ||
| 303 | |||
| 304 | /* | ||
| 305 | * Submit queued descriptors ASAP, i.e. before we go through | ||
| 306 | * the completed ones. | ||
| 307 | */ | ||
| 308 | if (!list_empty(&atchan->queue)) | ||
| 309 | atc_dostart(atchan, atc_first_queued(atchan)); | ||
| 310 | /* empty active_list now it is completed */ | ||
| 311 | list_splice_init(&atchan->active_list, &list); | ||
| 312 | /* empty queue list by moving descriptors (if any) to active_list */ | ||
| 313 | list_splice_init(&atchan->queue, &atchan->active_list); | ||
| 314 | |||
| 315 | list_for_each_entry_safe(desc, _desc, &list, desc_node) | ||
| 316 | atc_chain_complete(atchan, desc); | ||
| 317 | } | ||
| 318 | |||
| 319 | /** | ||
| 320 | * atc_cleanup_descriptors - clean up finished descriptors in active_list | ||
| 321 | * @atchan: channel to be cleaned up | ||
| 322 | * | ||
| 323 | * Called with atchan->lock held and bh disabled | ||
| 324 | */ | ||
| 325 | static void atc_cleanup_descriptors(struct at_dma_chan *atchan) | ||
| 326 | { | ||
| 327 | struct at_desc *desc, *_desc; | ||
| 328 | struct at_desc *child; | ||
| 329 | |||
| 330 | dev_vdbg(chan2dev(&atchan->chan_common), "cleanup descriptors\n"); | ||
| 331 | |||
| 332 | list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { | ||
| 333 | if (!(desc->lli.ctrla & ATC_DONE)) | ||
| 334 | /* This one is currently in progress */ | ||
| 335 | return; | ||
| 336 | |||
| 337 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | ||
| 338 | if (!(child->lli.ctrla & ATC_DONE)) | ||
| 339 | /* Currently in progress */ | ||
| 340 | return; | ||
| 341 | |||
| 342 | /* | ||
| 343 | * No descriptors so far seem to be in progress, i.e. | ||
| 344 | * this chain must be done. | ||
| 345 | */ | ||
| 346 | atc_chain_complete(atchan, desc); | ||
| 347 | } | ||
| 348 | } | ||
| 349 | |||
| 350 | /** | ||
| 351 | * atc_advance_work - at the end of a transaction, move forward | ||
| 352 | * @atchan: channel where the transaction ended | ||
| 353 | * | ||
| 354 | * Called with atchan->lock held and bh disabled | ||
| 355 | */ | ||
| 356 | static void atc_advance_work(struct at_dma_chan *atchan) | ||
| 357 | { | ||
| 358 | dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); | ||
| 359 | |||
| 360 | if (list_empty(&atchan->active_list) || | ||
| 361 | list_is_singular(&atchan->active_list)) { | ||
| 362 | atc_complete_all(atchan); | ||
| 363 | } else { | ||
| 364 | atc_chain_complete(atchan, atc_first_active(atchan)); | ||
| 365 | /* advance work */ | ||
| 366 | atc_dostart(atchan, atc_first_active(atchan)); | ||
| 367 | } | ||
| 368 | } | ||
| 369 | |||
| 370 | |||
| 371 | /** | ||
| 372 | * atc_handle_error - handle errors reported by DMA controller | ||
| 373 | * @atchan: channel where error occurs | ||
| 374 | * | ||
| 375 | * Called with atchan->lock held and bh disabled | ||
| 376 | */ | ||
| 377 | static void atc_handle_error(struct at_dma_chan *atchan) | ||
| 378 | { | ||
| 379 | struct at_desc *bad_desc; | ||
| 380 | struct at_desc *child; | ||
| 381 | |||
| 382 | /* | ||
| 383 | * The descriptor currently at the head of the active list is | ||
| 384 | * broken. Since we don't have any way to report errors, we'll | ||
| 385 | * just have to scream loudly and try to carry on. | ||
| 386 | */ | ||
| 387 | bad_desc = atc_first_active(atchan); | ||
| 388 | list_del_init(&bad_desc->desc_node); | ||
| 389 | |||
| 390 | /* As we are stopped, take advantage to push queued descriptors | ||
| 391 | * in active_list */ | ||
| 392 | list_splice_init(&atchan->queue, atchan->active_list.prev); | ||
| 393 | |||
| 394 | /* Try to restart the controller */ | ||
| 395 | if (!list_empty(&atchan->active_list)) | ||
| 396 | atc_dostart(atchan, atc_first_active(atchan)); | ||
| 397 | |||
| 398 | /* | ||
| 399 | * KERN_CRIT may seem harsh, but since this only happens | ||
| 400 | * when someone submits a bad physical address in a | ||
| 401 | * descriptor, we should consider ourselves lucky that the | ||
| 402 | * controller flagged an error instead of scribbling over | ||
| 403 | * random memory locations. | ||
| 404 | */ | ||
| 405 | dev_crit(chan2dev(&atchan->chan_common), | ||
| 406 | "Bad descriptor submitted for DMA!\n"); | ||
| 407 | dev_crit(chan2dev(&atchan->chan_common), | ||
| 408 | " cookie: %d\n", bad_desc->txd.cookie); | ||
| 409 | atc_dump_lli(atchan, &bad_desc->lli); | ||
| 410 | list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) | ||
| 411 | atc_dump_lli(atchan, &child->lli); | ||
| 412 | |||
| 413 | /* Pretend the descriptor completed successfully */ | ||
| 414 | atc_chain_complete(atchan, bad_desc); | ||
| 415 | } | ||
| 416 | |||
| 417 | |||
| 418 | /*-- IRQ & Tasklet ---------------------------------------------------*/ | ||
| 419 | |||
| 420 | static void atc_tasklet(unsigned long data) | ||
| 421 | { | ||
| 422 | struct at_dma_chan *atchan = (struct at_dma_chan *)data; | ||
| 423 | |||
| 424 | /* Channel cannot be enabled here */ | ||
| 425 | if (atc_chan_is_enabled(atchan)) { | ||
| 426 | dev_err(chan2dev(&atchan->chan_common), | ||
| 427 | "BUG: channel enabled in tasklet\n"); | ||
| 428 | return; | ||
| 429 | } | ||
| 430 | |||
| 431 | spin_lock(&atchan->lock); | ||
| 432 | if (test_and_clear_bit(0, &atchan->error_status)) | ||
| 433 | atc_handle_error(atchan); | ||
| 434 | else | ||
| 435 | atc_advance_work(atchan); | ||
| 436 | |||
| 437 | spin_unlock(&atchan->lock); | ||
| 438 | } | ||
| 439 | |||
| 440 | static irqreturn_t at_dma_interrupt(int irq, void *dev_id) | ||
| 441 | { | ||
| 442 | struct at_dma *atdma = (struct at_dma *)dev_id; | ||
| 443 | struct at_dma_chan *atchan; | ||
| 444 | int i; | ||
| 445 | u32 status, pending, imr; | ||
| 446 | int ret = IRQ_NONE; | ||
| 447 | |||
| 448 | do { | ||
| 449 | imr = dma_readl(atdma, EBCIMR); | ||
| 450 | status = dma_readl(atdma, EBCISR); | ||
| 451 | pending = status & imr; | ||
| 452 | |||
| 453 | if (!pending) | ||
| 454 | break; | ||
| 455 | |||
| 456 | dev_vdbg(atdma->dma_common.dev, | ||
| 457 | "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n", | ||
| 458 | status, imr, pending); | ||
| 459 | |||
| 460 | for (i = 0; i < atdma->dma_common.chancnt; i++) { | ||
| 461 | atchan = &atdma->chan[i]; | ||
| 462 | if (pending & (AT_DMA_CBTC(i) | AT_DMA_ERR(i))) { | ||
| 463 | if (pending & AT_DMA_ERR(i)) { | ||
| 464 | /* Disable channel on AHB error */ | ||
| 465 | dma_writel(atdma, CHDR, atchan->mask); | ||
| 466 | /* Give information to tasklet */ | ||
| 467 | set_bit(0, &atchan->error_status); | ||
| 468 | } | ||
| 469 | tasklet_schedule(&atchan->tasklet); | ||
| 470 | ret = IRQ_HANDLED; | ||
| 471 | } | ||
| 472 | } | ||
| 473 | |||
| 474 | } while (pending); | ||
| 475 | |||
| 476 | return ret; | ||
| 477 | } | ||
| 478 | |||
| 479 | |||
| 480 | /*-- DMA Engine API --------------------------------------------------*/ | ||
| 481 | |||
| 482 | /** | ||
| 483 | * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine | ||
| 484 | * @desc: descriptor at the head of the transaction chain | ||
| 485 | * | ||
| 486 | * Queue chain if DMA engine is working already | ||
| 487 | * | ||
| 488 | * Cookie increment and adding to active_list or queue must be atomic | ||
| 489 | */ | ||
| 490 | static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) | ||
| 491 | { | ||
| 492 | struct at_desc *desc = txd_to_at_desc(tx); | ||
| 493 | struct at_dma_chan *atchan = to_at_dma_chan(tx->chan); | ||
| 494 | dma_cookie_t cookie; | ||
| 495 | |||
| 496 | spin_lock_bh(&atchan->lock); | ||
| 497 | cookie = atc_assign_cookie(atchan, desc); | ||
| 498 | |||
| 499 | if (list_empty(&atchan->active_list)) { | ||
| 500 | dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", | ||
| 501 | desc->txd.cookie); | ||
| 502 | atc_dostart(atchan, desc); | ||
| 503 | list_add_tail(&desc->desc_node, &atchan->active_list); | ||
| 504 | } else { | ||
| 505 | dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", | ||
| 506 | desc->txd.cookie); | ||
| 507 | list_add_tail(&desc->desc_node, &atchan->queue); | ||
| 508 | } | ||
| 509 | |||
| 510 | spin_unlock_bh(&atchan->lock); | ||
| 511 | |||
| 512 | return cookie; | ||
| 513 | } | ||
| 514 | |||
| 515 | /** | ||
| 516 | * atc_prep_dma_memcpy - prepare a memcpy operation | ||
| 517 | * @chan: the channel to prepare operation on | ||
| 518 | * @dest: operation DMA destination address | ||
| 519 | * @src: operation DMA source address | ||
| 520 | * @len: operation length | ||
| 521 | * @flags: tx descriptor status flags | ||
| 522 | */ | ||
| 523 | static struct dma_async_tx_descriptor * | ||
| 524 | atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, | ||
| 525 | size_t len, unsigned long flags) | ||
| 526 | { | ||
| 527 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
| 528 | struct at_desc *desc = NULL; | ||
| 529 | struct at_desc *first = NULL; | ||
| 530 | struct at_desc *prev = NULL; | ||
| 531 | size_t xfer_count; | ||
| 532 | size_t offset; | ||
| 533 | unsigned int src_width; | ||
| 534 | unsigned int dst_width; | ||
| 535 | u32 ctrla; | ||
| 536 | u32 ctrlb; | ||
| 537 | |||
| 538 | dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n", | ||
| 539 | dest, src, len, flags); | ||
| 540 | |||
| 541 | if (unlikely(!len)) { | ||
| 542 | dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); | ||
| 543 | return NULL; | ||
| 544 | } | ||
| 545 | |||
| 546 | ctrla = ATC_DEFAULT_CTRLA; | ||
| 547 | ctrlb = ATC_DEFAULT_CTRLB | ||
| 548 | | ATC_SRC_ADDR_MODE_INCR | ||
| 549 | | ATC_DST_ADDR_MODE_INCR | ||
| 550 | | ATC_FC_MEM2MEM; | ||
| 551 | |||
| 552 | /* | ||
| 553 | * We can be a lot more clever here, but this should take care | ||
| 554 | * of the most common optimization. | ||
| 555 | */ | ||
| 556 | if (!((src | dest | len) & 3)) { | ||
| 557 | ctrla |= ATC_SRC_WIDTH_WORD | ATC_DST_WIDTH_WORD; | ||
| 558 | src_width = dst_width = 2; | ||
| 559 | } else if (!((src | dest | len) & 1)) { | ||
| 560 | ctrla |= ATC_SRC_WIDTH_HALFWORD | ATC_DST_WIDTH_HALFWORD; | ||
| 561 | src_width = dst_width = 1; | ||
| 562 | } else { | ||
| 563 | ctrla |= ATC_SRC_WIDTH_BYTE | ATC_DST_WIDTH_BYTE; | ||
| 564 | src_width = dst_width = 0; | ||
| 565 | } | ||
| 566 | |||
| 567 | for (offset = 0; offset < len; offset += xfer_count << src_width) { | ||
| 568 | xfer_count = min_t(size_t, (len - offset) >> src_width, | ||
| 569 | ATC_BTSIZE_MAX); | ||
| 570 | |||
| 571 | desc = atc_desc_get(atchan); | ||
| 572 | if (!desc) | ||
| 573 | goto err_desc_get; | ||
| 574 | |||
| 575 | desc->lli.saddr = src + offset; | ||
| 576 | desc->lli.daddr = dest + offset; | ||
| 577 | desc->lli.ctrla = ctrla | xfer_count; | ||
| 578 | desc->lli.ctrlb = ctrlb; | ||
| 579 | |||
| 580 | desc->txd.cookie = 0; | ||
| 581 | async_tx_ack(&desc->txd); | ||
| 582 | |||
| 583 | if (!first) { | ||
| 584 | first = desc; | ||
| 585 | } else { | ||
| 586 | /* inform the HW lli about chaining */ | ||
| 587 | prev->lli.dscr = desc->txd.phys; | ||
| 588 | /* insert the link descriptor to the LD ring */ | ||
| 589 | list_add_tail(&desc->desc_node, | ||
| 590 | &first->txd.tx_list); | ||
| 591 | } | ||
| 592 | prev = desc; | ||
| 593 | } | ||
| 594 | |||
| 595 | /* First descriptor of the chain embedds additional information */ | ||
| 596 | first->txd.cookie = -EBUSY; | ||
| 597 | first->len = len; | ||
| 598 | |||
| 599 | /* set end-of-link to the last link descriptor of list*/ | ||
| 600 | set_desc_eol(desc); | ||
| 601 | |||
| 602 | desc->txd.flags = flags; /* client is in control of this ack */ | ||
| 603 | |||
| 604 | return &first->txd; | ||
| 605 | |||
| 606 | err_desc_get: | ||
| 607 | atc_desc_put(atchan, first); | ||
| 608 | return NULL; | ||
| 609 | } | ||
| 610 | |||
| 611 | |||
| 612 | /** | ||
| 613 | * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction | ||
| 614 | * @chan: DMA channel | ||
| 615 | * @sgl: scatterlist to transfer to/from | ||
| 616 | * @sg_len: number of entries in @scatterlist | ||
| 617 | * @direction: DMA direction | ||
| 618 | * @flags: tx descriptor status flags | ||
| 619 | */ | ||
| 620 | static struct dma_async_tx_descriptor * | ||
| 621 | atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | ||
| 622 | unsigned int sg_len, enum dma_data_direction direction, | ||
| 623 | unsigned long flags) | ||
| 624 | { | ||
| 625 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
| 626 | struct at_dma_slave *atslave = chan->private; | ||
| 627 | struct at_desc *first = NULL; | ||
| 628 | struct at_desc *prev = NULL; | ||
| 629 | u32 ctrla; | ||
| 630 | u32 ctrlb; | ||
| 631 | dma_addr_t reg; | ||
| 632 | unsigned int reg_width; | ||
| 633 | unsigned int mem_width; | ||
| 634 | unsigned int i; | ||
| 635 | struct scatterlist *sg; | ||
| 636 | size_t total_len = 0; | ||
| 637 | |||
| 638 | dev_vdbg(chan2dev(chan), "prep_slave_sg: %s f0x%lx\n", | ||
| 639 | direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE", | ||
| 640 | flags); | ||
| 641 | |||
| 642 | if (unlikely(!atslave || !sg_len)) { | ||
| 643 | dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); | ||
| 644 | return NULL; | ||
| 645 | } | ||
| 646 | |||
| 647 | reg_width = atslave->reg_width; | ||
| 648 | |||
| 649 | sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); | ||
| 650 | |||
| 651 | ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; | ||
| 652 | ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; | ||
| 653 | |||
| 654 | switch (direction) { | ||
| 655 | case DMA_TO_DEVICE: | ||
| 656 | ctrla |= ATC_DST_WIDTH(reg_width); | ||
| 657 | ctrlb |= ATC_DST_ADDR_MODE_FIXED | ||
| 658 | | ATC_SRC_ADDR_MODE_INCR | ||
| 659 | | ATC_FC_MEM2PER; | ||
| 660 | reg = atslave->tx_reg; | ||
| 661 | for_each_sg(sgl, sg, sg_len, i) { | ||
| 662 | struct at_desc *desc; | ||
| 663 | u32 len; | ||
| 664 | u32 mem; | ||
| 665 | |||
| 666 | desc = atc_desc_get(atchan); | ||
| 667 | if (!desc) | ||
| 668 | goto err_desc_get; | ||
| 669 | |||
| 670 | mem = sg_phys(sg); | ||
| 671 | len = sg_dma_len(sg); | ||
| 672 | mem_width = 2; | ||
| 673 | if (unlikely(mem & 3 || len & 3)) | ||
| 674 | mem_width = 0; | ||
| 675 | |||
| 676 | desc->lli.saddr = mem; | ||
| 677 | desc->lli.daddr = reg; | ||
| 678 | desc->lli.ctrla = ctrla | ||
| 679 | | ATC_SRC_WIDTH(mem_width) | ||
| 680 | | len >> mem_width; | ||
| 681 | desc->lli.ctrlb = ctrlb; | ||
| 682 | |||
| 683 | if (!first) { | ||
| 684 | first = desc; | ||
| 685 | } else { | ||
| 686 | /* inform the HW lli about chaining */ | ||
| 687 | prev->lli.dscr = desc->txd.phys; | ||
| 688 | /* insert the link descriptor to the LD ring */ | ||
| 689 | list_add_tail(&desc->desc_node, | ||
| 690 | &first->txd.tx_list); | ||
| 691 | } | ||
| 692 | prev = desc; | ||
| 693 | total_len += len; | ||
| 694 | } | ||
| 695 | break; | ||
| 696 | case DMA_FROM_DEVICE: | ||
| 697 | ctrla |= ATC_SRC_WIDTH(reg_width); | ||
| 698 | ctrlb |= ATC_DST_ADDR_MODE_INCR | ||
| 699 | | ATC_SRC_ADDR_MODE_FIXED | ||
| 700 | | ATC_FC_PER2MEM; | ||
| 701 | |||
| 702 | reg = atslave->rx_reg; | ||
| 703 | for_each_sg(sgl, sg, sg_len, i) { | ||
| 704 | struct at_desc *desc; | ||
| 705 | u32 len; | ||
| 706 | u32 mem; | ||
| 707 | |||
| 708 | desc = atc_desc_get(atchan); | ||
| 709 | if (!desc) | ||
| 710 | goto err_desc_get; | ||
| 711 | |||
| 712 | mem = sg_phys(sg); | ||
| 713 | len = sg_dma_len(sg); | ||
| 714 | mem_width = 2; | ||
| 715 | if (unlikely(mem & 3 || len & 3)) | ||
| 716 | mem_width = 0; | ||
| 717 | |||
| 718 | desc->lli.saddr = reg; | ||
| 719 | desc->lli.daddr = mem; | ||
| 720 | desc->lli.ctrla = ctrla | ||
| 721 | | ATC_DST_WIDTH(mem_width) | ||
| 722 | | len >> mem_width; | ||
| 723 | desc->lli.ctrlb = ctrlb; | ||
| 724 | |||
| 725 | if (!first) { | ||
| 726 | first = desc; | ||
| 727 | } else { | ||
| 728 | /* inform the HW lli about chaining */ | ||
| 729 | prev->lli.dscr = desc->txd.phys; | ||
| 730 | /* insert the link descriptor to the LD ring */ | ||
| 731 | list_add_tail(&desc->desc_node, | ||
| 732 | &first->txd.tx_list); | ||
| 733 | } | ||
| 734 | prev = desc; | ||
| 735 | total_len += len; | ||
| 736 | } | ||
| 737 | break; | ||
| 738 | default: | ||
| 739 | return NULL; | ||
| 740 | } | ||
| 741 | |||
| 742 | /* set end-of-link to the last link descriptor of list*/ | ||
| 743 | set_desc_eol(prev); | ||
| 744 | |||
| 745 | /* First descriptor of the chain embedds additional information */ | ||
| 746 | first->txd.cookie = -EBUSY; | ||
| 747 | first->len = total_len; | ||
| 748 | |||
| 749 | /* last link descriptor of list is responsible of flags */ | ||
| 750 | prev->txd.flags = flags; /* client is in control of this ack */ | ||
| 751 | |||
| 752 | return &first->txd; | ||
| 753 | |||
| 754 | err_desc_get: | ||
| 755 | dev_err(chan2dev(chan), "not enough descriptors available\n"); | ||
| 756 | atc_desc_put(atchan, first); | ||
| 757 | return NULL; | ||
| 758 | } | ||
| 759 | |||
| 760 | static void atc_terminate_all(struct dma_chan *chan) | ||
| 761 | { | ||
| 762 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
| 763 | struct at_dma *atdma = to_at_dma(chan->device); | ||
| 764 | struct at_desc *desc, *_desc; | ||
| 765 | LIST_HEAD(list); | ||
| 766 | |||
| 767 | /* | ||
| 768 | * This is only called when something went wrong elsewhere, so | ||
| 769 | * we don't really care about the data. Just disable the | ||
| 770 | * channel. We still have to poll the channel enable bit due | ||
| 771 | * to AHB/HSB limitations. | ||
| 772 | */ | ||
| 773 | spin_lock_bh(&atchan->lock); | ||
| 774 | |||
| 775 | dma_writel(atdma, CHDR, atchan->mask); | ||
| 776 | |||
| 777 | /* confirm that this channel is disabled */ | ||
| 778 | while (dma_readl(atdma, CHSR) & atchan->mask) | ||
| 779 | cpu_relax(); | ||
| 780 | |||
| 781 | /* active_list entries will end up before queued entries */ | ||
| 782 | list_splice_init(&atchan->queue, &list); | ||
| 783 | list_splice_init(&atchan->active_list, &list); | ||
| 784 | |||
| 785 | spin_unlock_bh(&atchan->lock); | ||
| 786 | |||
| 787 | /* Flush all pending and queued descriptors */ | ||
| 788 | list_for_each_entry_safe(desc, _desc, &list, desc_node) | ||
| 789 | atc_chain_complete(atchan, desc); | ||
| 790 | } | ||
| 791 | |||
| 792 | /** | ||
| 793 | * atc_is_tx_complete - poll for transaction completion | ||
| 794 | * @chan: DMA channel | ||
| 795 | * @cookie: transaction identifier to check status of | ||
| 796 | * @done: if not %NULL, updated with last completed transaction | ||
| 797 | * @used: if not %NULL, updated with last used transaction | ||
| 798 | * | ||
| 799 | * If @done and @used are passed in, upon return they reflect the driver | ||
| 800 | * internal state and can be used with dma_async_is_complete() to check | ||
| 801 | * the status of multiple cookies without re-checking hardware state. | ||
| 802 | */ | ||
| 803 | static enum dma_status | ||
| 804 | atc_is_tx_complete(struct dma_chan *chan, | ||
| 805 | dma_cookie_t cookie, | ||
| 806 | dma_cookie_t *done, dma_cookie_t *used) | ||
| 807 | { | ||
| 808 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
| 809 | dma_cookie_t last_used; | ||
| 810 | dma_cookie_t last_complete; | ||
| 811 | enum dma_status ret; | ||
| 812 | |||
| 813 | dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", | ||
| 814 | cookie, done ? *done : 0, used ? *used : 0); | ||
| 815 | |||
| 816 | spin_lock_bh(atchan->lock); | ||
| 817 | |||
| 818 | last_complete = atchan->completed_cookie; | ||
| 819 | last_used = chan->cookie; | ||
| 820 | |||
| 821 | ret = dma_async_is_complete(cookie, last_complete, last_used); | ||
| 822 | if (ret != DMA_SUCCESS) { | ||
| 823 | atc_cleanup_descriptors(atchan); | ||
| 824 | |||
| 825 | last_complete = atchan->completed_cookie; | ||
| 826 | last_used = chan->cookie; | ||
| 827 | |||
| 828 | ret = dma_async_is_complete(cookie, last_complete, last_used); | ||
| 829 | } | ||
| 830 | |||
| 831 | spin_unlock_bh(atchan->lock); | ||
| 832 | |||
| 833 | if (done) | ||
| 834 | *done = last_complete; | ||
| 835 | if (used) | ||
| 836 | *used = last_used; | ||
| 837 | |||
| 838 | return ret; | ||
| 839 | } | ||
| 840 | |||
| 841 | /** | ||
| 842 | * atc_issue_pending - try to finish work | ||
| 843 | * @chan: target DMA channel | ||
| 844 | */ | ||
| 845 | static void atc_issue_pending(struct dma_chan *chan) | ||
| 846 | { | ||
| 847 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
| 848 | |||
| 849 | dev_vdbg(chan2dev(chan), "issue_pending\n"); | ||
| 850 | |||
| 851 | if (!atc_chan_is_enabled(atchan)) { | ||
| 852 | spin_lock_bh(&atchan->lock); | ||
| 853 | atc_advance_work(atchan); | ||
| 854 | spin_unlock_bh(&atchan->lock); | ||
| 855 | } | ||
| 856 | } | ||
| 857 | |||
| 858 | /** | ||
| 859 | * atc_alloc_chan_resources - allocate resources for DMA channel | ||
| 860 | * @chan: allocate descriptor resources for this channel | ||
| 861 | * @client: current client requesting the channel be ready for requests | ||
| 862 | * | ||
| 863 | * return - the number of allocated descriptors | ||
| 864 | */ | ||
| 865 | static int atc_alloc_chan_resources(struct dma_chan *chan) | ||
| 866 | { | ||
| 867 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
| 868 | struct at_dma *atdma = to_at_dma(chan->device); | ||
| 869 | struct at_desc *desc; | ||
| 870 | struct at_dma_slave *atslave; | ||
| 871 | int i; | ||
| 872 | u32 cfg; | ||
| 873 | LIST_HEAD(tmp_list); | ||
| 874 | |||
| 875 | dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); | ||
| 876 | |||
| 877 | /* ASSERT: channel is idle */ | ||
| 878 | if (atc_chan_is_enabled(atchan)) { | ||
| 879 | dev_dbg(chan2dev(chan), "DMA channel not idle ?\n"); | ||
| 880 | return -EIO; | ||
| 881 | } | ||
| 882 | |||
| 883 | cfg = ATC_DEFAULT_CFG; | ||
| 884 | |||
| 885 | atslave = chan->private; | ||
| 886 | if (atslave) { | ||
| 887 | /* | ||
| 888 | * We need controller-specific data to set up slave | ||
| 889 | * transfers. | ||
| 890 | */ | ||
| 891 | BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev); | ||
| 892 | |||
| 893 | /* if cfg configuration specified take it instad of default */ | ||
| 894 | if (atslave->cfg) | ||
| 895 | cfg = atslave->cfg; | ||
| 896 | } | ||
| 897 | |||
| 898 | /* have we already been set up? | ||
| 899 | * reconfigure channel but no need to reallocate descriptors */ | ||
| 900 | if (!list_empty(&atchan->free_list)) | ||
| 901 | return atchan->descs_allocated; | ||
| 902 | |||
| 903 | /* Allocate initial pool of descriptors */ | ||
| 904 | for (i = 0; i < init_nr_desc_per_channel; i++) { | ||
| 905 | desc = atc_alloc_descriptor(chan, GFP_KERNEL); | ||
| 906 | if (!desc) { | ||
| 907 | dev_err(atdma->dma_common.dev, | ||
| 908 | "Only %d initial descriptors\n", i); | ||
| 909 | break; | ||
| 910 | } | ||
| 911 | list_add_tail(&desc->desc_node, &tmp_list); | ||
| 912 | } | ||
| 913 | |||
| 914 | spin_lock_bh(&atchan->lock); | ||
| 915 | atchan->descs_allocated = i; | ||
| 916 | list_splice(&tmp_list, &atchan->free_list); | ||
| 917 | atchan->completed_cookie = chan->cookie = 1; | ||
| 918 | spin_unlock_bh(&atchan->lock); | ||
| 919 | |||
| 920 | /* channel parameters */ | ||
| 921 | channel_writel(atchan, CFG, cfg); | ||
| 922 | |||
| 923 | dev_dbg(chan2dev(chan), | ||
| 924 | "alloc_chan_resources: allocated %d descriptors\n", | ||
| 925 | atchan->descs_allocated); | ||
| 926 | |||
| 927 | return atchan->descs_allocated; | ||
| 928 | } | ||
| 929 | |||
| 930 | /** | ||
| 931 | * atc_free_chan_resources - free all channel resources | ||
| 932 | * @chan: DMA channel | ||
| 933 | */ | ||
| 934 | static void atc_free_chan_resources(struct dma_chan *chan) | ||
| 935 | { | ||
| 936 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
| 937 | struct at_dma *atdma = to_at_dma(chan->device); | ||
| 938 | struct at_desc *desc, *_desc; | ||
| 939 | LIST_HEAD(list); | ||
| 940 | |||
| 941 | dev_dbg(chan2dev(chan), "free_chan_resources: (descs allocated=%u)\n", | ||
| 942 | atchan->descs_allocated); | ||
| 943 | |||
| 944 | /* ASSERT: channel is idle */ | ||
| 945 | BUG_ON(!list_empty(&atchan->active_list)); | ||
| 946 | BUG_ON(!list_empty(&atchan->queue)); | ||
| 947 | BUG_ON(atc_chan_is_enabled(atchan)); | ||
| 948 | |||
| 949 | list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { | ||
| 950 | dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); | ||
| 951 | list_del(&desc->desc_node); | ||
| 952 | /* free link descriptor */ | ||
| 953 | dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys); | ||
| 954 | } | ||
| 955 | list_splice_init(&atchan->free_list, &list); | ||
| 956 | atchan->descs_allocated = 0; | ||
| 957 | |||
| 958 | dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); | ||
| 959 | } | ||
| 960 | |||
| 961 | |||
| 962 | /*-- Module Management -----------------------------------------------*/ | ||
| 963 | |||
| 964 | /** | ||
| 965 | * at_dma_off - disable DMA controller | ||
| 966 | * @atdma: the Atmel HDAMC device | ||
| 967 | */ | ||
| 968 | static void at_dma_off(struct at_dma *atdma) | ||
| 969 | { | ||
| 970 | dma_writel(atdma, EN, 0); | ||
| 971 | |||
| 972 | /* disable all interrupts */ | ||
| 973 | dma_writel(atdma, EBCIDR, -1L); | ||
| 974 | |||
| 975 | /* confirm that all channels are disabled */ | ||
| 976 | while (dma_readl(atdma, CHSR) & atdma->all_chan_mask) | ||
| 977 | cpu_relax(); | ||
| 978 | } | ||
| 979 | |||
| 980 | static int __init at_dma_probe(struct platform_device *pdev) | ||
| 981 | { | ||
| 982 | struct at_dma_platform_data *pdata; | ||
| 983 | struct resource *io; | ||
| 984 | struct at_dma *atdma; | ||
| 985 | size_t size; | ||
| 986 | int irq; | ||
| 987 | int err; | ||
| 988 | int i; | ||
| 989 | |||
| 990 | /* get DMA Controller parameters from platform */ | ||
| 991 | pdata = pdev->dev.platform_data; | ||
| 992 | if (!pdata || pdata->nr_channels > AT_DMA_MAX_NR_CHANNELS) | ||
| 993 | return -EINVAL; | ||
| 994 | |||
| 995 | io = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
| 996 | if (!io) | ||
| 997 | return -EINVAL; | ||
| 998 | |||
| 999 | irq = platform_get_irq(pdev, 0); | ||
| 1000 | if (irq < 0) | ||
| 1001 | return irq; | ||
| 1002 | |||
| 1003 | size = sizeof(struct at_dma); | ||
| 1004 | size += pdata->nr_channels * sizeof(struct at_dma_chan); | ||
| 1005 | atdma = kzalloc(size, GFP_KERNEL); | ||
| 1006 | if (!atdma) | ||
| 1007 | return -ENOMEM; | ||
| 1008 | |||
| 1009 | /* discover transaction capabilites from the platform data */ | ||
| 1010 | atdma->dma_common.cap_mask = pdata->cap_mask; | ||
| 1011 | atdma->all_chan_mask = (1 << pdata->nr_channels) - 1; | ||
| 1012 | |||
| 1013 | size = io->end - io->start + 1; | ||
| 1014 | if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { | ||
| 1015 | err = -EBUSY; | ||
| 1016 | goto err_kfree; | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | atdma->regs = ioremap(io->start, size); | ||
| 1020 | if (!atdma->regs) { | ||
| 1021 | err = -ENOMEM; | ||
| 1022 | goto err_release_r; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | atdma->clk = clk_get(&pdev->dev, "dma_clk"); | ||
| 1026 | if (IS_ERR(atdma->clk)) { | ||
| 1027 | err = PTR_ERR(atdma->clk); | ||
| 1028 | goto err_clk; | ||
| 1029 | } | ||
| 1030 | clk_enable(atdma->clk); | ||
| 1031 | |||
| 1032 | /* force dma off, just in case */ | ||
| 1033 | at_dma_off(atdma); | ||
| 1034 | |||
| 1035 | err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma); | ||
| 1036 | if (err) | ||
| 1037 | goto err_irq; | ||
| 1038 | |||
| 1039 | platform_set_drvdata(pdev, atdma); | ||
| 1040 | |||
| 1041 | /* create a pool of consistent memory blocks for hardware descriptors */ | ||
| 1042 | atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool", | ||
| 1043 | &pdev->dev, sizeof(struct at_desc), | ||
| 1044 | 4 /* word alignment */, 0); | ||
| 1045 | if (!atdma->dma_desc_pool) { | ||
| 1046 | dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); | ||
| 1047 | err = -ENOMEM; | ||
| 1048 | goto err_pool_create; | ||
| 1049 | } | ||
| 1050 | |||
| 1051 | /* clear any pending interrupt */ | ||
| 1052 | while (dma_readl(atdma, EBCISR)) | ||
| 1053 | cpu_relax(); | ||
| 1054 | |||
| 1055 | /* initialize channels related values */ | ||
| 1056 | INIT_LIST_HEAD(&atdma->dma_common.channels); | ||
| 1057 | for (i = 0; i < pdata->nr_channels; i++, atdma->dma_common.chancnt++) { | ||
| 1058 | struct at_dma_chan *atchan = &atdma->chan[i]; | ||
| 1059 | |||
| 1060 | atchan->chan_common.device = &atdma->dma_common; | ||
| 1061 | atchan->chan_common.cookie = atchan->completed_cookie = 1; | ||
| 1062 | atchan->chan_common.chan_id = i; | ||
| 1063 | list_add_tail(&atchan->chan_common.device_node, | ||
| 1064 | &atdma->dma_common.channels); | ||
| 1065 | |||
| 1066 | atchan->ch_regs = atdma->regs + ch_regs(i); | ||
| 1067 | spin_lock_init(&atchan->lock); | ||
| 1068 | atchan->mask = 1 << i; | ||
| 1069 | |||
| 1070 | INIT_LIST_HEAD(&atchan->active_list); | ||
| 1071 | INIT_LIST_HEAD(&atchan->queue); | ||
| 1072 | INIT_LIST_HEAD(&atchan->free_list); | ||
| 1073 | |||
| 1074 | tasklet_init(&atchan->tasklet, atc_tasklet, | ||
| 1075 | (unsigned long)atchan); | ||
| 1076 | atc_enable_irq(atchan); | ||
| 1077 | } | ||
| 1078 | |||
| 1079 | /* set base routines */ | ||
| 1080 | atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources; | ||
| 1081 | atdma->dma_common.device_free_chan_resources = atc_free_chan_resources; | ||
| 1082 | atdma->dma_common.device_is_tx_complete = atc_is_tx_complete; | ||
| 1083 | atdma->dma_common.device_issue_pending = atc_issue_pending; | ||
| 1084 | atdma->dma_common.dev = &pdev->dev; | ||
| 1085 | |||
| 1086 | /* set prep routines based on capability */ | ||
| 1087 | if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) | ||
| 1088 | atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; | ||
| 1089 | |||
| 1090 | if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { | ||
| 1091 | atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; | ||
| 1092 | atdma->dma_common.device_terminate_all = atc_terminate_all; | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | dma_writel(atdma, EN, AT_DMA_ENABLE); | ||
| 1096 | |||
| 1097 | dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n", | ||
| 1098 | dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", | ||
| 1099 | dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", | ||
| 1100 | atdma->dma_common.chancnt); | ||
| 1101 | |||
| 1102 | dma_async_device_register(&atdma->dma_common); | ||
| 1103 | |||
| 1104 | return 0; | ||
| 1105 | |||
| 1106 | err_pool_create: | ||
| 1107 | platform_set_drvdata(pdev, NULL); | ||
| 1108 | free_irq(platform_get_irq(pdev, 0), atdma); | ||
| 1109 | err_irq: | ||
| 1110 | clk_disable(atdma->clk); | ||
| 1111 | clk_put(atdma->clk); | ||
| 1112 | err_clk: | ||
| 1113 | iounmap(atdma->regs); | ||
| 1114 | atdma->regs = NULL; | ||
| 1115 | err_release_r: | ||
| 1116 | release_mem_region(io->start, size); | ||
| 1117 | err_kfree: | ||
| 1118 | kfree(atdma); | ||
| 1119 | return err; | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | static int __exit at_dma_remove(struct platform_device *pdev) | ||
| 1123 | { | ||
| 1124 | struct at_dma *atdma = platform_get_drvdata(pdev); | ||
| 1125 | struct dma_chan *chan, *_chan; | ||
| 1126 | struct resource *io; | ||
| 1127 | |||
| 1128 | at_dma_off(atdma); | ||
| 1129 | dma_async_device_unregister(&atdma->dma_common); | ||
| 1130 | |||
| 1131 | dma_pool_destroy(atdma->dma_desc_pool); | ||
| 1132 | platform_set_drvdata(pdev, NULL); | ||
| 1133 | free_irq(platform_get_irq(pdev, 0), atdma); | ||
| 1134 | |||
| 1135 | list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, | ||
| 1136 | device_node) { | ||
| 1137 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
| 1138 | |||
| 1139 | /* Disable interrupts */ | ||
| 1140 | atc_disable_irq(atchan); | ||
| 1141 | tasklet_disable(&atchan->tasklet); | ||
| 1142 | |||
| 1143 | tasklet_kill(&atchan->tasklet); | ||
| 1144 | list_del(&chan->device_node); | ||
| 1145 | } | ||
| 1146 | |||
| 1147 | clk_disable(atdma->clk); | ||
| 1148 | clk_put(atdma->clk); | ||
| 1149 | |||
| 1150 | iounmap(atdma->regs); | ||
| 1151 | atdma->regs = NULL; | ||
| 1152 | |||
| 1153 | io = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
| 1154 | release_mem_region(io->start, io->end - io->start + 1); | ||
| 1155 | |||
| 1156 | kfree(atdma); | ||
| 1157 | |||
| 1158 | return 0; | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | static void at_dma_shutdown(struct platform_device *pdev) | ||
| 1162 | { | ||
| 1163 | struct at_dma *atdma = platform_get_drvdata(pdev); | ||
| 1164 | |||
| 1165 | at_dma_off(platform_get_drvdata(pdev)); | ||
| 1166 | clk_disable(atdma->clk); | ||
| 1167 | } | ||
| 1168 | |||
| 1169 | static int at_dma_suspend_late(struct platform_device *pdev, pm_message_t mesg) | ||
| 1170 | { | ||
| 1171 | struct at_dma *atdma = platform_get_drvdata(pdev); | ||
| 1172 | |||
| 1173 | at_dma_off(platform_get_drvdata(pdev)); | ||
| 1174 | clk_disable(atdma->clk); | ||
| 1175 | return 0; | ||
| 1176 | } | ||
| 1177 | |||
| 1178 | static int at_dma_resume_early(struct platform_device *pdev) | ||
| 1179 | { | ||
| 1180 | struct at_dma *atdma = platform_get_drvdata(pdev); | ||
| 1181 | |||
| 1182 | clk_enable(atdma->clk); | ||
| 1183 | dma_writel(atdma, EN, AT_DMA_ENABLE); | ||
| 1184 | return 0; | ||
| 1185 | |||
| 1186 | } | ||
| 1187 | |||
| 1188 | static struct platform_driver at_dma_driver = { | ||
| 1189 | .remove = __exit_p(at_dma_remove), | ||
| 1190 | .shutdown = at_dma_shutdown, | ||
| 1191 | .suspend_late = at_dma_suspend_late, | ||
| 1192 | .resume_early = at_dma_resume_early, | ||
| 1193 | .driver = { | ||
| 1194 | .name = "at_hdmac", | ||
| 1195 | }, | ||
| 1196 | }; | ||
| 1197 | |||
| 1198 | static int __init at_dma_init(void) | ||
| 1199 | { | ||
| 1200 | return platform_driver_probe(&at_dma_driver, at_dma_probe); | ||
| 1201 | } | ||
| 1202 | module_init(at_dma_init); | ||
| 1203 | |||
| 1204 | static void __exit at_dma_exit(void) | ||
| 1205 | { | ||
| 1206 | platform_driver_unregister(&at_dma_driver); | ||
| 1207 | } | ||
| 1208 | module_exit(at_dma_exit); | ||
| 1209 | |||
| 1210 | MODULE_DESCRIPTION("Atmel AHB DMA Controller driver"); | ||
| 1211 | MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>"); | ||
| 1212 | MODULE_LICENSE("GPL"); | ||
| 1213 | MODULE_ALIAS("platform:at_hdmac"); | ||
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h new file mode 100644 index 000000000000..4c972afc49ec --- /dev/null +++ b/drivers/dma/at_hdmac_regs.h | |||
| @@ -0,0 +1,353 @@ | |||
| 1 | /* | ||
| 2 | * Header file for the Atmel AHB DMA Controller driver | ||
| 3 | * | ||
| 4 | * Copyright (C) 2008 Atmel Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #ifndef AT_HDMAC_REGS_H | ||
| 12 | #define AT_HDMAC_REGS_H | ||
| 13 | |||
| 14 | #include <mach/at_hdmac.h> | ||
| 15 | |||
/* Highest channel count accepted; per-channel bit macros take x in 0..7 */
#define	AT_DMA_MAX_NR_CHANNELS	8


#define	AT_DMA_GCFG	0x00	/* Global Configuration Register */
#define		AT_DMA_IF_BIGEND(i)	(0x1 << (i))	/* AHB-Lite Interface i in Big-endian mode */
#define		AT_DMA_ARB_CFG	(0x1 << 4)	/* Arbiter mode. */
#define			AT_DMA_ARB_CFG_FIXED		(0x0 << 4)
#define			AT_DMA_ARB_CFG_ROUND_ROBIN	(0x1 << 4)

#define	AT_DMA_EN	0x04	/* Controller Enable Register */
#define		AT_DMA_ENABLE	(0x1 << 0)

#define	AT_DMA_SREQ	0x08	/* Software Single Request Register */
#define		AT_DMA_SSREQ(x)	(0x1 << ((x) << 1))		/* Request a source single transfer on channel x */
#define		AT_DMA_DSREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination single transfer on channel x */

#define	AT_DMA_CREQ	0x0C	/* Software Chunk Transfer Request Register */
#define		AT_DMA_SCREQ(x)	(0x1 << ((x) << 1))		/* Request a source chunk transfer on channel x */
#define		AT_DMA_DCREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination chunk transfer on channel x */

#define	AT_DMA_LAST	0x10	/* Software Last Transfer Flag Register */
#define		AT_DMA_SLAST(x)	(0x1 << ((x) << 1))		/* This src rq is last tx of buffer on channel x */
#define		AT_DMA_DLAST(x)	(0x1 << (1 + ((x) << 1)))	/* This dst rq is last tx of buffer on channel x */

#define	AT_DMA_SYNC	0x14	/* Request Synchronization Register */
#define		AT_DMA_SYR(h)	(0x1 << (h))			/* Synchronize handshake line h */

/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */
#define	AT_DMA_EBCIER	0x18	/* Enable register */
#define	AT_DMA_EBCIDR	0x1C	/* Disable register */
#define	AT_DMA_EBCIMR	0x20	/* Mask Register */
#define	AT_DMA_EBCISR	0x24	/* Status Register */
#define		AT_DMA_CBTC_OFFSET	8
#define		AT_DMA_ERR_OFFSET	16
#define		AT_DMA_BTC(x)	(0x1 << (x))
#define		AT_DMA_CBTC(x)	(0x1 << (AT_DMA_CBTC_OFFSET + (x)))
#define		AT_DMA_ERR(x)	(0x1 << (AT_DMA_ERR_OFFSET + (x)))

#define	AT_DMA_CHER	0x28	/* Channel Handler Enable Register */
#define		AT_DMA_ENA(x)	(0x1 << (x))
#define		AT_DMA_SUSP(x)	(0x1 << ( 8 + (x)))
/* unsigned constant: channel 7 lands on bit 31, out of reach of a signed int */
#define		AT_DMA_KEEP(x)	(0x1U << (24 + (x)))

#define	AT_DMA_CHDR	0x2C	/* Channel Handler Disable Register */
#define		AT_DMA_DIS(x)	(0x1 << (x))
#define		AT_DMA_RES(x)	(0x1 << ( 8 + (x)))

#define	AT_DMA_CHSR	0x30	/* Channel Handler Status Register */
#define		AT_DMA_EMPT(x)	(0x1 << (16 + (x)))
/* unsigned constant: channel 7 lands on bit 31, out of reach of a signed int */
#define		AT_DMA_STAL(x)	(0x1U << (24 + (x)))


#define	AT_DMA_CH_REGS_BASE	0x3C	/* Channel registers base address */
#define	ch_regs(x)	(AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */

/* Hardware register offset for each channel */
#define	ATC_SADDR_OFFSET	0x00	/* Source Address Register */
#define	ATC_DADDR_OFFSET	0x04	/* Destination Address Register */
#define	ATC_DSCR_OFFSET		0x08	/* Descriptor Address Register */
#define	ATC_CTRLA_OFFSET	0x0C	/* Control A Register */
#define	ATC_CTRLB_OFFSET	0x10	/* Control B Register */
#define	ATC_CFG_OFFSET		0x14	/* Configuration Register */
#define	ATC_SPIP_OFFSET		0x18	/* Src PIP Configuration Register */
#define	ATC_DPIP_OFFSET		0x1C	/* Dst PIP Configuration Register */
| 80 | |||
| 81 | |||
/* Bitfield definitions */

/* Bitfields in DSCR */
#define	ATC_DSCR_IF(i)		(0x3 & (i))	/* Dsc fetched via AHB-Lite Interface i */

/* Bitfields in CTRLA */
#define	ATC_BTSIZE_MAX		0xFFFFUL	/* Maximum Buffer Transfer Size */
#define	ATC_BTSIZE(x)		(ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */
/* Chunk Transfer size definitions are in at_hdmac.h */
#define	ATC_SRC_WIDTH_MASK	(0x3 << 24)	/* Source Single Transfer Size */
#define		ATC_SRC_WIDTH(x)	((x) << 24)
#define		ATC_SRC_WIDTH_BYTE	(0x0 << 24)
#define		ATC_SRC_WIDTH_HALFWORD	(0x1 << 24)
#define		ATC_SRC_WIDTH_WORD	(0x2 << 24)
#define	ATC_DST_WIDTH_MASK	(0x3 << 28)	/* Destination Single Transfer Size */
#define		ATC_DST_WIDTH(x)	((x) << 28)
#define		ATC_DST_WIDTH_BYTE	(0x0 << 28)
#define		ATC_DST_WIDTH_HALFWORD	(0x1 << 28)
#define		ATC_DST_WIDTH_WORD	(0x2 << 28)
/* bit 31 needs an unsigned constant: 0x1 << 31 does not fit a signed int */
#define	ATC_DONE		(0x1U << 31)	/* Tx Done (only written back in descriptor) */

/* Bitfields in CTRLB */
#define	ATC_SIF(i)		(0x3 & (i))	/* Src tx done via AHB-Lite Interface i */
#define	ATC_DIF(i)		((0x3 & (i)) <<  4)	/* Dst tx done via AHB-Lite Interface i */
#define	ATC_SRC_PIP		(0x1 <<  8)	/* Source Picture-in-Picture enabled */
#define	ATC_DST_PIP		(0x1 << 12)	/* Destination Picture-in-Picture enabled */
#define	ATC_SRC_DSCR_DIS	(0x1 << 16)	/* Src Descriptor fetch disable */
#define	ATC_DST_DSCR_DIS	(0x1 << 20)	/* Dst Descriptor fetch disable */
#define	ATC_FC_MASK		(0x7 << 21)	/* Choose Flow Controller */
#define		ATC_FC_MEM2MEM		(0x0 << 21)	/* Mem-to-Mem (DMA) */
#define		ATC_FC_MEM2PER		(0x1 << 21)	/* Mem-to-Periph (DMA) */
#define		ATC_FC_PER2MEM		(0x2 << 21)	/* Periph-to-Mem (DMA) */
#define		ATC_FC_PER2PER		(0x3 << 21)	/* Periph-to-Periph (DMA) */
#define		ATC_FC_PER2MEM_PER	(0x4 << 21)	/* Periph-to-Mem (Peripheral) */
#define		ATC_FC_MEM2PER_PER	(0x5 << 21)	/* Mem-to-Periph (Peripheral) */
#define		ATC_FC_PER2PER_SRCPER	(0x6 << 21)	/* Periph-to-Periph (Src Peripheral) */
#define		ATC_FC_PER2PER_DSTPER	(0x7 << 21)	/* Periph-to-Periph (Dst Peripheral) */
#define	ATC_SRC_ADDR_MODE_MASK	(0x3 << 24)
#define		ATC_SRC_ADDR_MODE_INCR	(0x0 << 24)	/* Incrementing Mode */
#define		ATC_SRC_ADDR_MODE_DECR	(0x1 << 24)	/* Decrementing Mode */
#define		ATC_SRC_ADDR_MODE_FIXED	(0x2 << 24)	/* Fixed Mode */
#define	ATC_DST_ADDR_MODE_MASK	(0x3 << 28)
#define		ATC_DST_ADDR_MODE_INCR	(0x0 << 28)	/* Incrementing Mode */
#define		ATC_DST_ADDR_MODE_DECR	(0x1 << 28)	/* Decrementing Mode */
#define		ATC_DST_ADDR_MODE_FIXED	(0x2 << 28)	/* Fixed Mode */
#define	ATC_IEN			(0x1 << 30)	/* BTC interrupt enable (active low) */
/* bit 31 needs an unsigned constant: 0x1 << 31 does not fit a signed int */
#define	ATC_AUTO		(0x1U << 31)	/* Auto multiple buffer tx enable */

/* Bitfields in CFG */
/* are in at_hdmac.h */

/* Bitfields in SPIP */
#define	ATC_SPIP_HOLE(x)	(0xFFFFU & (x))
#define	ATC_SPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)

/* Bitfields in DPIP */
#define	ATC_DPIP_HOLE(x)	(0xFFFFU & (x))
#define	ATC_DPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
| 140 | |||
| 141 | |||
| 142 | /*-- descriptors -----------------------------------------------------*/ | ||
| 143 | |||
| 144 | /* LLI == Linked List Item; aka DMA buffer descriptor */ | ||
| 145 | struct at_lli { | ||
| 146 | /* values that are not changed by hardware */ | ||
| 147 | dma_addr_t saddr; | ||
| 148 | dma_addr_t daddr; | ||
| 149 | /* value that may get written back: */ | ||
| 150 | u32 ctrla; | ||
| 151 | /* more values that are not changed by hardware */ | ||
| 152 | u32 ctrlb; | ||
| 153 | dma_addr_t dscr; /* chain to next lli */ | ||
| 154 | }; | ||
| 155 | |||
| 156 | /** | ||
| 157 | * struct at_desc - software descriptor | ||
| 158 | * @lli: hardware lli structure | ||
| 159 | * @txd: support for the async_tx api | ||
| 160 | * @desc_node: node on the channel descriptors list | ||
| 161 | * @len: total transaction bytecount | ||
| 162 | */ | ||
| 163 | struct at_desc { | ||
| 164 | /* FIRST values the hardware uses */ | ||
| 165 | struct at_lli lli; | ||
| 166 | |||
| 167 | /* THEN values for driver housekeeping */ | ||
| 168 | struct dma_async_tx_descriptor txd; | ||
| 169 | struct list_head desc_node; | ||
| 170 | size_t len; | ||
| 171 | }; | ||
| 172 | |||
| 173 | static inline struct at_desc * | ||
| 174 | txd_to_at_desc(struct dma_async_tx_descriptor *txd) | ||
| 175 | { | ||
| 176 | return container_of(txd, struct at_desc, txd); | ||
| 177 | } | ||
| 178 | |||
| 179 | |||
| 180 | /*-- Channels --------------------------------------------------------*/ | ||
| 181 | |||
| 182 | /** | ||
| 183 | * struct at_dma_chan - internal representation of an Atmel HDMAC channel | ||
| 184 | * @chan_common: common dmaengine channel object members | ||
| 185 | * @device: parent device | ||
| 186 | * @ch_regs: memory mapped register base | ||
| 187 | * @mask: channel index in a mask | ||
| 188 | * @error_status: transmit error status information from irq handler | ||
| 189 | * to tasklet (use atomic operations) | ||
| 190 | * @tasklet: bottom half to finish transaction work | ||
| 191 | * @lock: serializes enqueue/dequeue operations to descriptors lists | ||
| 192 | * @completed_cookie: identifier for the most recently completed operation | ||
| 193 | * @active_list: list of descriptors the dmaengine is currently running on | ||
| 194 | * @queue: list of descriptors ready to be submitted to engine | ||
| 195 | * @free_list: list of descriptors usable by the channel | ||
| 196 | * @descs_allocated: records the actual size of the descriptor pool | ||
| 197 | */ | ||
| 198 | struct at_dma_chan { | ||
| 199 | struct dma_chan chan_common; | ||
| 200 | struct at_dma *device; | ||
| 201 | void __iomem *ch_regs; | ||
| 202 | u8 mask; | ||
| 203 | unsigned long error_status; | ||
| 204 | struct tasklet_struct tasklet; | ||
| 205 | |||
| 206 | spinlock_t lock; | ||
| 207 | |||
| 208 | /* these other elements are all protected by lock */ | ||
| 209 | dma_cookie_t completed_cookie; | ||
| 210 | struct list_head active_list; | ||
| 211 | struct list_head queue; | ||
| 212 | struct list_head free_list; | ||
| 213 | unsigned int descs_allocated; | ||
| 214 | }; | ||
| 215 | |||
| 216 | #define channel_readl(atchan, name) \ | ||
| 217 | __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) | ||
| 218 | |||
| 219 | #define channel_writel(atchan, name, val) \ | ||
| 220 | __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) | ||
| 221 | |||
| 222 | static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) | ||
| 223 | { | ||
| 224 | return container_of(dchan, struct at_dma_chan, chan_common); | ||
| 225 | } | ||
| 226 | |||
| 227 | |||
| 228 | /*-- Controller ------------------------------------------------------*/ | ||
| 229 | |||
| 230 | /** | ||
| 231 | * struct at_dma - internal representation of an Atmel HDMA Controller | ||
| 232 | * @dma_common: common dmaengine dma_device object members | ||
| 233 | * @regs: memory mapped register base | ||
| 234 | * @clk: dma controller clock | ||
| 235 | * @all_chan_mask: all channels available in a mask | ||
| 236 | * @dma_desc_pool: base of DMA descriptor region (DMA address) | ||
| 237 | * @chan: channels table to store at_dma_chan structures | ||
| 238 | */ | ||
| 239 | struct at_dma { | ||
| 240 | struct dma_device dma_common; | ||
| 241 | void __iomem *regs; | ||
| 242 | struct clk *clk; | ||
| 243 | |||
| 244 | u8 all_chan_mask; | ||
| 245 | |||
| 246 | struct dma_pool *dma_desc_pool; | ||
| 247 | /* AT THE END channels table */ | ||
| 248 | struct at_dma_chan chan[0]; | ||
| 249 | }; | ||
| 250 | |||
| 251 | #define dma_readl(atdma, name) \ | ||
| 252 | __raw_readl((atdma)->regs + AT_DMA_##name) | ||
| 253 | #define dma_writel(atdma, name, val) \ | ||
| 254 | __raw_writel((val), (atdma)->regs + AT_DMA_##name) | ||
| 255 | |||
| 256 | static inline struct at_dma *to_at_dma(struct dma_device *ddev) | ||
| 257 | { | ||
| 258 | return container_of(ddev, struct at_dma, dma_common); | ||
| 259 | } | ||
| 260 | |||
| 261 | |||
| 262 | /*-- Helper functions ------------------------------------------------*/ | ||
| 263 | |||
| 264 | static struct device *chan2dev(struct dma_chan *chan) | ||
| 265 | { | ||
| 266 | return &chan->dev->device; | ||
| 267 | } | ||
| 268 | static struct device *chan2parent(struct dma_chan *chan) | ||
| 269 | { | ||
| 270 | return chan->dev->device.parent; | ||
| 271 | } | ||
| 272 | |||
| 273 | #if defined(VERBOSE_DEBUG) | ||
| 274 | static void vdbg_dump_regs(struct at_dma_chan *atchan) | ||
| 275 | { | ||
| 276 | struct at_dma *atdma = to_at_dma(atchan->chan_common.device); | ||
| 277 | |||
| 278 | dev_err(chan2dev(&atchan->chan_common), | ||
| 279 | " channel %d : imr = 0x%x, chsr = 0x%x\n", | ||
| 280 | atchan->chan_common.chan_id, | ||
| 281 | dma_readl(atdma, EBCIMR), | ||
| 282 | dma_readl(atdma, CHSR)); | ||
| 283 | |||
| 284 | dev_err(chan2dev(&atchan->chan_common), | ||
| 285 | " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", | ||
| 286 | channel_readl(atchan, SADDR), | ||
| 287 | channel_readl(atchan, DADDR), | ||
| 288 | channel_readl(atchan, CTRLA), | ||
| 289 | channel_readl(atchan, CTRLB), | ||
| 290 | channel_readl(atchan, CFG), | ||
| 291 | channel_readl(atchan, DSCR)); | ||
| 292 | } | ||
| 293 | #else | ||
| 294 | static void vdbg_dump_regs(struct at_dma_chan *atchan) {} | ||
| 295 | #endif | ||
| 296 | |||
| 297 | static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) | ||
| 298 | { | ||
| 299 | dev_printk(KERN_CRIT, chan2dev(&atchan->chan_common), | ||
| 300 | " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", | ||
| 301 | lli->saddr, lli->daddr, | ||
| 302 | lli->ctrla, lli->ctrlb, lli->dscr); | ||
| 303 | } | ||
| 304 | |||
| 305 | |||
| 306 | static void atc_setup_irq(struct at_dma_chan *atchan, int on) | ||
| 307 | { | ||
| 308 | struct at_dma *atdma = to_at_dma(atchan->chan_common.device); | ||
| 309 | u32 ebci; | ||
| 310 | |||
| 311 | /* enable interrupts on buffer chain completion & error */ | ||
| 312 | ebci = AT_DMA_CBTC(atchan->chan_common.chan_id) | ||
| 313 | | AT_DMA_ERR(atchan->chan_common.chan_id); | ||
| 314 | if (on) | ||
| 315 | dma_writel(atdma, EBCIER, ebci); | ||
| 316 | else | ||
| 317 | dma_writel(atdma, EBCIDR, ebci); | ||
| 318 | } | ||
| 319 | |||
| 320 | static inline void atc_enable_irq(struct at_dma_chan *atchan) | ||
| 321 | { | ||
| 322 | atc_setup_irq(atchan, 1); | ||
| 323 | } | ||
| 324 | |||
| 325 | static inline void atc_disable_irq(struct at_dma_chan *atchan) | ||
| 326 | { | ||
| 327 | atc_setup_irq(atchan, 0); | ||
| 328 | } | ||
| 329 | |||
| 330 | |||
| 331 | /** | ||
| 332 | * atc_chan_is_enabled - test if given channel is enabled | ||
| 333 | * @atchan: channel we want to test status | ||
| 334 | */ | ||
| 335 | static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) | ||
| 336 | { | ||
| 337 | struct at_dma *atdma = to_at_dma(atchan->chan_common.device); | ||
| 338 | |||
| 339 | return !!(dma_readl(atdma, CHSR) & atchan->mask); | ||
| 340 | } | ||
| 341 | |||
| 342 | |||
| 343 | /** | ||
| 344 | * set_desc_eol - set end-of-link to descriptor so it will end transfer | ||
| 345 | * @desc: descriptor, single or at the end of a chain, to end chain on | ||
| 346 | */ | ||
| 347 | static void set_desc_eol(struct at_desc *desc) | ||
| 348 | { | ||
| 349 | desc->lli.ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; | ||
| 350 | desc->lli.dscr = 0; | ||
| 351 | } | ||
| 352 | |||
| 353 | #endif /* AT_HDMAC_REGS_H */ | ||
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index fb7da5141e96..d93017fc7872 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c | |||
| @@ -38,6 +38,11 @@ module_param(max_channels, uint, S_IRUGO); | |||
| 38 | MODULE_PARM_DESC(max_channels, | 38 | MODULE_PARM_DESC(max_channels, |
| 39 | "Maximum number of channels to use (default: all)"); | 39 | "Maximum number of channels to use (default: all)"); |
| 40 | 40 | ||
| 41 | static unsigned int iterations; | ||
| 42 | module_param(iterations, uint, S_IRUGO); | ||
| 43 | MODULE_PARM_DESC(iterations, | ||
| 44 | "Iterations before stopping test (default: infinite)"); | ||
| 45 | |||
| 41 | static unsigned int xor_sources = 3; | 46 | static unsigned int xor_sources = 3; |
| 42 | module_param(xor_sources, uint, S_IRUGO); | 47 | module_param(xor_sources, uint, S_IRUGO); |
| 43 | MODULE_PARM_DESC(xor_sources, | 48 | MODULE_PARM_DESC(xor_sources, |
| @@ -114,7 +119,7 @@ static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len) | |||
| 114 | buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); | 119 | buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); |
| 115 | for ( ; i < start + len; i++) | 120 | for ( ; i < start + len; i++) |
| 116 | buf[i] = PATTERN_SRC | PATTERN_COPY | 121 | buf[i] = PATTERN_SRC | PATTERN_COPY |
| 117 | | (~i & PATTERN_COUNT_MASK);; | 122 | | (~i & PATTERN_COUNT_MASK); |
| 118 | for ( ; i < test_buf_size; i++) | 123 | for ( ; i < test_buf_size; i++) |
| 119 | buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); | 124 | buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); |
| 120 | buf++; | 125 | buf++; |
| @@ -270,7 +275,8 @@ static int dmatest_func(void *data) | |||
| 270 | 275 | ||
| 271 | flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT; | 276 | flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT; |
| 272 | 277 | ||
| 273 | while (!kthread_should_stop()) { | 278 | while (!kthread_should_stop() |
| 279 | && !(iterations && total_tests >= iterations)) { | ||
| 274 | struct dma_device *dev = chan->device; | 280 | struct dma_device *dev = chan->device; |
| 275 | struct dma_async_tx_descriptor *tx = NULL; | 281 | struct dma_async_tx_descriptor *tx = NULL; |
| 276 | dma_addr_t dma_srcs[src_cnt]; | 282 | dma_addr_t dma_srcs[src_cnt]; |
| @@ -416,6 +422,13 @@ err_srcbuf: | |||
| 416 | err_srcs: | 422 | err_srcs: |
| 417 | pr_notice("%s: terminating after %u tests, %u failures (status %d)\n", | 423 | pr_notice("%s: terminating after %u tests, %u failures (status %d)\n", |
| 418 | thread_name, total_tests, failed_tests, ret); | 424 | thread_name, total_tests, failed_tests, ret); |
| 425 | |||
| 426 | if (iterations > 0) | ||
| 427 | while (!kthread_should_stop()) { | ||
| 428 | DECLARE_WAIT_QUEUE_HEAD(wait_dmatest_exit); | ||
| 429 | interruptible_sleep_on(&wait_dmatest_exit); | ||
| 430 | } | ||
| 431 | |||
| 419 | return ret; | 432 | return ret; |
| 420 | } | 433 | } |
| 421 | 434 | ||
| @@ -495,11 +508,11 @@ static int dmatest_add_channel(struct dma_chan *chan) | |||
| 495 | 508 | ||
| 496 | if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { | 509 | if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { |
| 497 | cnt = dmatest_add_threads(dtc, DMA_MEMCPY); | 510 | cnt = dmatest_add_threads(dtc, DMA_MEMCPY); |
| 498 | thread_count += cnt > 0 ?: 0; | 511 | thread_count += cnt > 0 ? cnt : 0; |
| 499 | } | 512 | } |
| 500 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { | 513 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { |
| 501 | cnt = dmatest_add_threads(dtc, DMA_XOR); | 514 | cnt = dmatest_add_threads(dtc, DMA_XOR); |
| 502 | thread_count += cnt > 0 ?: 0; | 515 | thread_count += cnt > 0 ? cnt : 0; |
| 503 | } | 516 | } |
| 504 | 517 | ||
| 505 | pr_info("dmatest: Started %u threads using %s\n", | 518 | pr_info("dmatest: Started %u threads using %s\n", |
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index f18d1bde0439..ef87a8984145 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c | |||
| @@ -12,6 +12,11 @@ | |||
| 12 | * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc. | 12 | * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc. |
| 13 | * The support for MPC8349 DMA contorller is also added. | 13 | * The support for MPC8349 DMA contorller is also added. |
| 14 | * | 14 | * |
| 15 | * This driver instructs the DMA controller to issue the PCI Read Multiple | ||
| 16 | * command for PCI read operations, instead of using the default PCI Read Line | ||
| 17 | * command. Please be aware that this setting may result in read pre-fetching | ||
| 18 | * on some platforms. | ||
| 19 | * | ||
| 15 | * This is free software; you can redistribute it and/or modify | 20 | * This is free software; you can redistribute it and/or modify |
| 16 | * it under the terms of the GNU General Public License as published by | 21 | * it under the terms of the GNU General Public License as published by |
| 17 | * the Free Software Foundation; either version 2 of the License, or | 22 | * the Free Software Foundation; either version 2 of the License, or |
| @@ -49,9 +54,10 @@ static void dma_init(struct fsl_dma_chan *fsl_chan) | |||
| 49 | case FSL_DMA_IP_83XX: | 54 | case FSL_DMA_IP_83XX: |
| 50 | /* Set the channel to below modes: | 55 | /* Set the channel to below modes: |
| 51 | * EOTIE - End-of-transfer interrupt enable | 56 | * EOTIE - End-of-transfer interrupt enable |
| 57 | * PRC_RM - PCI read multiple | ||
| 52 | */ | 58 | */ |
| 53 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE, | 59 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE |
| 54 | 32); | 60 | | FSL_DMA_MR_PRC_RM, 32); |
| 55 | break; | 61 | break; |
| 56 | } | 62 | } |
| 57 | 63 | ||
| @@ -136,15 +142,16 @@ static int dma_is_idle(struct fsl_dma_chan *fsl_chan) | |||
| 136 | 142 | ||
| 137 | static void dma_start(struct fsl_dma_chan *fsl_chan) | 143 | static void dma_start(struct fsl_dma_chan *fsl_chan) |
| 138 | { | 144 | { |
| 139 | u32 mr_set = 0;; | 145 | u32 mr_set = 0; |
| 140 | 146 | ||
| 141 | if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { | 147 | if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { |
| 142 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32); | 148 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32); |
| 143 | mr_set |= FSL_DMA_MR_EMP_EN; | 149 | mr_set |= FSL_DMA_MR_EMP_EN; |
| 144 | } else | 150 | } else if ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { |
| 145 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, | 151 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, |
| 146 | DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | 152 | DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
| 147 | & ~FSL_DMA_MR_EMP_EN, 32); | 153 | & ~FSL_DMA_MR_EMP_EN, 32); |
| 154 | } | ||
| 148 | 155 | ||
| 149 | if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT) | 156 | if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT) |
| 150 | mr_set |= FSL_DMA_MR_EMS_EN; | 157 | mr_set |= FSL_DMA_MR_EMS_EN; |
| @@ -871,9 +878,9 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, | |||
| 871 | 878 | ||
| 872 | switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) { | 879 | switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) { |
| 873 | case FSL_DMA_IP_85XX: | 880 | case FSL_DMA_IP_85XX: |
| 874 | new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; | ||
| 875 | new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; | 881 | new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; |
| 876 | case FSL_DMA_IP_83XX: | 882 | case FSL_DMA_IP_83XX: |
| 883 | new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; | ||
| 877 | new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; | 884 | new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; |
| 878 | new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; | 885 | new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; |
| 879 | } | 886 | } |
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index 4f21a512d848..dc7f26865797 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | 38 | ||
| 39 | /* Special MR definition for MPC8349 */ | 39 | /* Special MR definition for MPC8349 */ |
| 40 | #define FSL_DMA_MR_EOTIE 0x00000080 | 40 | #define FSL_DMA_MR_EOTIE 0x00000080 |
| 41 | #define FSL_DMA_MR_PRC_RM 0x00000800 | ||
| 41 | 42 | ||
| 42 | #define FSL_DMA_SR_CH 0x00000020 | 43 | #define FSL_DMA_SR_CH 0x00000020 |
| 43 | #define FSL_DMA_SR_PE 0x00000010 | 44 | #define FSL_DMA_SR_PE 0x00000010 |
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index ddab94f51224..3f23eabe09f2 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c | |||
| @@ -1176,7 +1176,7 @@ static int __devinit mv_xor_probe(struct platform_device *pdev) | |||
| 1176 | if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) | 1176 | if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) |
| 1177 | dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset; | 1177 | dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset; |
| 1178 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { | 1178 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { |
| 1179 | dma_dev->max_xor = 8; ; | 1179 | dma_dev->max_xor = 8; |
| 1180 | dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; | 1180 | dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; |
| 1181 | } | 1181 | } |
| 1182 | 1182 | ||
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 24964c1d0af9..e2a10bcba7a1 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
| @@ -868,6 +868,8 @@ static void amd64_read_dbam_reg(struct amd64_pvt *pvt) | |||
| 868 | goto err_reg; | 868 | goto err_reg; |
| 869 | } | 869 | } |
| 870 | 870 | ||
| 871 | return; | ||
| 872 | |||
| 871 | err_reg: | 873 | err_reg: |
| 872 | debugf0("Error reading F2x%03x.\n", reg); | 874 | debugf0("Error reading F2x%03x.\n", reg); |
| 873 | } | 875 | } |
| @@ -2634,6 +2636,8 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) | |||
| 2634 | 2636 | ||
| 2635 | amd64_dump_misc_regs(pvt); | 2637 | amd64_dump_misc_regs(pvt); |
| 2636 | 2638 | ||
| 2639 | return; | ||
| 2640 | |||
| 2637 | err_reg: | 2641 | err_reg: |
| 2638 | debugf0("Reading an MC register failed\n"); | 2642 | debugf0("Reading an MC register failed\n"); |
| 2639 | 2643 | ||
| @@ -2977,6 +2981,9 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) | |||
| 2977 | "ECC is enabled by BIOS, Proceeding " | 2981 | "ECC is enabled by BIOS, Proceeding " |
| 2978 | "with EDAC module initialization\n"); | 2982 | "with EDAC module initialization\n"); |
| 2979 | 2983 | ||
| 2984 | /* Signal good ECC status */ | ||
| 2985 | ret = 0; | ||
| 2986 | |||
| 2980 | /* CLEAR the override, since BIOS controlled it */ | 2987 | /* CLEAR the override, since BIOS controlled it */ |
| 2981 | ecc_enable_override = 0; | 2988 | ecc_enable_override = 0; |
| 2982 | } | 2989 | } |
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 8fab7890a363..33be210d6723 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c | |||
| @@ -1461,7 +1461,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, | |||
| 1461 | goto out; | 1461 | goto out; |
| 1462 | } | 1462 | } |
| 1463 | 1463 | ||
| 1464 | if (crtc_req->count_connectors > 0 && !mode && !fb) { | 1464 | if (crtc_req->count_connectors > 0 && (!mode || !fb)) { |
| 1465 | DRM_DEBUG("Count connectors is %d but no mode or fb set\n", | 1465 | DRM_DEBUG("Count connectors is %d but no mode or fb set\n", |
| 1466 | crtc_req->count_connectors); | 1466 | crtc_req->count_connectors); |
| 1467 | ret = -EINVAL; | 1467 | ret = -EINVAL; |
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 3da9cfa31848..6aaa2cb23365 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c | |||
| @@ -706,8 +706,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) | |||
| 706 | struct drm_encoder **save_encoders, *new_encoder; | 706 | struct drm_encoder **save_encoders, *new_encoder; |
| 707 | struct drm_framebuffer *old_fb = NULL; | 707 | struct drm_framebuffer *old_fb = NULL; |
| 708 | bool save_enabled; | 708 | bool save_enabled; |
| 709 | bool mode_changed = false; | 709 | bool mode_changed = false; /* if true do a full mode set */ |
| 710 | bool fb_changed = false; | 710 | bool fb_changed = false; /* if true and !mode_changed just do a flip */ |
| 711 | struct drm_connector *connector; | 711 | struct drm_connector *connector; |
| 712 | int count = 0, ro, fail = 0; | 712 | int count = 0, ro, fail = 0; |
| 713 | struct drm_crtc_helper_funcs *crtc_funcs; | 713 | struct drm_crtc_helper_funcs *crtc_funcs; |
| @@ -758,6 +758,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) | |||
| 758 | if (set->crtc->fb == NULL) { | 758 | if (set->crtc->fb == NULL) { |
| 759 | DRM_DEBUG("crtc has no fb, full mode set\n"); | 759 | DRM_DEBUG("crtc has no fb, full mode set\n"); |
| 760 | mode_changed = true; | 760 | mode_changed = true; |
| 761 | } else if (set->fb == NULL) { | ||
| 762 | mode_changed = true; | ||
| 761 | } else if ((set->fb->bits_per_pixel != | 763 | } else if ((set->fb->bits_per_pixel != |
| 762 | set->crtc->fb->bits_per_pixel) || | 764 | set->crtc->fb->bits_per_pixel) || |
| 763 | set->fb->depth != set->crtc->fb->depth) | 765 | set->fb->depth != set->crtc->fb->depth) |
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 05a44896dffb..f1ba8ff41130 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c | |||
| @@ -722,13 +722,14 @@ int r100_cs_packet_parse(struct radeon_cs_parser *p, | |||
| 722 | unsigned idx) | 722 | unsigned idx) |
| 723 | { | 723 | { |
| 724 | struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; | 724 | struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; |
| 725 | uint32_t header = ib_chunk->kdata[idx]; | 725 | uint32_t header; |
| 726 | 726 | ||
| 727 | if (idx >= ib_chunk->length_dw) { | 727 | if (idx >= ib_chunk->length_dw) { |
| 728 | DRM_ERROR("Can not parse packet at %d after CS end %d !\n", | 728 | DRM_ERROR("Can not parse packet at %d after CS end %d !\n", |
| 729 | idx, ib_chunk->length_dw); | 729 | idx, ib_chunk->length_dw); |
| 730 | return -EINVAL; | 730 | return -EINVAL; |
| 731 | } | 731 | } |
| 732 | header = ib_chunk->kdata[idx]; | ||
| 732 | pkt->idx = idx; | 733 | pkt->idx = idx; |
| 733 | pkt->type = CP_PACKET_GET_TYPE(header); | 734 | pkt->type = CP_PACKET_GET_TYPE(header); |
| 734 | pkt->count = CP_PACKET_GET_COUNT(header); | 735 | pkt->count = CP_PACKET_GET_COUNT(header); |
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 3cfcee17dc56..0bd5879a4957 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c | |||
| @@ -318,6 +318,14 @@ static int __init radeon_init(void) | |||
| 318 | driver = &driver_old; | 318 | driver = &driver_old; |
| 319 | driver->num_ioctls = radeon_max_ioctl; | 319 | driver->num_ioctls = radeon_max_ioctl; |
| 320 | #if defined(CONFIG_DRM_RADEON_KMS) | 320 | #if defined(CONFIG_DRM_RADEON_KMS) |
| 321 | #ifdef CONFIG_VGA_CONSOLE | ||
| 322 | if (vgacon_text_force() && radeon_modeset == -1) { | ||
| 323 | DRM_INFO("VGACON disable radeon kernel modesetting.\n"); | ||
| 324 | driver = &driver_old; | ||
| 325 | driver->driver_features &= ~DRIVER_MODESET; | ||
| 326 | radeon_modeset = 0; | ||
| 327 | } | ||
| 328 | #endif | ||
| 321 | /* if enabled by default */ | 329 | /* if enabled by default */ |
| 322 | if (radeon_modeset == -1) { | 330 | if (radeon_modeset == -1) { |
| 323 | DRM_INFO("radeon default to kernel modesetting.\n"); | 331 | DRM_INFO("radeon default to kernel modesetting.\n"); |
| @@ -329,17 +337,8 @@ static int __init radeon_init(void) | |||
| 329 | driver->driver_features |= DRIVER_MODESET; | 337 | driver->driver_features |= DRIVER_MODESET; |
| 330 | driver->num_ioctls = radeon_max_kms_ioctl; | 338 | driver->num_ioctls = radeon_max_kms_ioctl; |
| 331 | } | 339 | } |
| 332 | |||
| 333 | /* if the vga console setting is enabled still | 340 | /* if the vga console setting is enabled still |
| 334 | * let modprobe override it */ | 341 | * let modprobe override it */ |
| 335 | #ifdef CONFIG_VGA_CONSOLE | ||
| 336 | if (vgacon_text_force() && radeon_modeset == -1) { | ||
| 337 | DRM_INFO("VGACON disable radeon kernel modesetting.\n"); | ||
| 338 | driver = &driver_old; | ||
| 339 | driver->driver_features &= ~DRIVER_MODESET; | ||
| 340 | radeon_modeset = 0; | ||
| 341 | } | ||
| 342 | #endif | ||
| 343 | #endif | 342 | #endif |
| 344 | return drm_init(driver); | 343 | return drm_init(driver); |
| 345 | } | 344 | } |
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 937a2f1cdb46..3357110e30ce 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c | |||
| @@ -58,6 +58,8 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) | |||
| 58 | if (r) { | 58 | if (r) { |
| 59 | DRM_ERROR("Failed to initialize radeon, disabling IOCTL\n"); | 59 | DRM_ERROR("Failed to initialize radeon, disabling IOCTL\n"); |
| 60 | radeon_device_fini(rdev); | 60 | radeon_device_fini(rdev); |
| 61 | kfree(rdev); | ||
| 62 | dev->dev_private = NULL; | ||
| 61 | return r; | 63 | return r; |
| 62 | } | 64 | } |
| 63 | return 0; | 65 | return 0; |
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 551e608702e4..fd8f3ca716ea 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c | |||
| @@ -370,6 +370,7 @@ void rv515_vram_info(struct radeon_device *rdev) | |||
| 370 | 370 | ||
| 371 | rv515_vram_get_type(rdev); | 371 | rv515_vram_get_type(rdev); |
| 372 | 372 | ||
| 373 | r100_vram_init_sizes(rdev); | ||
| 373 | /* FIXME: we should enforce default clock in case GPU is not in | 374 | /* FIXME: we should enforce default clock in case GPU is not in |
| 374 | * default setup | 375 | * default setup |
| 375 | */ | 376 | */ |
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6538d4236989..c2b0d710d10f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c | |||
| @@ -1182,13 +1182,14 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, | |||
| 1182 | 1182 | ||
| 1183 | int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) | 1183 | int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) |
| 1184 | { | 1184 | { |
| 1185 | struct ttm_mem_type_manager *man = &bdev->man[mem_type]; | 1185 | struct ttm_mem_type_manager *man; |
| 1186 | int ret = -EINVAL; | 1186 | int ret = -EINVAL; |
| 1187 | 1187 | ||
| 1188 | if (mem_type >= TTM_NUM_MEM_TYPES) { | 1188 | if (mem_type >= TTM_NUM_MEM_TYPES) { |
| 1189 | printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", mem_type); | 1189 | printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", mem_type); |
| 1190 | return ret; | 1190 | return ret; |
| 1191 | } | 1191 | } |
| 1192 | man = &bdev->man[mem_type]; | ||
| 1192 | 1193 | ||
| 1193 | if (!man->has_type) { | 1194 | if (!man->has_type) { |
| 1194 | printk(KERN_ERR TTM_PFX "Trying to take down uninitialized " | 1195 | printk(KERN_ERR TTM_PFX "Trying to take down uninitialized " |
| @@ -1575,6 +1576,10 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, | |||
| 1575 | driver->sync_obj_unref(&sync_obj); | 1576 | driver->sync_obj_unref(&sync_obj); |
| 1576 | driver->sync_obj_unref(&tmp_obj); | 1577 | driver->sync_obj_unref(&tmp_obj); |
| 1577 | spin_lock(&bo->lock); | 1578 | spin_lock(&bo->lock); |
| 1579 | } else { | ||
| 1580 | spin_unlock(&bo->lock); | ||
| 1581 | driver->sync_obj_unref(&sync_obj); | ||
| 1582 | spin_lock(&bo->lock); | ||
| 1578 | } | 1583 | } |
| 1579 | } | 1584 | } |
| 1580 | return 0; | 1585 | return 0; |
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ce2e6f38ea01..ad4ada07c6cf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c | |||
| @@ -150,7 +150,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, | |||
| 150 | #ifdef CONFIG_X86 | 150 | #ifdef CONFIG_X86 |
| 151 | dst = kmap_atomic_prot(d, KM_USER0, prot); | 151 | dst = kmap_atomic_prot(d, KM_USER0, prot); |
| 152 | #else | 152 | #else |
| 153 | if (prot != PAGE_KERNEL) | 153 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) |
| 154 | dst = vmap(&d, 1, 0, prot); | 154 | dst = vmap(&d, 1, 0, prot); |
| 155 | else | 155 | else |
| 156 | dst = kmap(d); | 156 | dst = kmap(d); |
| @@ -163,7 +163,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, | |||
| 163 | #ifdef CONFIG_X86 | 163 | #ifdef CONFIG_X86 |
| 164 | kunmap_atomic(dst, KM_USER0); | 164 | kunmap_atomic(dst, KM_USER0); |
| 165 | #else | 165 | #else |
| 166 | if (prot != PAGE_KERNEL) | 166 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) |
| 167 | vunmap(dst); | 167 | vunmap(dst); |
| 168 | else | 168 | else |
| 169 | kunmap(d); | 169 | kunmap(d); |
| @@ -186,7 +186,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, | |||
| 186 | #ifdef CONFIG_X86 | 186 | #ifdef CONFIG_X86 |
| 187 | src = kmap_atomic_prot(s, KM_USER0, prot); | 187 | src = kmap_atomic_prot(s, KM_USER0, prot); |
| 188 | #else | 188 | #else |
| 189 | if (prot != PAGE_KERNEL) | 189 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) |
| 190 | src = vmap(&s, 1, 0, prot); | 190 | src = vmap(&s, 1, 0, prot); |
| 191 | else | 191 | else |
| 192 | src = kmap(s); | 192 | src = kmap(s); |
| @@ -199,7 +199,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, | |||
| 199 | #ifdef CONFIG_X86 | 199 | #ifdef CONFIG_X86 |
| 200 | kunmap_atomic(src, KM_USER0); | 200 | kunmap_atomic(src, KM_USER0); |
| 201 | #else | 201 | #else |
| 202 | if (prot != PAGE_KERNEL) | 202 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) |
| 203 | vunmap(src); | 203 | vunmap(src); |
| 204 | else | 204 | else |
| 205 | kunmap(s); | 205 | kunmap(s); |
diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index b587e2d576ac..820e51673b26 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c | |||
| @@ -296,7 +296,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) | |||
| 296 | priv->tseq[3] = 0; | 296 | priv->tseq[3] = 0; |
| 297 | if (mlc->opacket & HIL_CTRL_APE) { | 297 | if (mlc->opacket & HIL_CTRL_APE) { |
| 298 | priv->tseq[3] |= HP_SDC_LPC_APE_IPF; | 298 | priv->tseq[3] |= HP_SDC_LPC_APE_IPF; |
| 299 | down_trylock(&mlc->csem); | 299 | BUG_ON(down_trylock(&mlc->csem)); |
| 300 | } | 300 | } |
| 301 | enqueue: | 301 | enqueue: |
| 302 | hp_sdc_enqueue_transaction(&priv->trans); | 302 | hp_sdc_enqueue_transaction(&priv->trans); |
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index c3b661a666cb..7e5f30dbc0a0 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c | |||
| @@ -1480,7 +1480,7 @@ l1oip_init(void) | |||
| 1480 | return -ENOMEM; | 1480 | return -ENOMEM; |
| 1481 | 1481 | ||
| 1482 | l1oip_cnt = 0; | 1482 | l1oip_cnt = 0; |
| 1483 | while (type[l1oip_cnt] && l1oip_cnt < MAX_CARDS) { | 1483 | while (l1oip_cnt < MAX_CARDS && type[l1oip_cnt]) { |
| 1484 | switch (type[l1oip_cnt] & 0xff) { | 1484 | switch (type[l1oip_cnt] & 0xff) { |
| 1485 | case 1: | 1485 | case 1: |
| 1486 | pri = 0; | 1486 | pri = 0; |
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index a6974e9b8ebf..1e2cb846b3c9 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
| @@ -1,6 +1,8 @@ | |||
| 1 | /*P:400 This contains run_guest() which actually calls into the Host<->Guest | 1 | /*P:400 |
| 2 | * This contains run_guest() which actually calls into the Host<->Guest | ||
| 2 | * Switcher and analyzes the return, such as determining if the Guest wants the | 3 | * Switcher and analyzes the return, such as determining if the Guest wants the |
| 3 | * Host to do something. This file also contains useful helper routines. :*/ | 4 | * Host to do something. This file also contains useful helper routines. |
| 5 | :*/ | ||
| 4 | #include <linux/module.h> | 6 | #include <linux/module.h> |
| 5 | #include <linux/stringify.h> | 7 | #include <linux/stringify.h> |
| 6 | #include <linux/stddef.h> | 8 | #include <linux/stddef.h> |
| @@ -24,7 +26,8 @@ static struct page **switcher_page; | |||
| 24 | /* This One Big lock protects all inter-guest data structures. */ | 26 | /* This One Big lock protects all inter-guest data structures. */ |
| 25 | DEFINE_MUTEX(lguest_lock); | 27 | DEFINE_MUTEX(lguest_lock); |
| 26 | 28 | ||
| 27 | /*H:010 We need to set up the Switcher at a high virtual address. Remember the | 29 | /*H:010 |
| 30 | * We need to set up the Switcher at a high virtual address. Remember the | ||
| 28 | * Switcher is a few hundred bytes of assembler code which actually changes the | 31 | * Switcher is a few hundred bytes of assembler code which actually changes the |
| 29 | * CPU to run the Guest, and then changes back to the Host when a trap or | 32 | * CPU to run the Guest, and then changes back to the Host when a trap or |
| 30 | * interrupt happens. | 33 | * interrupt happens. |
| @@ -33,7 +36,8 @@ DEFINE_MUTEX(lguest_lock); | |||
| 33 | * Host since it will be running as the switchover occurs. | 36 | * Host since it will be running as the switchover occurs. |
| 34 | * | 37 | * |
| 35 | * Trying to map memory at a particular address is an unusual thing to do, so | 38 | * Trying to map memory at a particular address is an unusual thing to do, so |
| 36 | * it's not a simple one-liner. */ | 39 | * it's not a simple one-liner. |
| 40 | */ | ||
| 37 | static __init int map_switcher(void) | 41 | static __init int map_switcher(void) |
| 38 | { | 42 | { |
| 39 | int i, err; | 43 | int i, err; |
| @@ -47,8 +51,10 @@ static __init int map_switcher(void) | |||
| 47 | * easy. | 51 | * easy. |
| 48 | */ | 52 | */ |
| 49 | 53 | ||
| 50 | /* We allocate an array of struct page pointers. map_vm_area() wants | 54 | /* |
| 51 | * this, rather than just an array of pages. */ | 55 | * We allocate an array of struct page pointers. map_vm_area() wants |
| 56 | * this, rather than just an array of pages. | ||
| 57 | */ | ||
| 52 | switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, | 58 | switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, |
| 53 | GFP_KERNEL); | 59 | GFP_KERNEL); |
| 54 | if (!switcher_page) { | 60 | if (!switcher_page) { |
| @@ -56,8 +62,10 @@ static __init int map_switcher(void) | |||
| 56 | goto out; | 62 | goto out; |
| 57 | } | 63 | } |
| 58 | 64 | ||
| 59 | /* Now we actually allocate the pages. The Guest will see these pages, | 65 | /* |
| 60 | * so we make sure they're zeroed. */ | 66 | * Now we actually allocate the pages. The Guest will see these pages, |
| 67 | * so we make sure they're zeroed. | ||
| 68 | */ | ||
| 61 | for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { | 69 | for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { |
| 62 | unsigned long addr = get_zeroed_page(GFP_KERNEL); | 70 | unsigned long addr = get_zeroed_page(GFP_KERNEL); |
| 63 | if (!addr) { | 71 | if (!addr) { |
| @@ -67,19 +75,23 @@ static __init int map_switcher(void) | |||
| 67 | switcher_page[i] = virt_to_page(addr); | 75 | switcher_page[i] = virt_to_page(addr); |
| 68 | } | 76 | } |
| 69 | 77 | ||
| 70 | /* First we check that the Switcher won't overlap the fixmap area at | 78 | /* |
| 79 | * First we check that the Switcher won't overlap the fixmap area at | ||
| 71 | * the top of memory. It's currently nowhere near, but it could have | 80 | * the top of memory. It's currently nowhere near, but it could have |
| 72 | * very strange effects if it ever happened. */ | 81 | * very strange effects if it ever happened. |
| 82 | */ | ||
| 73 | if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ | 83 | if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ |
| 74 | err = -ENOMEM; | 84 | err = -ENOMEM; |
| 75 | printk("lguest: mapping switcher would thwack fixmap\n"); | 85 | printk("lguest: mapping switcher would thwack fixmap\n"); |
| 76 | goto free_pages; | 86 | goto free_pages; |
| 77 | } | 87 | } |
| 78 | 88 | ||
| 79 | /* Now we reserve the "virtual memory area" we want: 0xFFC00000 | 89 | /* |
| 90 | * Now we reserve the "virtual memory area" we want: 0xFFC00000 | ||
| 80 | * (SWITCHER_ADDR). We might not get it in theory, but in practice | 91 | * (SWITCHER_ADDR). We might not get it in theory, but in practice |
| 81 | * it's worked so far. The end address needs +1 because __get_vm_area | 92 | * it's worked so far. The end address needs +1 because __get_vm_area |
| 82 | * allocates an extra guard page, so we need space for that. */ | 93 | * allocates an extra guard page, so we need space for that. |
| 94 | */ | ||
| 83 | switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, | 95 | switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, |
| 84 | VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR | 96 | VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR |
| 85 | + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); | 97 | + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); |
| @@ -89,11 +101,13 @@ static __init int map_switcher(void) | |||
| 89 | goto free_pages; | 101 | goto free_pages; |
| 90 | } | 102 | } |
| 91 | 103 | ||
| 92 | /* This code actually sets up the pages we've allocated to appear at | 104 | /* |
| 105 | * This code actually sets up the pages we've allocated to appear at | ||
| 93 | * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the | 106 | * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the |
| 94 | * kind of pages we're mapping (kernel pages), and a pointer to our | 107 | * kind of pages we're mapping (kernel pages), and a pointer to our |
| 95 | * array of struct pages. It increments that pointer, but we don't | 108 | * array of struct pages. It increments that pointer, but we don't |
| 96 | * care. */ | 109 | * care. |
| 110 | */ | ||
| 97 | pagep = switcher_page; | 111 | pagep = switcher_page; |
| 98 | err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); | 112 | err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); |
| 99 | if (err) { | 113 | if (err) { |
| @@ -101,8 +115,10 @@ static __init int map_switcher(void) | |||
| 101 | goto free_vma; | 115 | goto free_vma; |
| 102 | } | 116 | } |
| 103 | 117 | ||
| 104 | /* Now the Switcher is mapped at the right address, we can't fail! | 118 | /* |
| 105 | * Copy in the compiled-in Switcher code (from <arch>_switcher.S). */ | 119 | * Now the Switcher is mapped at the right address, we can't fail! |
| 120 | * Copy in the compiled-in Switcher code (from <arch>_switcher.S). | ||
| 121 | */ | ||
| 106 | memcpy(switcher_vma->addr, start_switcher_text, | 122 | memcpy(switcher_vma->addr, start_switcher_text, |
| 107 | end_switcher_text - start_switcher_text); | 123 | end_switcher_text - start_switcher_text); |
| 108 | 124 | ||
| @@ -124,8 +140,7 @@ out: | |||
| 124 | } | 140 | } |
| 125 | /*:*/ | 141 | /*:*/ |
| 126 | 142 | ||
| 127 | /* Cleaning up the mapping when the module is unloaded is almost... | 143 | /* Cleaning up the mapping when the module is unloaded is almost... too easy. */ |
| 128 | * too easy. */ | ||
| 129 | static void unmap_switcher(void) | 144 | static void unmap_switcher(void) |
| 130 | { | 145 | { |
| 131 | unsigned int i; | 146 | unsigned int i; |
| @@ -151,16 +166,19 @@ static void unmap_switcher(void) | |||
| 151 | * But we can't trust the Guest: it might be trying to access the Launcher | 166 | * But we can't trust the Guest: it might be trying to access the Launcher |
| 152 | * code. We have to check that the range is below the pfn_limit the Launcher | 167 | * code. We have to check that the range is below the pfn_limit the Launcher |
| 153 | * gave us. We have to make sure that addr + len doesn't give us a false | 168 | * gave us. We have to make sure that addr + len doesn't give us a false |
| 154 | * positive by overflowing, too. */ | 169 | * positive by overflowing, too. |
| 170 | */ | ||
| 155 | bool lguest_address_ok(const struct lguest *lg, | 171 | bool lguest_address_ok(const struct lguest *lg, |
| 156 | unsigned long addr, unsigned long len) | 172 | unsigned long addr, unsigned long len) |
| 157 | { | 173 | { |
| 158 | return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); | 174 | return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); |
| 159 | } | 175 | } |
| 160 | 176 | ||
| 161 | /* This routine copies memory from the Guest. Here we can see how useful the | 177 | /* |
| 178 | * This routine copies memory from the Guest. Here we can see how useful the | ||
| 162 | * kill_lguest() routine we met in the Launcher can be: we return a random | 179 | * kill_lguest() routine we met in the Launcher can be: we return a random |
| 163 | * value (all zeroes) instead of needing to return an error. */ | 180 | * value (all zeroes) instead of needing to return an error. |
| 181 | */ | ||
| 164 | void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) | 182 | void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) |
| 165 | { | 183 | { |
| 166 | if (!lguest_address_ok(cpu->lg, addr, bytes) | 184 | if (!lguest_address_ok(cpu->lg, addr, bytes) |
| @@ -181,9 +199,11 @@ void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, | |||
| 181 | } | 199 | } |
| 182 | /*:*/ | 200 | /*:*/ |
| 183 | 201 | ||
| 184 | /*H:030 Let's jump straight to the the main loop which runs the Guest. | 202 | /*H:030 |
| 203 | * Let's jump straight to the main loop which runs the Guest. | ||
| 185 | * Remember, this is called by the Launcher reading /dev/lguest, and we keep | 204 | * Remember, this is called by the Launcher reading /dev/lguest, and we keep |
| 186 | * going around and around until something interesting happens. */ | 205 | * going around and around until something interesting happens. |
| 206 | */ | ||
| 187 | int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | 207 | int run_guest(struct lg_cpu *cpu, unsigned long __user *user) |
| 188 | { | 208 | { |
| 189 | /* We stop running once the Guest is dead. */ | 209 | /* We stop running once the Guest is dead. */ |
| @@ -195,10 +215,17 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
| 195 | if (cpu->hcall) | 215 | if (cpu->hcall) |
| 196 | do_hypercalls(cpu); | 216 | do_hypercalls(cpu); |
| 197 | 217 | ||
| 198 | /* It's possible the Guest did a NOTIFY hypercall to the | 218 | /* |
| 199 | * Launcher, in which case we return from the read() now. */ | 219 | * It's possible the Guest did a NOTIFY hypercall to the |
| 220 | * Launcher. | ||
| 221 | */ | ||
| 200 | if (cpu->pending_notify) { | 222 | if (cpu->pending_notify) { |
| 223 | /* | ||
| 224 | * Does it just need to write to a registered | ||
| 225 | * eventfd (ie. the appropriate virtqueue thread)? | ||
| 226 | */ | ||
| 201 | if (!send_notify_to_eventfd(cpu)) { | 227 | if (!send_notify_to_eventfd(cpu)) { |
| 228 | /* OK, we tell the main Launcher. */ | ||
| 202 | if (put_user(cpu->pending_notify, user)) | 229 | if (put_user(cpu->pending_notify, user)) |
| 203 | return -EFAULT; | 230 | return -EFAULT; |
| 204 | return sizeof(cpu->pending_notify); | 231 | return sizeof(cpu->pending_notify); |
| @@ -209,29 +236,39 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
| 209 | if (signal_pending(current)) | 236 | if (signal_pending(current)) |
| 210 | return -ERESTARTSYS; | 237 | return -ERESTARTSYS; |
| 211 | 238 | ||
| 212 | /* Check if there are any interrupts which can be delivered now: | 239 | /* |
| 240 | * Check if there are any interrupts which can be delivered now: | ||
| 213 | * if so, this sets up the hander to be executed when we next | 241 | * if so, this sets up the hander to be executed when we next |
| 214 | * run the Guest. */ | 242 | * run the Guest. |
| 243 | */ | ||
| 215 | irq = interrupt_pending(cpu, &more); | 244 | irq = interrupt_pending(cpu, &more); |
| 216 | if (irq < LGUEST_IRQS) | 245 | if (irq < LGUEST_IRQS) |
| 217 | try_deliver_interrupt(cpu, irq, more); | 246 | try_deliver_interrupt(cpu, irq, more); |
| 218 | 247 | ||
| 219 | /* All long-lived kernel loops need to check with this horrible | 248 | /* |
| 249 | * All long-lived kernel loops need to check with this horrible | ||
| 220 | * thing called the freezer. If the Host is trying to suspend, | 250 | * thing called the freezer. If the Host is trying to suspend, |
| 221 | * it stops us. */ | 251 | * it stops us. |
| 252 | */ | ||
| 222 | try_to_freeze(); | 253 | try_to_freeze(); |
| 223 | 254 | ||
| 224 | /* Just make absolutely sure the Guest is still alive. One of | 255 | /* |
| 225 | * those hypercalls could have been fatal, for example. */ | 256 | * Just make absolutely sure the Guest is still alive. One of |
| 257 | * those hypercalls could have been fatal, for example. | ||
| 258 | */ | ||
| 226 | if (cpu->lg->dead) | 259 | if (cpu->lg->dead) |
| 227 | break; | 260 | break; |
| 228 | 261 | ||
| 229 | /* If the Guest asked to be stopped, we sleep. The Guest's | 262 | /* |
| 230 | * clock timer will wake us. */ | 263 | * If the Guest asked to be stopped, we sleep. The Guest's |
| 264 | * clock timer will wake us. | ||
| 265 | */ | ||
| 231 | if (cpu->halted) { | 266 | if (cpu->halted) { |
| 232 | set_current_state(TASK_INTERRUPTIBLE); | 267 | set_current_state(TASK_INTERRUPTIBLE); |
| 233 | /* Just before we sleep, make sure no interrupt snuck in | 268 | /* |
| 234 | * which we should be doing. */ | 269 | * Just before we sleep, make sure no interrupt snuck in |
| 270 | * which we should be doing. | ||
| 271 | */ | ||
| 235 | if (interrupt_pending(cpu, &more) < LGUEST_IRQS) | 272 | if (interrupt_pending(cpu, &more) < LGUEST_IRQS) |
| 236 | set_current_state(TASK_RUNNING); | 273 | set_current_state(TASK_RUNNING); |
| 237 | else | 274 | else |
| @@ -239,8 +276,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
| 239 | continue; | 276 | continue; |
| 240 | } | 277 | } |
| 241 | 278 | ||
| 242 | /* OK, now we're ready to jump into the Guest. First we put up | 279 | /* |
| 243 | * the "Do Not Disturb" sign: */ | 280 | * OK, now we're ready to jump into the Guest. First we put up |
| 281 | * the "Do Not Disturb" sign: | ||
| 282 | */ | ||
| 244 | local_irq_disable(); | 283 | local_irq_disable(); |
| 245 | 284 | ||
| 246 | /* Actually run the Guest until something happens. */ | 285 | /* Actually run the Guest until something happens. */ |
| @@ -327,8 +366,10 @@ static void __exit fini(void) | |||
| 327 | } | 366 | } |
| 328 | /*:*/ | 367 | /*:*/ |
| 329 | 368 | ||
| 330 | /* The Host side of lguest can be a module. This is a nice way for people to | 369 | /* |
| 331 | * play with it. */ | 370 | * The Host side of lguest can be a module. This is a nice way for people to |
| 371 | * play with it. | ||
| 372 | */ | ||
| 332 | module_init(init); | 373 | module_init(init); |
| 333 | module_exit(fini); | 374 | module_exit(fini); |
| 334 | MODULE_LICENSE("GPL"); | 375 | MODULE_LICENSE("GPL"); |
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index c29ffa19cb74..83511eb0923d 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
| @@ -1,8 +1,10 @@ | |||
| 1 | /*P:500 Just as userspace programs request kernel operations through a system | 1 | /*P:500 |
| 2 | * Just as userspace programs request kernel operations through a system | ||
| 2 | * call, the Guest requests Host operations through a "hypercall". You might | 3 | * call, the Guest requests Host operations through a "hypercall". You might |
| 3 | * notice this nomenclature doesn't really follow any logic, but the name has | 4 | * notice this nomenclature doesn't really follow any logic, but the name has |
| 4 | * been around for long enough that we're stuck with it. As you'd expect, this | 5 | * been around for long enough that we're stuck with it. As you'd expect, this |
| 5 | * code is basically a one big switch statement. :*/ | 6 | * code is basically a one big switch statement. |
| 7 | :*/ | ||
| 6 | 8 | ||
| 7 | /* Copyright (C) 2006 Rusty Russell IBM Corporation | 9 | /* Copyright (C) 2006 Rusty Russell IBM Corporation |
| 8 | 10 | ||
| @@ -28,30 +30,41 @@ | |||
| 28 | #include <asm/pgtable.h> | 30 | #include <asm/pgtable.h> |
| 29 | #include "lg.h" | 31 | #include "lg.h" |
| 30 | 32 | ||
| 31 | /*H:120 This is the core hypercall routine: where the Guest gets what it wants. | 33 | /*H:120 |
| 32 | * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. */ | 34 | * This is the core hypercall routine: where the Guest gets what it wants. |
| 35 | * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. | ||
| 36 | */ | ||
| 33 | static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | 37 | static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) |
| 34 | { | 38 | { |
| 35 | switch (args->arg0) { | 39 | switch (args->arg0) { |
| 36 | case LHCALL_FLUSH_ASYNC: | 40 | case LHCALL_FLUSH_ASYNC: |
| 37 | /* This call does nothing, except by breaking out of the Guest | 41 | /* |
| 38 | * it makes us process all the asynchronous hypercalls. */ | 42 | * This call does nothing, except by breaking out of the Guest |
| 43 | * it makes us process all the asynchronous hypercalls. | ||
| 44 | */ | ||
| 39 | break; | 45 | break; |
| 40 | case LHCALL_SEND_INTERRUPTS: | 46 | case LHCALL_SEND_INTERRUPTS: |
| 41 | /* This call does nothing too, but by breaking out of the Guest | 47 | /* |
| 42 | * it makes us process any pending interrupts. */ | 48 | * This call does nothing too, but by breaking out of the Guest |
| 49 | * it makes us process any pending interrupts. | ||
| 50 | */ | ||
| 43 | break; | 51 | break; |
| 44 | case LHCALL_LGUEST_INIT: | 52 | case LHCALL_LGUEST_INIT: |
| 45 | /* You can't get here unless you're already initialized. Don't | 53 | /* |
| 46 | * do that. */ | 54 | * You can't get here unless you're already initialized. Don't |
| 55 | * do that. | ||
| 56 | */ | ||
| 47 | kill_guest(cpu, "already have lguest_data"); | 57 | kill_guest(cpu, "already have lguest_data"); |
| 48 | break; | 58 | break; |
| 49 | case LHCALL_SHUTDOWN: { | 59 | case LHCALL_SHUTDOWN: { |
| 50 | /* Shutdown is such a trivial hypercall that we do it in four | ||
| 51 | * lines right here. */ | ||
| 52 | char msg[128]; | 60 | char msg[128]; |
| 53 | /* If the lgread fails, it will call kill_guest() itself; the | 61 | /* |
| 54 | * kill_guest() with the message will be ignored. */ | 62 | * Shutdown is such a trivial hypercall that we do it in five |
| 63 | * lines right here. | ||
| 64 | * | ||
| 65 | * If the lgread fails, it will call kill_guest() itself; the | ||
| 66 | * kill_guest() with the message will be ignored. | ||
| 67 | */ | ||
| 55 | __lgread(cpu, msg, args->arg1, sizeof(msg)); | 68 | __lgread(cpu, msg, args->arg1, sizeof(msg)); |
| 56 | msg[sizeof(msg)-1] = '\0'; | 69 | msg[sizeof(msg)-1] = '\0'; |
| 57 | kill_guest(cpu, "CRASH: %s", msg); | 70 | kill_guest(cpu, "CRASH: %s", msg); |
| @@ -60,16 +73,17 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | |||
| 60 | break; | 73 | break; |
| 61 | } | 74 | } |
| 62 | case LHCALL_FLUSH_TLB: | 75 | case LHCALL_FLUSH_TLB: |
| 63 | /* FLUSH_TLB comes in two flavors, depending on the | 76 | /* FLUSH_TLB comes in two flavors, depending on the argument: */ |
| 64 | * argument: */ | ||
| 65 | if (args->arg1) | 77 | if (args->arg1) |
| 66 | guest_pagetable_clear_all(cpu); | 78 | guest_pagetable_clear_all(cpu); |
| 67 | else | 79 | else |
| 68 | guest_pagetable_flush_user(cpu); | 80 | guest_pagetable_flush_user(cpu); |
| 69 | break; | 81 | break; |
| 70 | 82 | ||
| 71 | /* All these calls simply pass the arguments through to the right | 83 | /* |
| 72 | * routines. */ | 84 | * All these calls simply pass the arguments through to the right |
| 85 | * routines. | ||
| 86 | */ | ||
| 73 | case LHCALL_NEW_PGTABLE: | 87 | case LHCALL_NEW_PGTABLE: |
| 74 | guest_new_pagetable(cpu, args->arg1); | 88 | guest_new_pagetable(cpu, args->arg1); |
| 75 | break; | 89 | break; |
| @@ -112,15 +126,16 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | |||
| 112 | kill_guest(cpu, "Bad hypercall %li\n", args->arg0); | 126 | kill_guest(cpu, "Bad hypercall %li\n", args->arg0); |
| 113 | } | 127 | } |
| 114 | } | 128 | } |
| 115 | /*:*/ | ||
| 116 | 129 | ||
| 117 | /*H:124 Asynchronous hypercalls are easy: we just look in the array in the | 130 | /*H:124 |
| 131 | * Asynchronous hypercalls are easy: we just look in the array in the | ||
| 118 | * Guest's "struct lguest_data" to see if any new ones are marked "ready". | 132 | * Guest's "struct lguest_data" to see if any new ones are marked "ready". |
| 119 | * | 133 | * |
| 120 | * We are careful to do these in order: obviously we respect the order the | 134 | * We are careful to do these in order: obviously we respect the order the |
| 121 | * Guest put them in the ring, but we also promise the Guest that they will | 135 | * Guest put them in the ring, but we also promise the Guest that they will |
| 122 | * happen before any normal hypercall (which is why we check this before | 136 | * happen before any normal hypercall (which is why we check this before |
| 123 | * checking for a normal hcall). */ | 137 | * checking for a normal hcall). |
| 138 | */ | ||
| 124 | static void do_async_hcalls(struct lg_cpu *cpu) | 139 | static void do_async_hcalls(struct lg_cpu *cpu) |
| 125 | { | 140 | { |
| 126 | unsigned int i; | 141 | unsigned int i; |
| @@ -133,22 +148,28 @@ static void do_async_hcalls(struct lg_cpu *cpu) | |||
| 133 | /* We process "struct lguest_data"s hcalls[] ring once. */ | 148 | /* We process "struct lguest_data"s hcalls[] ring once. */ |
| 134 | for (i = 0; i < ARRAY_SIZE(st); i++) { | 149 | for (i = 0; i < ARRAY_SIZE(st); i++) { |
| 135 | struct hcall_args args; | 150 | struct hcall_args args; |
| 136 | /* We remember where we were up to from last time. This makes | 151 | /* |
| 152 | * We remember where we were up to from last time. This makes | ||
| 137 | * sure that the hypercalls are done in the order the Guest | 153 | * sure that the hypercalls are done in the order the Guest |
| 138 | * places them in the ring. */ | 154 | * places them in the ring. |
| 155 | */ | ||
| 139 | unsigned int n = cpu->next_hcall; | 156 | unsigned int n = cpu->next_hcall; |
| 140 | 157 | ||
| 141 | /* 0xFF means there's no call here (yet). */ | 158 | /* 0xFF means there's no call here (yet). */ |
| 142 | if (st[n] == 0xFF) | 159 | if (st[n] == 0xFF) |
| 143 | break; | 160 | break; |
| 144 | 161 | ||
| 145 | /* OK, we have hypercall. Increment the "next_hcall" cursor, | 162 | /* |
| 146 | * and wrap back to 0 if we reach the end. */ | 163 | * OK, we have hypercall. Increment the "next_hcall" cursor, |
| 164 | * and wrap back to 0 if we reach the end. | ||
| 165 | */ | ||
| 147 | if (++cpu->next_hcall == LHCALL_RING_SIZE) | 166 | if (++cpu->next_hcall == LHCALL_RING_SIZE) |
| 148 | cpu->next_hcall = 0; | 167 | cpu->next_hcall = 0; |
| 149 | 168 | ||
| 150 | /* Copy the hypercall arguments into a local copy of | 169 | /* |
| 151 | * the hcall_args struct. */ | 170 | * Copy the hypercall arguments into a local copy of the |
| 171 | * hcall_args struct. | ||
| 172 | */ | ||
| 152 | if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n], | 173 | if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n], |
| 153 | sizeof(struct hcall_args))) { | 174 | sizeof(struct hcall_args))) { |
| 154 | kill_guest(cpu, "Fetching async hypercalls"); | 175 | kill_guest(cpu, "Fetching async hypercalls"); |
| @@ -164,19 +185,25 @@ static void do_async_hcalls(struct lg_cpu *cpu) | |||
| 164 | break; | 185 | break; |
| 165 | } | 186 | } |
| 166 | 187 | ||
| 167 | /* Stop doing hypercalls if they want to notify the Launcher: | 188 | /* |
| 168 | * it needs to service this first. */ | 189 | * Stop doing hypercalls if they want to notify the Launcher: |
| 190 | * it needs to service this first. | ||
| 191 | */ | ||
| 169 | if (cpu->pending_notify) | 192 | if (cpu->pending_notify) |
| 170 | break; | 193 | break; |
| 171 | } | 194 | } |
| 172 | } | 195 | } |
| 173 | 196 | ||
| 174 | /* Last of all, we look at what happens first of all. The very first time the | 197 | /* |
| 175 | * Guest makes a hypercall, we end up here to set things up: */ | 198 | * Last of all, we look at what happens first of all. The very first time the |
| 199 | * Guest makes a hypercall, we end up here to set things up: | ||
| 200 | */ | ||
| 176 | static void initialize(struct lg_cpu *cpu) | 201 | static void initialize(struct lg_cpu *cpu) |
| 177 | { | 202 | { |
| 178 | /* You can't do anything until you're initialized. The Guest knows the | 203 | /* |
| 179 | * rules, so we're unforgiving here. */ | 204 | * You can't do anything until you're initialized. The Guest knows the |
| 205 | * rules, so we're unforgiving here. | ||
| 206 | */ | ||
| 180 | if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { | 207 | if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { |
| 181 | kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0); | 208 | kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0); |
| 182 | return; | 209 | return; |
| @@ -185,32 +212,44 @@ static void initialize(struct lg_cpu *cpu) | |||
| 185 | if (lguest_arch_init_hypercalls(cpu)) | 212 | if (lguest_arch_init_hypercalls(cpu)) |
| 186 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); | 213 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); |
| 187 | 214 | ||
| 188 | /* The Guest tells us where we're not to deliver interrupts by putting | 215 | /* |
| 189 | * the range of addresses into "struct lguest_data". */ | 216 | * The Guest tells us where we're not to deliver interrupts by putting |
| 217 | * the range of addresses into "struct lguest_data". | ||
| 218 | */ | ||
| 190 | if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) | 219 | if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) |
| 191 | || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) | 220 | || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) |
| 192 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); | 221 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); |
| 193 | 222 | ||
| 194 | /* We write the current time into the Guest's data page once so it can | 223 | /* |
| 195 | * set its clock. */ | 224 | * We write the current time into the Guest's data page once so it can |
| 225 | * set its clock. | ||
| 226 | */ | ||
| 196 | write_timestamp(cpu); | 227 | write_timestamp(cpu); |
| 197 | 228 | ||
| 198 | /* page_tables.c will also do some setup. */ | 229 | /* page_tables.c will also do some setup. */ |
| 199 | page_table_guest_data_init(cpu); | 230 | page_table_guest_data_init(cpu); |
| 200 | 231 | ||
| 201 | /* This is the one case where the above accesses might have been the | 232 | /* |
| 233 | * This is the one case where the above accesses might have been the | ||
| 202 | * first write to a Guest page. This may have caused a copy-on-write | 234 | * first write to a Guest page. This may have caused a copy-on-write |
| 203 | * fault, but the old page might be (read-only) in the Guest | 235 | * fault, but the old page might be (read-only) in the Guest |
| 204 | * pagetable. */ | 236 | * pagetable. |
| 237 | */ | ||
| 205 | guest_pagetable_clear_all(cpu); | 238 | guest_pagetable_clear_all(cpu); |
| 206 | } | 239 | } |
| 207 | /*:*/ | 240 | /*:*/ |
| 208 | 241 | ||
| 209 | /*M:013 If a Guest reads from a page (so creates a mapping) that it has never | 242 | /*M:013 |
| 243 | * If a Guest reads from a page (so creates a mapping) that it has never | ||
| 210 | * written to, and then the Launcher writes to it (ie. the output of a virtual | 244 | * written to, and then the Launcher writes to it (ie. the output of a virtual |
| 211 | * device), the Guest will still see the old page. In practice, this never | 245 | * device), the Guest will still see the old page. In practice, this never |
| 212 | * happens: why would the Guest read a page which it has never written to? But | 246 | * happens: why would the Guest read a page which it has never written to? But |
| 213 | * a similar scenario might one day bite us, so it's worth mentioning. :*/ | 247 | * a similar scenario might one day bite us, so it's worth mentioning. |
| 248 | * | ||
| 249 | * Note that if we used a shared anonymous mapping in the Launcher instead of | ||
| 250 | * mapping /dev/zero private, we wouldn't worry about copy-on-write. And we | ||
| 251 | * need that to switch the Launcher to processes (away from threads) anyway. | ||
| 252 | :*/ | ||
| 214 | 253 | ||
| 215 | /*H:100 | 254 | /*H:100 |
| 216 | * Hypercalls | 255 | * Hypercalls |
| @@ -229,17 +268,22 @@ void do_hypercalls(struct lg_cpu *cpu) | |||
| 229 | return; | 268 | return; |
| 230 | } | 269 | } |
| 231 | 270 | ||
| 232 | /* The Guest has initialized. | 271 | /* |
| 272 | * The Guest has initialized. | ||
| 233 | * | 273 | * |
| 234 | * Look in the hypercall ring for the async hypercalls: */ | 274 | * Look in the hypercall ring for the async hypercalls: |
| 275 | */ | ||
| 235 | do_async_hcalls(cpu); | 276 | do_async_hcalls(cpu); |
| 236 | 277 | ||
| 237 | /* If we stopped reading the hypercall ring because the Guest did a | 278 | /* |
| 279 | * If we stopped reading the hypercall ring because the Guest did a | ||
| 238 | * NOTIFY to the Launcher, we want to return now. Otherwise we do | 280 | * NOTIFY to the Launcher, we want to return now. Otherwise we do |
| 239 | * the hypercall. */ | 281 | * the hypercall. |
| 282 | */ | ||
| 240 | if (!cpu->pending_notify) { | 283 | if (!cpu->pending_notify) { |
| 241 | do_hcall(cpu, cpu->hcall); | 284 | do_hcall(cpu, cpu->hcall); |
| 242 | /* Tricky point: we reset the hcall pointer to mark the | 285 | /* |
| 286 | * Tricky point: we reset the hcall pointer to mark the | ||
| 243 | * hypercall as "done". We use the hcall pointer rather than | 287 | * hypercall as "done". We use the hcall pointer rather than |
| 244 | * the trap number to indicate a hypercall is pending. | 288 | * the trap number to indicate a hypercall is pending. |
| 245 | * Normally it doesn't matter: the Guest will run again and | 289 | * Normally it doesn't matter: the Guest will run again and |
| @@ -248,13 +292,16 @@ void do_hypercalls(struct lg_cpu *cpu) | |||
| 248 | * However, if we are signalled or the Guest sends I/O to the | 292 | * However, if we are signalled or the Guest sends I/O to the |
| 249 | * Launcher, the run_guest() loop will exit without running the | 293 | * Launcher, the run_guest() loop will exit without running the |
| 250 | * Guest. When it comes back it would try to re-run the | 294 | * Guest. When it comes back it would try to re-run the |
| 251 | * hypercall. Finding that bug sucked. */ | 295 | * hypercall. Finding that bug sucked. |
| 296 | */ | ||
| 252 | cpu->hcall = NULL; | 297 | cpu->hcall = NULL; |
| 253 | } | 298 | } |
| 254 | } | 299 | } |
| 255 | 300 | ||
| 256 | /* This routine supplies the Guest with time: it's used for wallclock time at | 301 | /* |
| 257 | * initial boot and as a rough time source if the TSC isn't available. */ | 302 | * This routine supplies the Guest with time: it's used for wallclock time at |
| 303 | * initial boot and as a rough time source if the TSC isn't available. | ||
| 304 | */ | ||
| 258 | void write_timestamp(struct lg_cpu *cpu) | 305 | void write_timestamp(struct lg_cpu *cpu) |
| 259 | { | 306 | { |
| 260 | struct timespec now; | 307 | struct timespec now; |
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 0e9067b0d507..18648180db02 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | /*P:800 Interrupts (traps) are complicated enough to earn their own file. | 1 | /*P:800 |
| 2 | * Interrupts (traps) are complicated enough to earn their own file. | ||
| 2 | * There are three classes of interrupts: | 3 | * There are three classes of interrupts: |
| 3 | * | 4 | * |
| 4 | * 1) Real hardware interrupts which occur while we're running the Guest, | 5 | * 1) Real hardware interrupts which occur while we're running the Guest, |
| @@ -10,7 +11,8 @@ | |||
| 10 | * just like real hardware would deliver them. Traps from the Guest can be set | 11 | * just like real hardware would deliver them. Traps from the Guest can be set |
| 11 | * up to go directly back into the Guest, but sometimes the Host wants to see | 12 | * up to go directly back into the Guest, but sometimes the Host wants to see |
| 12 | * them first, so we also have a way of "reflecting" them into the Guest as if | 13 | * them first, so we also have a way of "reflecting" them into the Guest as if |
| 13 | * they had been delivered to it directly. :*/ | 14 | * they had been delivered to it directly. |
| 15 | :*/ | ||
| 14 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
| 15 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
| 16 | #include <linux/module.h> | 18 | #include <linux/module.h> |
| @@ -26,8 +28,10 @@ static unsigned long idt_address(u32 lo, u32 hi) | |||
| 26 | return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); | 28 | return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); |
| 27 | } | 29 | } |
| 28 | 30 | ||
| 29 | /* The "type" of the interrupt handler is a 4 bit field: we only support a | 31 | /* |
| 30 | * couple of types. */ | 32 | * The "type" of the interrupt handler is a 4 bit field: we only support a |
| 33 | * couple of types. | ||
| 34 | */ | ||
| 31 | static int idt_type(u32 lo, u32 hi) | 35 | static int idt_type(u32 lo, u32 hi) |
| 32 | { | 36 | { |
| 33 | return (hi >> 8) & 0xF; | 37 | return (hi >> 8) & 0xF; |
| @@ -39,8 +43,10 @@ static bool idt_present(u32 lo, u32 hi) | |||
| 39 | return (hi & 0x8000); | 43 | return (hi & 0x8000); |
| 40 | } | 44 | } |
| 41 | 45 | ||
| 42 | /* We need a helper to "push" a value onto the Guest's stack, since that's a | 46 | /* |
| 43 | * big part of what delivering an interrupt does. */ | 47 | * We need a helper to "push" a value onto the Guest's stack, since that's a |
| 48 | * big part of what delivering an interrupt does. | ||
| 49 | */ | ||
| 44 | static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) | 50 | static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) |
| 45 | { | 51 | { |
| 46 | /* Stack grows upwards: move stack then write value. */ | 52 | /* Stack grows upwards: move stack then write value. */ |
| @@ -48,7 +54,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) | |||
| 48 | lgwrite(cpu, *gstack, u32, val); | 54 | lgwrite(cpu, *gstack, u32, val); |
| 49 | } | 55 | } |
| 50 | 56 | ||
| 51 | /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or | 57 | /*H:210 |
| 58 | * The set_guest_interrupt() routine actually delivers the interrupt or | ||
| 52 | * trap. The mechanics of delivering traps and interrupts to the Guest are the | 59 | * trap. The mechanics of delivering traps and interrupts to the Guest are the |
| 53 | * same, except some traps have an "error code" which gets pushed onto the | 60 | * same, except some traps have an "error code" which gets pushed onto the |
| 54 | * stack as well: the caller tells us if this is one. | 61 | * stack as well: the caller tells us if this is one. |
| @@ -59,7 +66,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) | |||
| 59 | * | 66 | * |
| 60 | * We set up the stack just like the CPU does for a real interrupt, so it's | 67 | * We set up the stack just like the CPU does for a real interrupt, so it's |
| 61 | * identical for the Guest (and the standard "iret" instruction will undo | 68 | * identical for the Guest (and the standard "iret" instruction will undo |
| 62 | * it). */ | 69 | * it). |
| 70 | */ | ||
| 63 | static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | 71 | static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, |
| 64 | bool has_err) | 72 | bool has_err) |
| 65 | { | 73 | { |
| @@ -67,20 +75,26 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | |||
| 67 | u32 eflags, ss, irq_enable; | 75 | u32 eflags, ss, irq_enable; |
| 68 | unsigned long virtstack; | 76 | unsigned long virtstack; |
| 69 | 77 | ||
| 70 | /* There are two cases for interrupts: one where the Guest is already | 78 | /* |
| 79 | * There are two cases for interrupts: one where the Guest is already | ||
| 71 | * in the kernel, and a more complex one where the Guest is in | 80 | * in the kernel, and a more complex one where the Guest is in |
| 72 | * userspace. We check the privilege level to find out. */ | 81 | * userspace. We check the privilege level to find out. |
| 82 | */ | ||
| 73 | if ((cpu->regs->ss&0x3) != GUEST_PL) { | 83 | if ((cpu->regs->ss&0x3) != GUEST_PL) { |
| 74 | /* The Guest told us their kernel stack with the SET_STACK | 84 | /* |
| 75 | * hypercall: both the virtual address and the segment */ | 85 | * The Guest told us their kernel stack with the SET_STACK |
| 86 | * hypercall: both the virtual address and the segment. | ||
| 87 | */ | ||
| 76 | virtstack = cpu->esp1; | 88 | virtstack = cpu->esp1; |
| 77 | ss = cpu->ss1; | 89 | ss = cpu->ss1; |
| 78 | 90 | ||
| 79 | origstack = gstack = guest_pa(cpu, virtstack); | 91 | origstack = gstack = guest_pa(cpu, virtstack); |
| 80 | /* We push the old stack segment and pointer onto the new | 92 | /* |
| 93 | * We push the old stack segment and pointer onto the new | ||
| 81 | * stack: when the Guest does an "iret" back from the interrupt | 94 | * stack: when the Guest does an "iret" back from the interrupt |
| 82 | * handler the CPU will notice they're dropping privilege | 95 | * handler the CPU will notice they're dropping privilege |
| 83 | * levels and expect these here. */ | 96 | * levels and expect these here. |
| 97 | */ | ||
| 84 | push_guest_stack(cpu, &gstack, cpu->regs->ss); | 98 | push_guest_stack(cpu, &gstack, cpu->regs->ss); |
| 85 | push_guest_stack(cpu, &gstack, cpu->regs->esp); | 99 | push_guest_stack(cpu, &gstack, cpu->regs->esp); |
| 86 | } else { | 100 | } else { |
| @@ -91,18 +105,22 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | |||
| 91 | origstack = gstack = guest_pa(cpu, virtstack); | 105 | origstack = gstack = guest_pa(cpu, virtstack); |
| 92 | } | 106 | } |
| 93 | 107 | ||
| 94 | /* Remember that we never let the Guest actually disable interrupts, so | 108 | /* |
| 109 | * Remember that we never let the Guest actually disable interrupts, so | ||
| 95 | * the "Interrupt Flag" bit is always set. We copy that bit from the | 110 | * the "Interrupt Flag" bit is always set. We copy that bit from the |
| 96 | * Guest's "irq_enabled" field into the eflags word: we saw the Guest | 111 | * Guest's "irq_enabled" field into the eflags word: we saw the Guest |
| 97 | * copy it back in "lguest_iret". */ | 112 | * copy it back in "lguest_iret". |
| 113 | */ | ||
| 98 | eflags = cpu->regs->eflags; | 114 | eflags = cpu->regs->eflags; |
| 99 | if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 | 115 | if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 |
| 100 | && !(irq_enable & X86_EFLAGS_IF)) | 116 | && !(irq_enable & X86_EFLAGS_IF)) |
| 101 | eflags &= ~X86_EFLAGS_IF; | 117 | eflags &= ~X86_EFLAGS_IF; |
| 102 | 118 | ||
| 103 | /* An interrupt is expected to push three things on the stack: the old | 119 | /* |
| 120 | * An interrupt is expected to push three things on the stack: the old | ||
| 104 | * "eflags" word, the old code segment, and the old instruction | 121 | * "eflags" word, the old code segment, and the old instruction |
| 105 | * pointer. */ | 122 | * pointer. |
| 123 | */ | ||
| 106 | push_guest_stack(cpu, &gstack, eflags); | 124 | push_guest_stack(cpu, &gstack, eflags); |
| 107 | push_guest_stack(cpu, &gstack, cpu->regs->cs); | 125 | push_guest_stack(cpu, &gstack, cpu->regs->cs); |
| 108 | push_guest_stack(cpu, &gstack, cpu->regs->eip); | 126 | push_guest_stack(cpu, &gstack, cpu->regs->eip); |
| @@ -111,15 +129,19 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | |||
| 111 | if (has_err) | 129 | if (has_err) |
| 112 | push_guest_stack(cpu, &gstack, cpu->regs->errcode); | 130 | push_guest_stack(cpu, &gstack, cpu->regs->errcode); |
| 113 | 131 | ||
| 114 | /* Now we've pushed all the old state, we change the stack, the code | 132 | /* |
| 115 | * segment and the address to execute. */ | 133 | * Now we've pushed all the old state, we change the stack, the code |
| 134 | * segment and the address to execute. | ||
| 135 | */ | ||
| 116 | cpu->regs->ss = ss; | 136 | cpu->regs->ss = ss; |
| 117 | cpu->regs->esp = virtstack + (gstack - origstack); | 137 | cpu->regs->esp = virtstack + (gstack - origstack); |
| 118 | cpu->regs->cs = (__KERNEL_CS|GUEST_PL); | 138 | cpu->regs->cs = (__KERNEL_CS|GUEST_PL); |
| 119 | cpu->regs->eip = idt_address(lo, hi); | 139 | cpu->regs->eip = idt_address(lo, hi); |
| 120 | 140 | ||
| 121 | /* There are two kinds of interrupt handlers: 0xE is an "interrupt | 141 | /* |
| 122 | * gate" which expects interrupts to be disabled on entry. */ | 142 | * There are two kinds of interrupt handlers: 0xE is an "interrupt |
| 143 | * gate" which expects interrupts to be disabled on entry. | ||
| 144 | */ | ||
| 123 | if (idt_type(lo, hi) == 0xE) | 145 | if (idt_type(lo, hi) == 0xE) |
| 124 | if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) | 146 | if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) |
| 125 | kill_guest(cpu, "Disabling interrupts"); | 147 | kill_guest(cpu, "Disabling interrupts"); |
| @@ -130,7 +152,8 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | |||
| 130 | * | 152 | * |
| 131 | * interrupt_pending() returns the first pending interrupt which isn't blocked | 153 | * interrupt_pending() returns the first pending interrupt which isn't blocked |
| 132 | * by the Guest. It is called before every entry to the Guest, and just before | 154 | * by the Guest. It is called before every entry to the Guest, and just before |
| 133 | * we go to sleep when the Guest has halted itself. */ | 155 | * we go to sleep when the Guest has halted itself. |
| 156 | */ | ||
| 134 | unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) | 157 | unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) |
| 135 | { | 158 | { |
| 136 | unsigned int irq; | 159 | unsigned int irq; |
| @@ -140,8 +163,10 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) | |||
| 140 | if (!cpu->lg->lguest_data) | 163 | if (!cpu->lg->lguest_data) |
| 141 | return LGUEST_IRQS; | 164 | return LGUEST_IRQS; |
| 142 | 165 | ||
| 143 | /* Take our "irqs_pending" array and remove any interrupts the Guest | 166 | /* |
| 144 | * wants blocked: the result ends up in "blk". */ | 167 | * Take our "irqs_pending" array and remove any interrupts the Guest |
| 168 | * wants blocked: the result ends up in "blk". | ||
| 169 | */ | ||
| 145 | if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, | 170 | if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, |
| 146 | sizeof(blk))) | 171 | sizeof(blk))) |
| 147 | return LGUEST_IRQS; | 172 | return LGUEST_IRQS; |
| @@ -154,16 +179,20 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) | |||
| 154 | return irq; | 179 | return irq; |
| 155 | } | 180 | } |
| 156 | 181 | ||
| 157 | /* This actually diverts the Guest to running an interrupt handler, once an | 182 | /* |
| 158 | * interrupt has been identified by interrupt_pending(). */ | 183 | * This actually diverts the Guest to running an interrupt handler, once an |
| 184 | * interrupt has been identified by interrupt_pending(). | ||
| 185 | */ | ||
| 159 | void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) | 186 | void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) |
| 160 | { | 187 | { |
| 161 | struct desc_struct *idt; | 188 | struct desc_struct *idt; |
| 162 | 189 | ||
| 163 | BUG_ON(irq >= LGUEST_IRQS); | 190 | BUG_ON(irq >= LGUEST_IRQS); |
| 164 | 191 | ||
| 165 | /* They may be in the middle of an iret, where they asked us never to | 192 | /* |
| 166 | * deliver interrupts. */ | 193 | * They may be in the middle of an iret, where they asked us never to |
| 194 | * deliver interrupts. | ||
| 195 | */ | ||
| 167 | if (cpu->regs->eip >= cpu->lg->noirq_start && | 196 | if (cpu->regs->eip >= cpu->lg->noirq_start && |
| 168 | (cpu->regs->eip < cpu->lg->noirq_end)) | 197 | (cpu->regs->eip < cpu->lg->noirq_end)) |
| 169 | return; | 198 | return; |
| @@ -187,29 +216,37 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) | |||
| 187 | } | 216 | } |
| 188 | } | 217 | } |
| 189 | 218 | ||
| 190 | /* Look at the IDT entry the Guest gave us for this interrupt. The | 219 | /* |
| 220 | * Look at the IDT entry the Guest gave us for this interrupt. The | ||
| 191 | * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip | 221 | * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip |
| 192 | * over them. */ | 222 | * over them. |
| 223 | */ | ||
| 193 | idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; | 224 | idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; |
| 194 | /* If they don't have a handler (yet?), we just ignore it */ | 225 | /* If they don't have a handler (yet?), we just ignore it */ |
| 195 | if (idt_present(idt->a, idt->b)) { | 226 | if (idt_present(idt->a, idt->b)) { |
| 196 | /* OK, mark it no longer pending and deliver it. */ | 227 | /* OK, mark it no longer pending and deliver it. */ |
| 197 | clear_bit(irq, cpu->irqs_pending); | 228 | clear_bit(irq, cpu->irqs_pending); |
| 198 | /* set_guest_interrupt() takes the interrupt descriptor and a | 229 | /* |
| 230 | * set_guest_interrupt() takes the interrupt descriptor and a | ||
| 199 | * flag to say whether this interrupt pushes an error code onto | 231 | * flag to say whether this interrupt pushes an error code onto |
| 200 | * the stack as well: virtual interrupts never do. */ | 232 | * the stack as well: virtual interrupts never do. |
| 233 | */ | ||
| 201 | set_guest_interrupt(cpu, idt->a, idt->b, false); | 234 | set_guest_interrupt(cpu, idt->a, idt->b, false); |
| 202 | } | 235 | } |
| 203 | 236 | ||
| 204 | /* Every time we deliver an interrupt, we update the timestamp in the | 237 | /* |
| 238 | * Every time we deliver an interrupt, we update the timestamp in the | ||
| 205 | * Guest's lguest_data struct. It would be better for the Guest if we | 239 | * Guest's lguest_data struct. It would be better for the Guest if we |
| 206 | * did this more often, but it can actually be quite slow: doing it | 240 | * did this more often, but it can actually be quite slow: doing it |
| 207 | * here is a compromise which means at least it gets updated every | 241 | * here is a compromise which means at least it gets updated every |
| 208 | * timer interrupt. */ | 242 | * timer interrupt. |
| 243 | */ | ||
| 209 | write_timestamp(cpu); | 244 | write_timestamp(cpu); |
| 210 | 245 | ||
| 211 | /* If there are no other interrupts we want to deliver, clear | 246 | /* |
| 212 | * the pending flag. */ | 247 | * If there are no other interrupts we want to deliver, clear |
| 248 | * the pending flag. | ||
| 249 | */ | ||
| 213 | if (!more) | 250 | if (!more) |
| 214 | put_user(0, &cpu->lg->lguest_data->irq_pending); | 251 | put_user(0, &cpu->lg->lguest_data->irq_pending); |
| 215 | } | 252 | } |
| @@ -217,24 +254,29 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) | |||
| 217 | /* And this is the routine when we want to set an interrupt for the Guest. */ | 254 | /* And this is the routine when we want to set an interrupt for the Guest. */ |
| 218 | void set_interrupt(struct lg_cpu *cpu, unsigned int irq) | 255 | void set_interrupt(struct lg_cpu *cpu, unsigned int irq) |
| 219 | { | 256 | { |
| 220 | /* Next time the Guest runs, the core code will see if it can deliver | 257 | /* |
| 221 | * this interrupt. */ | 258 | * Next time the Guest runs, the core code will see if it can deliver |
| 259 | * this interrupt. | ||
| 260 | */ | ||
| 222 | set_bit(irq, cpu->irqs_pending); | 261 | set_bit(irq, cpu->irqs_pending); |
| 223 | 262 | ||
| 224 | /* Make sure it sees it; it might be asleep (eg. halted), or | 263 | /* |
| 225 | * running the Guest right now, in which case kick_process() | 264 | * Make sure it sees it; it might be asleep (eg. halted), or running |
| 226 | * will knock it out. */ | 265 | * the Guest right now, in which case kick_process() will knock it out. |
| 266 | */ | ||
| 227 | if (!wake_up_process(cpu->tsk)) | 267 | if (!wake_up_process(cpu->tsk)) |
| 228 | kick_process(cpu->tsk); | 268 | kick_process(cpu->tsk); |
| 229 | } | 269 | } |
| 230 | /*:*/ | 270 | /*:*/ |
| 231 | 271 | ||
| 232 | /* Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent | 272 | /* |
| 273 | * Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent | ||
| 233 | * me a patch, so we support that too. It'd be a big step for lguest if half | 274 | * me a patch, so we support that too. It'd be a big step for lguest if half |
| 234 | * the Plan 9 user base were to start using it. | 275 | * the Plan 9 user base were to start using it. |
| 235 | * | 276 | * |
| 236 | * Actually now I think of it, it's possible that Ron *is* half the Plan 9 | 277 | * Actually now I think of it, it's possible that Ron *is* half the Plan 9 |
| 237 | * userbase. Oh well. */ | 278 | * userbase. Oh well. |
| 279 | */ | ||
| 238 | static bool could_be_syscall(unsigned int num) | 280 | static bool could_be_syscall(unsigned int num) |
| 239 | { | 281 | { |
| 240 | /* Normal Linux SYSCALL_VECTOR or reserved vector? */ | 282 | /* Normal Linux SYSCALL_VECTOR or reserved vector? */ |
| @@ -274,9 +316,11 @@ void free_interrupts(void) | |||
| 274 | clear_bit(syscall_vector, used_vectors); | 316 | clear_bit(syscall_vector, used_vectors); |
| 275 | } | 317 | } |
| 276 | 318 | ||
| 277 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps like | 319 | /*H:220 |
| 320 | * Now we've got the routines to deliver interrupts, delivering traps like | ||
| 278 | * page fault is easy. The only trick is that Intel decided that some traps | 321 | * page fault is easy. The only trick is that Intel decided that some traps |
| 279 | * should have error codes: */ | 322 | * should have error codes: |
| 323 | */ | ||
| 280 | static bool has_err(unsigned int trap) | 324 | static bool has_err(unsigned int trap) |
| 281 | { | 325 | { |
| 282 | return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); | 326 | return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); |
| @@ -285,13 +329,17 @@ static bool has_err(unsigned int trap) | |||
| 285 | /* deliver_trap() returns true if it could deliver the trap. */ | 329 | /* deliver_trap() returns true if it could deliver the trap. */ |
| 286 | bool deliver_trap(struct lg_cpu *cpu, unsigned int num) | 330 | bool deliver_trap(struct lg_cpu *cpu, unsigned int num) |
| 287 | { | 331 | { |
| 288 | /* Trap numbers are always 8 bit, but we set an impossible trap number | 332 | /* |
| 289 | * for traps inside the Switcher, so check that here. */ | 333 | * Trap numbers are always 8 bit, but we set an impossible trap number |
| 334 | * for traps inside the Switcher, so check that here. | ||
| 335 | */ | ||
| 290 | if (num >= ARRAY_SIZE(cpu->arch.idt)) | 336 | if (num >= ARRAY_SIZE(cpu->arch.idt)) |
| 291 | return false; | 337 | return false; |
| 292 | 338 | ||
| 293 | /* Early on the Guest hasn't set the IDT entries (or maybe it put a | 339 | /* |
| 294 | * bogus one in): if we fail here, the Guest will be killed. */ | 340 | * Early on the Guest hasn't set the IDT entries (or maybe it put a |
| 341 | * bogus one in): if we fail here, the Guest will be killed. | ||
| 342 | */ | ||
| 295 | if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) | 343 | if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) |
| 296 | return false; | 344 | return false; |
| 297 | set_guest_interrupt(cpu, cpu->arch.idt[num].a, | 345 | set_guest_interrupt(cpu, cpu->arch.idt[num].a, |
| @@ -299,7 +347,8 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) | |||
| 299 | return true; | 347 | return true; |
| 300 | } | 348 | } |
| 301 | 349 | ||
| 302 | /*H:250 Here's the hard part: returning to the Host every time a trap happens | 350 | /*H:250 |
| 351 | * Here's the hard part: returning to the Host every time a trap happens | ||
| 303 | * and then calling deliver_trap() and re-entering the Guest is slow. | 352 | * and then calling deliver_trap() and re-entering the Guest is slow. |
| 304 | * Particularly because Guest userspace system calls are traps (usually trap | 353 | * Particularly because Guest userspace system calls are traps (usually trap |
| 305 | * 128). | 354 | * 128). |
| @@ -311,69 +360,87 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) | |||
| 311 | * the other hypervisors would beat it up at lunchtime. | 360 | * the other hypervisors would beat it up at lunchtime. |
| 312 | * | 361 | * |
| 313 | * This routine indicates if a particular trap number could be delivered | 362 | * This routine indicates if a particular trap number could be delivered |
| 314 | * directly. */ | 363 | * directly. |
| 364 | */ | ||
| 315 | static bool direct_trap(unsigned int num) | 365 | static bool direct_trap(unsigned int num) |
| 316 | { | 366 | { |
| 317 | /* Hardware interrupts don't go to the Guest at all (except system | 367 | /* |
| 318 | * call). */ | 368 | * Hardware interrupts don't go to the Guest at all (except system |
| 369 | * call). | ||
| 370 | */ | ||
| 319 | if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num)) | 371 | if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num)) |
| 320 | return false; | 372 | return false; |
| 321 | 373 | ||
| 322 | /* The Host needs to see page faults (for shadow paging and to save the | 374 | /* |
| 375 | * The Host needs to see page faults (for shadow paging and to save the | ||
| 323 | * fault address), general protection faults (in/out emulation) and | 376 | * fault address), general protection faults (in/out emulation) and |
| 324 | * device not available (TS handling), invalid opcode fault (kvm hcall), | 377 | * device not available (TS handling), invalid opcode fault (kvm hcall), |
| 325 | * and of course, the hypercall trap. */ | 378 | * and of course, the hypercall trap. |
| 379 | */ | ||
| 326 | return num != 14 && num != 13 && num != 7 && | 380 | return num != 14 && num != 13 && num != 7 && |
| 327 | num != 6 && num != LGUEST_TRAP_ENTRY; | 381 | num != 6 && num != LGUEST_TRAP_ENTRY; |
| 328 | } | 382 | } |
| 329 | /*:*/ | 383 | /*:*/ |
| 330 | 384 | ||
| 331 | /*M:005 The Guest has the ability to turn its interrupt gates into trap gates, | 385 | /*M:005 |
| 386 | * The Guest has the ability to turn its interrupt gates into trap gates, | ||
| 332 | * if it is careful. The Host will let trap gates go directly to the | 387 | * if it is careful. The Host will let trap gates go directly to the |
| 333 | * Guest, but the Guest needs the interrupts atomically disabled for an | 388 | * Guest, but the Guest needs the interrupts atomically disabled for an |
| 334 | * interrupt gate. It can do this by pointing the trap gate at instructions | 389 | * interrupt gate. It can do this by pointing the trap gate at instructions |
| 335 | * within noirq_start and noirq_end, where it can safely disable interrupts. */ | 390 | * within noirq_start and noirq_end, where it can safely disable interrupts. |
| 391 | */ | ||
| 336 | 392 | ||
| 337 | /*M:006 The Guests do not use the sysenter (fast system call) instruction, | 393 | /*M:006 |
| 394 | * The Guests do not use the sysenter (fast system call) instruction, | ||
| 338 | * because it's hardcoded to enter privilege level 0 and so can't go direct. | 395 | * because it's hardcoded to enter privilege level 0 and so can't go direct. |
| 339 | * It's about twice as fast as the older "int 0x80" system call, so it might | 396 | * It's about twice as fast as the older "int 0x80" system call, so it might |
| 340 | * still be worthwhile to handle it in the Switcher and lcall down to the | 397 | * still be worthwhile to handle it in the Switcher and lcall down to the |
| 341 | * Guest. The sysenter semantics are hairy tho: search for that keyword in | 398 | * Guest. The sysenter semantics are hairy tho: search for that keyword in |
| 342 | * entry.S :*/ | 399 | * entry.S |
| 400 | :*/ | ||
| 343 | 401 | ||
| 344 | /*H:260 When we make traps go directly into the Guest, we need to make sure | 402 | /*H:260 |
| 403 | * When we make traps go directly into the Guest, we need to make sure | ||
| 345 | * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the | 404 | * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the |
| 346 | * CPU trying to deliver the trap will fault while trying to push the interrupt | 405 | * CPU trying to deliver the trap will fault while trying to push the interrupt |
| 347 | * words on the stack: this is called a double fault, and it forces us to kill | 406 | * words on the stack: this is called a double fault, and it forces us to kill |
| 348 | * the Guest. | 407 | * the Guest. |
| 349 | * | 408 | * |
| 350 | * Which is deeply unfair, because (literally!) it wasn't the Guest's fault. */ | 409 | * Which is deeply unfair, because (literally!) it wasn't the Guest's fault. |
| 410 | */ | ||
| 351 | void pin_stack_pages(struct lg_cpu *cpu) | 411 | void pin_stack_pages(struct lg_cpu *cpu) |
| 352 | { | 412 | { |
| 353 | unsigned int i; | 413 | unsigned int i; |
| 354 | 414 | ||
| 355 | /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or | 415 | /* |
| 356 | * two pages of stack space. */ | 416 | * Depending on the CONFIG_4KSTACKS option, the Guest can have one or |
| 417 | * two pages of stack space. | ||
| 418 | */ | ||
| 357 | for (i = 0; i < cpu->lg->stack_pages; i++) | 419 | for (i = 0; i < cpu->lg->stack_pages; i++) |
| 358 | /* The stack grows *upwards*, so the address we're given is the | 420 | /* |
| 421 | * The stack grows *upwards*, so the address we're given is the | ||
| 359 | * start of the page after the kernel stack. Subtract one to | 422 | * start of the page after the kernel stack. Subtract one to |
| 360 | * get back onto the first stack page, and keep subtracting to | 423 | * get back onto the first stack page, and keep subtracting to |
| 361 | * get to the rest of the stack pages. */ | 424 | * get to the rest of the stack pages. |
| 425 | */ | ||
| 362 | pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); | 426 | pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); |
| 363 | } | 427 | } |
| 364 | 428 | ||
| 365 | /* Direct traps also mean that we need to know whenever the Guest wants to use | 429 | /* |
| 430 | * Direct traps also mean that we need to know whenever the Guest wants to use | ||
| 366 | * a different kernel stack, so we can change the IDT entries to use that | 431 | * a different kernel stack, so we can change the IDT entries to use that |
| 367 | * stack. The IDT entries expect a virtual address, so unlike most addresses | 432 | * stack. The IDT entries expect a virtual address, so unlike most addresses |
| 368 | * the Guest gives us, the "esp" (stack pointer) value here is virtual, not | 433 | * the Guest gives us, the "esp" (stack pointer) value here is virtual, not |
| 369 | * physical. | 434 | * physical. |
| 370 | * | 435 | * |
| 371 | * In Linux each process has its own kernel stack, so this happens a lot: we | 436 | * In Linux each process has its own kernel stack, so this happens a lot: we |
| 372 | * change stacks on each context switch. */ | 437 | * change stacks on each context switch. |
| 438 | */ | ||
| 373 | void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) | 439 | void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) |
| 374 | { | 440 | { |
| 375 | /* You are not allowed have a stack segment with privilege level 0: bad | 441 | /* |
| 376 | * Guest! */ | 442 | * You're not allowed a stack segment with privilege level 0: bad Guest! |
| 443 | */ | ||
| 377 | if ((seg & 0x3) != GUEST_PL) | 444 | if ((seg & 0x3) != GUEST_PL) |
| 378 | kill_guest(cpu, "bad stack segment %i", seg); | 445 | kill_guest(cpu, "bad stack segment %i", seg); |
| 379 | /* We only expect one or two stack pages. */ | 446 | /* We only expect one or two stack pages. */ |
| @@ -387,11 +454,15 @@ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) | |||
| 387 | pin_stack_pages(cpu); | 454 | pin_stack_pages(cpu); |
| 388 | } | 455 | } |
| 389 | 456 | ||
| 390 | /* All this reference to mapping stacks leads us neatly into the other complex | 457 | /* |
| 391 | * part of the Host: page table handling. */ | 458 | * All this reference to mapping stacks leads us neatly into the other complex |
| 459 | * part of the Host: page table handling. | ||
| 460 | */ | ||
| 392 | 461 | ||
| 393 | /*H:235 This is the routine which actually checks the Guest's IDT entry and | 462 | /*H:235 |
| 394 | * transfers it into the entry in "struct lguest": */ | 463 | * This is the routine which actually checks the Guest's IDT entry and |
| 464 | * transfers it into the entry in "struct lguest": | ||
| 465 | */ | ||
| 395 | static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, | 466 | static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, |
| 396 | unsigned int num, u32 lo, u32 hi) | 467 | unsigned int num, u32 lo, u32 hi) |
| 397 | { | 468 | { |
| @@ -407,30 +478,38 @@ static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, | |||
| 407 | if (type != 0xE && type != 0xF) | 478 | if (type != 0xE && type != 0xF) |
| 408 | kill_guest(cpu, "bad IDT type %i", type); | 479 | kill_guest(cpu, "bad IDT type %i", type); |
| 409 | 480 | ||
| 410 | /* We only copy the handler address, present bit, privilege level and | 481 | /* |
| 482 | * We only copy the handler address, present bit, privilege level and | ||
| 411 | * type. The privilege level controls where the trap can be triggered | 483 | * type. The privilege level controls where the trap can be triggered |
| 412 | * manually with an "int" instruction. This is usually GUEST_PL, | 484 | * manually with an "int" instruction. This is usually GUEST_PL, |
| 413 | * except for system calls which userspace can use. */ | 485 | * except for system calls which userspace can use. |
| 486 | */ | ||
| 414 | trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); | 487 | trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); |
| 415 | trap->b = (hi&0xFFFFEF00); | 488 | trap->b = (hi&0xFFFFEF00); |
| 416 | } | 489 | } |
| 417 | 490 | ||
| 418 | /*H:230 While we're here, dealing with delivering traps and interrupts to the | 491 | /*H:230 |
| 492 | * While we're here, dealing with delivering traps and interrupts to the | ||
| 419 | * Guest, we might as well complete the picture: how the Guest tells us where | 493 | * Guest, we might as well complete the picture: how the Guest tells us where |
| 420 | * it wants them to go. This would be simple, except making traps fast | 494 | * it wants them to go. This would be simple, except making traps fast |
| 421 | * requires some tricks. | 495 | * requires some tricks. |
| 422 | * | 496 | * |
| 423 | * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the | 497 | * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the |
| 424 | * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */ | 498 | * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. |
| 499 | */ | ||
| 425 | void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) | 500 | void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) |
| 426 | { | 501 | { |
| 427 | /* Guest never handles: NMI, doublefault, spurious interrupt or | 502 | /* |
| 428 | * hypercall. We ignore when it tries to set them. */ | 503 | * Guest never handles: NMI, doublefault, spurious interrupt or |
| 504 | * hypercall. We ignore when it tries to set them. | ||
| 505 | */ | ||
| 429 | if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) | 506 | if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) |
| 430 | return; | 507 | return; |
| 431 | 508 | ||
| 432 | /* Mark the IDT as changed: next time the Guest runs we'll know we have | 509 | /* |
| 433 | * to copy this again. */ | 510 | * Mark the IDT as changed: next time the Guest runs we'll know we have |
| 511 | * to copy this again. | ||
| 512 | */ | ||
| 434 | cpu->changed |= CHANGED_IDT; | 513 | cpu->changed |= CHANGED_IDT; |
| 435 | 514 | ||
| 436 | /* Check that the Guest doesn't try to step outside the bounds. */ | 515 | /* Check that the Guest doesn't try to step outside the bounds. */ |
| @@ -440,9 +519,11 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) | |||
| 440 | set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); | 519 | set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); |
| 441 | } | 520 | } |
| 442 | 521 | ||
| 443 | /* The default entry for each interrupt points into the Switcher routines which | 522 | /* |
| 523 | * The default entry for each interrupt points into the Switcher routines which | ||
| 444 | * simply return to the Host. The run_guest() loop will then call | 524 | * simply return to the Host. The run_guest() loop will then call |
| 445 | * deliver_trap() to bounce it back into the Guest. */ | 525 | * deliver_trap() to bounce it back into the Guest. |
| 526 | */ | ||
| 446 | static void default_idt_entry(struct desc_struct *idt, | 527 | static void default_idt_entry(struct desc_struct *idt, |
| 447 | int trap, | 528 | int trap, |
| 448 | const unsigned long handler, | 529 | const unsigned long handler, |
| @@ -451,13 +532,17 @@ static void default_idt_entry(struct desc_struct *idt, | |||
| 451 | /* A present interrupt gate. */ | 532 | /* A present interrupt gate. */ |
| 452 | u32 flags = 0x8e00; | 533 | u32 flags = 0x8e00; |
| 453 | 534 | ||
| 454 | /* Set the privilege level on the entry for the hypercall: this allows | 535 | /* |
| 455 | * the Guest to use the "int" instruction to trigger it. */ | 536 | * Set the privilege level on the entry for the hypercall: this allows |
| 537 | * the Guest to use the "int" instruction to trigger it. | ||
| 538 | */ | ||
| 456 | if (trap == LGUEST_TRAP_ENTRY) | 539 | if (trap == LGUEST_TRAP_ENTRY) |
| 457 | flags |= (GUEST_PL << 13); | 540 | flags |= (GUEST_PL << 13); |
| 458 | else if (base) | 541 | else if (base) |
| 459 | /* Copy priv. level from what Guest asked for. This allows | 542 | /* |
| 460 | * debug (int 3) traps from Guest userspace, for example. */ | 543 | * Copy privilege level from what Guest asked for. This allows |
| 544 | * debug (int 3) traps from Guest userspace, for example. | ||
| 545 | */ | ||
| 461 | flags |= (base->b & 0x6000); | 546 | flags |= (base->b & 0x6000); |
| 462 | 547 | ||
| 463 | /* Now pack it into the IDT entry in its weird format. */ | 548 | /* Now pack it into the IDT entry in its weird format. */ |
| @@ -475,16 +560,20 @@ void setup_default_idt_entries(struct lguest_ro_state *state, | |||
| 475 | default_idt_entry(&state->guest_idt[i], i, def[i], NULL); | 560 | default_idt_entry(&state->guest_idt[i], i, def[i], NULL); |
| 476 | } | 561 | } |
| 477 | 562 | ||
| 478 | /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead | 563 | /*H:240 |
| 564 | * We don't use the IDT entries in the "struct lguest" directly, instead | ||
| 479 | * we copy them into the IDT which we've set up for Guests on this CPU, just | 565 | * we copy them into the IDT which we've set up for Guests on this CPU, just |
| 480 | * before we run the Guest. This routine does that copy. */ | 566 | * before we run the Guest. This routine does that copy. |
| 567 | */ | ||
| 481 | void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | 568 | void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, |
| 482 | const unsigned long *def) | 569 | const unsigned long *def) |
| 483 | { | 570 | { |
| 484 | unsigned int i; | 571 | unsigned int i; |
| 485 | 572 | ||
| 486 | /* We can simply copy the direct traps, otherwise we use the default | 573 | /* |
| 487 | * ones in the Switcher: they will return to the Host. */ | 574 | * We can simply copy the direct traps, otherwise we use the default |
| 575 | * ones in the Switcher: they will return to the Host. | ||
| 576 | */ | ||
| 488 | for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { | 577 | for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { |
| 489 | const struct desc_struct *gidt = &cpu->arch.idt[i]; | 578 | const struct desc_struct *gidt = &cpu->arch.idt[i]; |
| 490 | 579 | ||
| @@ -492,14 +581,16 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | |||
| 492 | if (!direct_trap(i)) | 581 | if (!direct_trap(i)) |
| 493 | continue; | 582 | continue; |
| 494 | 583 | ||
| 495 | /* Only trap gates (type 15) can go direct to the Guest. | 584 | /* |
| 585 | * Only trap gates (type 15) can go direct to the Guest. | ||
| 496 | * Interrupt gates (type 14) disable interrupts as they are | 586 | * Interrupt gates (type 14) disable interrupts as they are |
| 497 | * entered, which we never let the Guest do. Not present | 587 | * entered, which we never let the Guest do. Not present |
| 498 | * entries (type 0x0) also can't go direct, of course. | 588 | * entries (type 0x0) also can't go direct, of course. |
| 499 | * | 589 | * |
| 500 | * If it can't go direct, we still need to copy the priv. level: | 590 | * If it can't go direct, we still need to copy the priv. level: |
| 501 | * they might want to give userspace access to a software | 591 | * they might want to give userspace access to a software |
| 502 | * interrupt. */ | 592 | * interrupt. |
| 593 | */ | ||
| 503 | if (idt_type(gidt->a, gidt->b) == 0xF) | 594 | if (idt_type(gidt->a, gidt->b) == 0xF) |
| 504 | idt[i] = *gidt; | 595 | idt[i] = *gidt; |
| 505 | else | 596 | else |
| @@ -518,7 +609,8 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | |||
| 518 | * the next timer interrupt (in nanoseconds). We use the high-resolution timer | 609 | * the next timer interrupt (in nanoseconds). We use the high-resolution timer |
| 519 | * infrastructure to set a callback at that time. | 610 | * infrastructure to set a callback at that time. |
| 520 | * | 611 | * |
| 521 | * 0 means "turn off the clock". */ | 612 | * 0 means "turn off the clock". |
| 613 | */ | ||
| 522 | void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) | 614 | void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) |
| 523 | { | 615 | { |
| 524 | ktime_t expires; | 616 | ktime_t expires; |
| @@ -529,9 +621,11 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) | |||
| 529 | return; | 621 | return; |
| 530 | } | 622 | } |
| 531 | 623 | ||
| 532 | /* We use wallclock time here, so the Guest might not be running for | 624 | /* |
| 625 | * We use wallclock time here, so the Guest might not be running for | ||
| 533 | * all the time between now and the timer interrupt it asked for. This | 626 | * all the time between now and the timer interrupt it asked for. This |
| 534 | * is almost always the right thing to do. */ | 627 | * is almost always the right thing to do. |
| 628 | */ | ||
| 535 | expires = ktime_add_ns(ktime_get_real(), delta); | 629 | expires = ktime_add_ns(ktime_get_real(), delta); |
| 536 | hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); | 630 | hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); |
| 537 | } | 631 | } |
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 01c591923793..bc28745d05af 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
| @@ -16,15 +16,13 @@ | |||
| 16 | void free_pagetables(void); | 16 | void free_pagetables(void); |
| 17 | int init_pagetables(struct page **switcher_page, unsigned int pages); | 17 | int init_pagetables(struct page **switcher_page, unsigned int pages); |
| 18 | 18 | ||
| 19 | struct pgdir | 19 | struct pgdir { |
| 20 | { | ||
| 21 | unsigned long gpgdir; | 20 | unsigned long gpgdir; |
| 22 | pgd_t *pgdir; | 21 | pgd_t *pgdir; |
| 23 | }; | 22 | }; |
| 24 | 23 | ||
| 25 | /* We have two pages shared with guests, per cpu. */ | 24 | /* We have two pages shared with guests, per cpu. */ |
| 26 | struct lguest_pages | 25 | struct lguest_pages { |
| 27 | { | ||
| 28 | /* This is the stack page mapped rw in guest */ | 26 | /* This is the stack page mapped rw in guest */ |
| 29 | char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; | 27 | char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; |
| 30 | struct lguest_regs regs; | 28 | struct lguest_regs regs; |
| @@ -54,13 +52,13 @@ struct lg_cpu { | |||
| 54 | 52 | ||
| 55 | unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ | 53 | unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ |
| 56 | 54 | ||
| 57 | /* At end of a page shared mapped over lguest_pages in guest. */ | 55 | /* At end of a page shared mapped over lguest_pages in guest. */ |
| 58 | unsigned long regs_page; | 56 | unsigned long regs_page; |
| 59 | struct lguest_regs *regs; | 57 | struct lguest_regs *regs; |
| 60 | 58 | ||
| 61 | struct lguest_pages *last_pages; | 59 | struct lguest_pages *last_pages; |
| 62 | 60 | ||
| 63 | int cpu_pgd; /* which pgd this cpu is currently using */ | 61 | int cpu_pgd; /* Which pgd this cpu is currently using */ |
| 64 | 62 | ||
| 65 | /* If a hypercall was asked for, this points to the arguments. */ | 63 | /* If a hypercall was asked for, this points to the arguments. */ |
| 66 | struct hcall_args *hcall; | 64 | struct hcall_args *hcall; |
| @@ -89,15 +87,17 @@ struct lg_eventfd_map { | |||
| 89 | }; | 87 | }; |
| 90 | 88 | ||
| 91 | /* The private info the thread maintains about the guest. */ | 89 | /* The private info the thread maintains about the guest. */ |
| 92 | struct lguest | 90 | struct lguest { |
| 93 | { | ||
| 94 | struct lguest_data __user *lguest_data; | 91 | struct lguest_data __user *lguest_data; |
| 95 | struct lg_cpu cpus[NR_CPUS]; | 92 | struct lg_cpu cpus[NR_CPUS]; |
| 96 | unsigned int nr_cpus; | 93 | unsigned int nr_cpus; |
| 97 | 94 | ||
| 98 | u32 pfn_limit; | 95 | u32 pfn_limit; |
| 99 | /* This provides the offset to the base of guest-physical | 96 | |
| 100 | * memory in the Launcher. */ | 97 | /* |
| 98 | * This provides the offset to the base of guest-physical memory in the | ||
| 99 | * Launcher. | ||
| 100 | */ | ||
| 101 | void __user *mem_base; | 101 | void __user *mem_base; |
| 102 | unsigned long kernel_address; | 102 | unsigned long kernel_address; |
| 103 | 103 | ||
| @@ -122,11 +122,13 @@ bool lguest_address_ok(const struct lguest *lg, | |||
| 122 | void __lgread(struct lg_cpu *, void *, unsigned long, unsigned); | 122 | void __lgread(struct lg_cpu *, void *, unsigned long, unsigned); |
| 123 | void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); | 123 | void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); |
| 124 | 124 | ||
| 125 | /*H:035 Using memory-copy operations like that is usually inconvient, so we | 125 | /*H:035 |
| 126 | * Using memory-copy operations like that is usually inconvient, so we | ||
| 126 | * have the following helper macros which read and write a specific type (often | 127 | * have the following helper macros which read and write a specific type (often |
| 127 | * an unsigned long). | 128 | * an unsigned long). |
| 128 | * | 129 | * |
| 129 | * This reads into a variable of the given type then returns that. */ | 130 | * This reads into a variable of the given type then returns that. |
| 131 | */ | ||
| 130 | #define lgread(cpu, addr, type) \ | 132 | #define lgread(cpu, addr, type) \ |
| 131 | ({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; }) | 133 | ({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; }) |
| 132 | 134 | ||
| @@ -140,9 +142,11 @@ void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); | |||
| 140 | 142 | ||
| 141 | int run_guest(struct lg_cpu *cpu, unsigned long __user *user); | 143 | int run_guest(struct lg_cpu *cpu, unsigned long __user *user); |
| 142 | 144 | ||
| 143 | /* Helper macros to obtain the first 12 or the last 20 bits, this is only the | 145 | /* |
| 146 | * Helper macros to obtain the first 12 or the last 20 bits, this is only the | ||
| 144 | * first step in the migration to the kernel types. pte_pfn is already defined | 147 | * first step in the migration to the kernel types. pte_pfn is already defined |
| 145 | * in the kernel. */ | 148 | * in the kernel. |
| 149 | */ | ||
| 146 | #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) | 150 | #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) |
| 147 | #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) | 151 | #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) |
| 148 | #define pmd_flags(x) (pmd_val(x) & ~PAGE_MASK) | 152 | #define pmd_flags(x) (pmd_val(x) & ~PAGE_MASK) |
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index e082cdac88b4..b6200bc39b58 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c | |||
| @@ -1,10 +1,12 @@ | |||
| 1 | /*P:050 Lguest guests use a very simple method to describe devices. It's a | 1 | /*P:050 |
| 2 | * Lguest guests use a very simple method to describe devices. It's a | ||
| 2 | * series of device descriptors contained just above the top of normal Guest | 3 | * series of device descriptors contained just above the top of normal Guest |
| 3 | * memory. | 4 | * memory. |
| 4 | * | 5 | * |
| 5 | * We use the standard "virtio" device infrastructure, which provides us with a | 6 | * We use the standard "virtio" device infrastructure, which provides us with a |
| 6 | * console, a network and a block driver. Each one expects some configuration | 7 | * console, a network and a block driver. Each one expects some configuration |
| 7 | * information and a "virtqueue" or two to send and receive data. :*/ | 8 | * information and a "virtqueue" or two to send and receive data. |
| 9 | :*/ | ||
| 8 | #include <linux/init.h> | 10 | #include <linux/init.h> |
| 9 | #include <linux/bootmem.h> | 11 | #include <linux/bootmem.h> |
| 10 | #include <linux/lguest_launcher.h> | 12 | #include <linux/lguest_launcher.h> |
| @@ -20,8 +22,10 @@ | |||
| 20 | /* The pointer to our (page) of device descriptions. */ | 22 | /* The pointer to our (page) of device descriptions. */ |
| 21 | static void *lguest_devices; | 23 | static void *lguest_devices; |
| 22 | 24 | ||
| 23 | /* For Guests, device memory can be used as normal memory, so we cast away the | 25 | /* |
| 24 | * __iomem to quieten sparse. */ | 26 | * For Guests, device memory can be used as normal memory, so we cast away the |
| 27 | * __iomem to quieten sparse. | ||
| 28 | */ | ||
| 25 | static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) | 29 | static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) |
| 26 | { | 30 | { |
| 27 | return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages); | 31 | return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages); |
| @@ -32,8 +36,10 @@ static inline void lguest_unmap(void *addr) | |||
| 32 | iounmap((__force void __iomem *)addr); | 36 | iounmap((__force void __iomem *)addr); |
| 33 | } | 37 | } |
| 34 | 38 | ||
| 35 | /*D:100 Each lguest device is just a virtio device plus a pointer to its entry | 39 | /*D:100 |
| 36 | * in the lguest_devices page. */ | 40 | * Each lguest device is just a virtio device plus a pointer to its entry |
| 41 | * in the lguest_devices page. | ||
| 42 | */ | ||
| 37 | struct lguest_device { | 43 | struct lguest_device { |
| 38 | struct virtio_device vdev; | 44 | struct virtio_device vdev; |
| 39 | 45 | ||
| @@ -41,9 +47,11 @@ struct lguest_device { | |||
| 41 | struct lguest_device_desc *desc; | 47 | struct lguest_device_desc *desc; |
| 42 | }; | 48 | }; |
| 43 | 49 | ||
| 44 | /* Since the virtio infrastructure hands us a pointer to the virtio_device all | 50 | /* |
| 51 | * Since the virtio infrastructure hands us a pointer to the virtio_device all | ||
| 45 | * the time, it helps to have a curt macro to get a pointer to the struct | 52 | * the time, it helps to have a curt macro to get a pointer to the struct |
| 46 | * lguest_device it's enclosed in. */ | 53 | * lguest_device it's enclosed in. |
| 54 | */ | ||
| 47 | #define to_lgdev(vd) container_of(vd, struct lguest_device, vdev) | 55 | #define to_lgdev(vd) container_of(vd, struct lguest_device, vdev) |
| 48 | 56 | ||
| 49 | /*D:130 | 57 | /*D:130 |
| @@ -55,7 +63,8 @@ struct lguest_device { | |||
| 55 | * the driver will look at them during setup. | 63 | * the driver will look at them during setup. |
| 56 | * | 64 | * |
| 57 | * A convenient routine to return the device's virtqueue config array: | 65 | * A convenient routine to return the device's virtqueue config array: |
| 58 | * immediately after the descriptor. */ | 66 | * immediately after the descriptor. |
| 67 | */ | ||
| 59 | static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) | 68 | static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) |
| 60 | { | 69 | { |
| 61 | return (void *)(desc + 1); | 70 | return (void *)(desc + 1); |
| @@ -98,10 +107,12 @@ static u32 lg_get_features(struct virtio_device *vdev) | |||
| 98 | return features; | 107 | return features; |
| 99 | } | 108 | } |
| 100 | 109 | ||
| 101 | /* The virtio core takes the features the Host offers, and copies the | 110 | /* |
| 102 | * ones supported by the driver into the vdev->features array. Once | 111 | * The virtio core takes the features the Host offers, and copies the ones |
| 103 | * that's all sorted out, this routine is called so we can tell the | 112 | * supported by the driver into the vdev->features array. Once that's all |
| 104 | * Host which features we understand and accept. */ | 113 | * sorted out, this routine is called so we can tell the Host which features we |
| 114 | * understand and accept. | ||
| 115 | */ | ||
| 105 | static void lg_finalize_features(struct virtio_device *vdev) | 116 | static void lg_finalize_features(struct virtio_device *vdev) |
| 106 | { | 117 | { |
| 107 | unsigned int i, bits; | 118 | unsigned int i, bits; |
| @@ -112,10 +123,11 @@ static void lg_finalize_features(struct virtio_device *vdev) | |||
| 112 | /* Give virtio_ring a chance to accept features. */ | 123 | /* Give virtio_ring a chance to accept features. */ |
| 113 | vring_transport_features(vdev); | 124 | vring_transport_features(vdev); |
| 114 | 125 | ||
| 115 | /* The vdev->feature array is a Linux bitmask: this isn't the | 126 | /* |
| 116 | * same as a the simple array of bits used by lguest devices | 127 | * The vdev->feature array is a Linux bitmask: this isn't the same as a |
| 117 | * for features. So we do this slow, manual conversion which is | 128 | * the simple array of bits used by lguest devices for features. So we |
| 118 | * completely general. */ | 129 | * do this slow, manual conversion which is completely general. |
| 130 | */ | ||
| 119 | memset(out_features, 0, desc->feature_len); | 131 | memset(out_features, 0, desc->feature_len); |
| 120 | bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; | 132 | bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; |
| 121 | for (i = 0; i < bits; i++) { | 133 | for (i = 0; i < bits; i++) { |
| @@ -146,15 +158,19 @@ static void lg_set(struct virtio_device *vdev, unsigned int offset, | |||
| 146 | memcpy(lg_config(desc) + offset, buf, len); | 158 | memcpy(lg_config(desc) + offset, buf, len); |
| 147 | } | 159 | } |
| 148 | 160 | ||
| 149 | /* The operations to get and set the status word just access the status field | 161 | /* |
| 150 | * of the device descriptor. */ | 162 | * The operations to get and set the status word just access the status field |
| 163 | * of the device descriptor. | ||
| 164 | */ | ||
| 151 | static u8 lg_get_status(struct virtio_device *vdev) | 165 | static u8 lg_get_status(struct virtio_device *vdev) |
| 152 | { | 166 | { |
| 153 | return to_lgdev(vdev)->desc->status; | 167 | return to_lgdev(vdev)->desc->status; |
| 154 | } | 168 | } |
| 155 | 169 | ||
| 156 | /* To notify on status updates, we (ab)use the NOTIFY hypercall, with the | 170 | /* |
| 157 | * descriptor address of the device. A zero status means "reset". */ | 171 | * To notify on status updates, we (ab)use the NOTIFY hypercall, with the |
| 172 | * descriptor address of the device. A zero status means "reset". | ||
| 173 | */ | ||
| 158 | static void set_status(struct virtio_device *vdev, u8 status) | 174 | static void set_status(struct virtio_device *vdev, u8 status) |
| 159 | { | 175 | { |
| 160 | unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; | 176 | unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; |
| @@ -191,8 +207,7 @@ static void lg_reset(struct virtio_device *vdev) | |||
| 191 | */ | 207 | */ |
| 192 | 208 | ||
| 193 | /*D:140 This is the information we remember about each virtqueue. */ | 209 | /*D:140 This is the information we remember about each virtqueue. */ |
| 194 | struct lguest_vq_info | 210 | struct lguest_vq_info { |
| 195 | { | ||
| 196 | /* A copy of the information contained in the device config. */ | 211 | /* A copy of the information contained in the device config. */ |
| 197 | struct lguest_vqconfig config; | 212 | struct lguest_vqconfig config; |
| 198 | 213 | ||
| @@ -200,13 +215,17 @@ struct lguest_vq_info | |||
| 200 | void *pages; | 215 | void *pages; |
| 201 | }; | 216 | }; |
| 202 | 217 | ||
| 203 | /* When the virtio_ring code wants to prod the Host, it calls us here and we | 218 | /* |
| 219 | * When the virtio_ring code wants to prod the Host, it calls us here and we | ||
| 204 | * make a hypercall. We hand the physical address of the virtqueue so the Host | 220 | * make a hypercall. We hand the physical address of the virtqueue so the Host |
| 205 | * knows which virtqueue we're talking about. */ | 221 | * knows which virtqueue we're talking about. |
| 222 | */ | ||
| 206 | static void lg_notify(struct virtqueue *vq) | 223 | static void lg_notify(struct virtqueue *vq) |
| 207 | { | 224 | { |
| 208 | /* We store our virtqueue information in the "priv" pointer of the | 225 | /* |
| 209 | * virtqueue structure. */ | 226 | * We store our virtqueue information in the "priv" pointer of the |
| 227 | * virtqueue structure. | ||
| 228 | */ | ||
| 210 | struct lguest_vq_info *lvq = vq->priv; | 229 | struct lguest_vq_info *lvq = vq->priv; |
| 211 | 230 | ||
| 212 | kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT); | 231 | kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT); |
| @@ -215,7 +234,8 @@ static void lg_notify(struct virtqueue *vq) | |||
| 215 | /* An extern declaration inside a C file is bad form. Don't do it. */ | 234 | /* An extern declaration inside a C file is bad form. Don't do it. */ |
| 216 | extern void lguest_setup_irq(unsigned int irq); | 235 | extern void lguest_setup_irq(unsigned int irq); |
| 217 | 236 | ||
| 218 | /* This routine finds the first virtqueue described in the configuration of | 237 | /* |
| 238 | * This routine finds the Nth virtqueue described in the configuration of | ||
| 219 | * this device and sets it up. | 239 | * this device and sets it up. |
| 220 | * | 240 | * |
| 221 | * This is kind of an ugly duckling. It'd be nicer to have a standard | 241 | * This is kind of an ugly duckling. It'd be nicer to have a standard |
| @@ -223,9 +243,7 @@ extern void lguest_setup_irq(unsigned int irq); | |||
| 223 | * everyone wants to do it differently. The KVM coders want the Guest to | 243 | * everyone wants to do it differently. The KVM coders want the Guest to |
| 224 | * allocate its own pages and tell the Host where they are, but for lguest it's | 244 | * allocate its own pages and tell the Host where they are, but for lguest it's |
| 225 | * simpler for the Host to simply tell us where the pages are. | 245 | * simpler for the Host to simply tell us where the pages are. |
| 226 | * | 246 | */ |
| 227 | * So we provide drivers with a "find the Nth virtqueue and set it up" | ||
| 228 | * function. */ | ||
| 229 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | 247 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, |
| 230 | unsigned index, | 248 | unsigned index, |
| 231 | void (*callback)(struct virtqueue *vq), | 249 | void (*callback)(struct virtqueue *vq), |
| @@ -244,9 +262,11 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
| 244 | if (!lvq) | 262 | if (!lvq) |
| 245 | return ERR_PTR(-ENOMEM); | 263 | return ERR_PTR(-ENOMEM); |
| 246 | 264 | ||
| 247 | /* Make a copy of the "struct lguest_vqconfig" entry, which sits after | 265 | /* |
| 266 | * Make a copy of the "struct lguest_vqconfig" entry, which sits after | ||
| 248 | * the descriptor. We need a copy because the config space might not | 267 | * the descriptor. We need a copy because the config space might not |
| 249 | * be aligned correctly. */ | 268 | * be aligned correctly. |
| 269 | */ | ||
| 250 | memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); | 270 | memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); |
| 251 | 271 | ||
| 252 | printk("Mapping virtqueue %i addr %lx\n", index, | 272 | printk("Mapping virtqueue %i addr %lx\n", index, |
| @@ -261,8 +281,10 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
| 261 | goto free_lvq; | 281 | goto free_lvq; |
| 262 | } | 282 | } |
| 263 | 283 | ||
| 264 | /* OK, tell virtio_ring.c to set up a virtqueue now we know its size | 284 | /* |
| 265 | * and we've got a pointer to its pages. */ | 285 | * OK, tell virtio_ring.c to set up a virtqueue now we know its size |
| 286 | * and we've got a pointer to its pages. | ||
| 287 | */ | ||
| 266 | vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, | 288 | vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, |
| 267 | vdev, lvq->pages, lg_notify, callback, name); | 289 | vdev, lvq->pages, lg_notify, callback, name); |
| 268 | if (!vq) { | 290 | if (!vq) { |
| @@ -273,18 +295,23 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
| 273 | /* Make sure the interrupt is allocated. */ | 295 | /* Make sure the interrupt is allocated. */ |
| 274 | lguest_setup_irq(lvq->config.irq); | 296 | lguest_setup_irq(lvq->config.irq); |
| 275 | 297 | ||
| 276 | /* Tell the interrupt for this virtqueue to go to the virtio_ring | 298 | /* |
| 277 | * interrupt handler. */ | 299 | * Tell the interrupt for this virtqueue to go to the virtio_ring |
| 278 | /* FIXME: We used to have a flag for the Host to tell us we could use | 300 | * interrupt handler. |
| 301 | * | ||
| 302 | * FIXME: We used to have a flag for the Host to tell us we could use | ||
| 279 | * the interrupt as a source of randomness: it'd be nice to have that | 303 | * the interrupt as a source of randomness: it'd be nice to have that |
| 280 | * back.. */ | 304 | * back. |
| 305 | */ | ||
| 281 | err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, | 306 | err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, |
| 282 | dev_name(&vdev->dev), vq); | 307 | dev_name(&vdev->dev), vq); |
| 283 | if (err) | 308 | if (err) |
| 284 | goto destroy_vring; | 309 | goto destroy_vring; |
| 285 | 310 | ||
| 286 | /* Last of all we hook up our 'struct lguest_vq_info" to the | 311 | /* |
| 287 | * virtqueue's priv pointer. */ | 312 | * Last of all we hook up our 'struct lguest_vq_info" to the |
| 313 | * virtqueue's priv pointer. | ||
| 314 | */ | ||
| 288 | vq->priv = lvq; | 315 | vq->priv = lvq; |
| 289 | return vq; | 316 | return vq; |
| 290 | 317 | ||
| @@ -358,11 +385,14 @@ static struct virtio_config_ops lguest_config_ops = { | |||
| 358 | .del_vqs = lg_del_vqs, | 385 | .del_vqs = lg_del_vqs, |
| 359 | }; | 386 | }; |
| 360 | 387 | ||
| 361 | /* The root device for the lguest virtio devices. This makes them appear as | 388 | /* |
| 362 | * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. */ | 389 | * The root device for the lguest virtio devices. This makes them appear as |
| 390 | * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. | ||
| 391 | */ | ||
| 363 | static struct device *lguest_root; | 392 | static struct device *lguest_root; |
| 364 | 393 | ||
| 365 | /*D:120 This is the core of the lguest bus: actually adding a new device. | 394 | /*D:120 |
| 395 | * This is the core of the lguest bus: actually adding a new device. | ||
| 366 | * It's a separate function because it's neater that way, and because an | 396 | * It's a separate function because it's neater that way, and because an |
| 367 | * earlier version of the code supported hotplug and unplug. They were removed | 397 | * earlier version of the code supported hotplug and unplug. They were removed |
| 368 | * early on because they were never used. | 398 | * early on because they were never used. |
| @@ -371,14 +401,14 @@ static struct device *lguest_root; | |||
| 371 | * | 401 | * |
| 372 | * It's worth reading this carefully: we start with a pointer to the new device | 402 | * It's worth reading this carefully: we start with a pointer to the new device |
| 373 | * descriptor in the "lguest_devices" page, and the offset into the device | 403 | * descriptor in the "lguest_devices" page, and the offset into the device |
| 374 | * descriptor page so we can uniquely identify it if things go badly wrong. */ | 404 | * descriptor page so we can uniquely identify it if things go badly wrong. |
| 405 | */ | ||
| 375 | static void add_lguest_device(struct lguest_device_desc *d, | 406 | static void add_lguest_device(struct lguest_device_desc *d, |
| 376 | unsigned int offset) | 407 | unsigned int offset) |
| 377 | { | 408 | { |
| 378 | struct lguest_device *ldev; | 409 | struct lguest_device *ldev; |
| 379 | 410 | ||
| 380 | /* Start with zeroed memory; Linux's device layer seems to count on | 411 | /* Start with zeroed memory; Linux's device layer counts on it. */ |
| 381 | * it. */ | ||
| 382 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); | 412 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); |
| 383 | if (!ldev) { | 413 | if (!ldev) { |
| 384 | printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n", | 414 | printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n", |
| @@ -388,17 +418,25 @@ static void add_lguest_device(struct lguest_device_desc *d, | |||
| 388 | 418 | ||
| 389 | /* This device's parent is the lguest/ dir. */ | 419 | /* This device's parent is the lguest/ dir. */ |
| 390 | ldev->vdev.dev.parent = lguest_root; | 420 | ldev->vdev.dev.parent = lguest_root; |
| 391 | /* We have a unique device index thanks to the dev_index counter. */ | 421 | /* |
| 422 | * The device type comes straight from the descriptor. There's also a | ||
| 423 | * device vendor field in the virtio_device struct, which we leave as | ||
| 424 | * 0. | ||
| 425 | */ | ||
| 392 | ldev->vdev.id.device = d->type; | 426 | ldev->vdev.id.device = d->type; |
| 393 | /* We have a simple set of routines for querying the device's | 427 | /* |
| 394 | * configuration information and setting its status. */ | 428 | * We have a simple set of routines for querying the device's |
| 429 | * configuration information and setting its status. | ||
| 430 | */ | ||
| 395 | ldev->vdev.config = &lguest_config_ops; | 431 | ldev->vdev.config = &lguest_config_ops; |
| 396 | /* And we remember the device's descriptor for lguest_config_ops. */ | 432 | /* And we remember the device's descriptor for lguest_config_ops. */ |
| 397 | ldev->desc = d; | 433 | ldev->desc = d; |
| 398 | 434 | ||
| 399 | /* register_virtio_device() sets up the generic fields for the struct | 435 | /* |
| 436 | * register_virtio_device() sets up the generic fields for the struct | ||
| 400 | * virtio_device and calls device_register(). This makes the bus | 437 | * virtio_device and calls device_register(). This makes the bus |
| 401 | * infrastructure look for a matching driver. */ | 438 | * infrastructure look for a matching driver. |
| 439 | */ | ||
| 402 | if (register_virtio_device(&ldev->vdev) != 0) { | 440 | if (register_virtio_device(&ldev->vdev) != 0) { |
| 403 | printk(KERN_ERR "Failed to register lguest dev %u type %u\n", | 441 | printk(KERN_ERR "Failed to register lguest dev %u type %u\n", |
| 404 | offset, d->type); | 442 | offset, d->type); |
| @@ -406,8 +444,10 @@ static void add_lguest_device(struct lguest_device_desc *d, | |||
| 406 | } | 444 | } |
| 407 | } | 445 | } |
| 408 | 446 | ||
| 409 | /*D:110 scan_devices() simply iterates through the device page. The type 0 is | 447 | /*D:110 |
| 410 | * reserved to mean "end of devices". */ | 448 | * scan_devices() simply iterates through the device page. The type 0 is |
| 449 | * reserved to mean "end of devices". | ||
| 450 | */ | ||
| 411 | static void scan_devices(void) | 451 | static void scan_devices(void) |
| 412 | { | 452 | { |
| 413 | unsigned int i; | 453 | unsigned int i; |
| @@ -426,7 +466,8 @@ static void scan_devices(void) | |||
| 426 | } | 466 | } |
| 427 | } | 467 | } |
| 428 | 468 | ||
| 429 | /*D:105 Fairly early in boot, lguest_devices_init() is called to set up the | 469 | /*D:105 |
| 470 | * Fairly early in boot, lguest_devices_init() is called to set up the | ||
| 430 | * lguest device infrastructure. We check that we are a Guest by checking | 471 | * lguest device infrastructure. We check that we are a Guest by checking |
| 431 | * pv_info.name: there are other ways of checking, but this seems most | 472 | * pv_info.name: there are other ways of checking, but this seems most |
| 432 | * obvious to me. | 473 | * obvious to me. |
| @@ -437,7 +478,8 @@ static void scan_devices(void) | |||
| 437 | * correct sysfs incantation). | 478 | * correct sysfs incantation). |
| 438 | * | 479 | * |
| 439 | * Finally we call scan_devices() which adds all the devices found in the | 480 | * Finally we call scan_devices() which adds all the devices found in the |
| 440 | * lguest_devices page. */ | 481 | * lguest_devices page. |
| 482 | */ | ||
| 441 | static int __init lguest_devices_init(void) | 483 | static int __init lguest_devices_init(void) |
| 442 | { | 484 | { |
| 443 | if (strcmp(pv_info.name, "lguest") != 0) | 485 | if (strcmp(pv_info.name, "lguest") != 0) |
| @@ -456,11 +498,13 @@ static int __init lguest_devices_init(void) | |||
| 456 | /* We do this after core stuff, but before the drivers. */ | 498 | /* We do this after core stuff, but before the drivers. */ |
| 457 | postcore_initcall(lguest_devices_init); | 499 | postcore_initcall(lguest_devices_init); |
| 458 | 500 | ||
| 459 | /*D:150 At this point in the journey we used to now wade through the lguest | 501 | /*D:150 |
| 502 | * At this point in the journey we used to now wade through the lguest | ||
| 460 | * devices themselves: net, block and console. Since they're all now virtio | 503 | * devices themselves: net, block and console. Since they're all now virtio |
| 461 | * devices rather than lguest-specific, I've decided to ignore them. Mostly, | 504 | * devices rather than lguest-specific, I've decided to ignore them. Mostly, |
| 462 | * they're kind of boring. But this does mean you'll never experience the | 505 | * they're kind of boring. But this does mean you'll never experience the |
| 463 | * thrill of reading the forbidden love scene buried deep in the block driver. | 506 | * thrill of reading the forbidden love scene buried deep in the block driver. |
| 464 | * | 507 | * |
| 465 | * "make Launcher" beckons, where we answer questions like "Where do Guests | 508 | * "make Launcher" beckons, where we answer questions like "Where do Guests |
| 466 | * come from?", and "What do you do when someone asks for optimization?". */ | 509 | * come from?", and "What do you do when someone asks for optimization?". |
| 510 | */ | ||
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 9f9a2953b383..b4d3f7ca554f 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
| @@ -1,8 +1,9 @@ | |||
| 1 | /*P:200 This contains all the /dev/lguest code, whereby the userspace launcher | 1 | /*P:200 This contains all the /dev/lguest code, whereby the userspace launcher |
| 2 | * controls and communicates with the Guest. For example, the first write will | 2 | * controls and communicates with the Guest. For example, the first write will |
| 3 | * tell us the Guest's memory layout, pagetable, entry point and kernel address | 3 | * tell us the Guest's memory layout and entry point. A read will run the |
| 4 | * offset. A read will run the Guest until something happens, such as a signal | 4 | * Guest until something happens, such as a signal or the Guest doing a NOTIFY |
| 5 | * or the Guest doing a NOTIFY out to the Launcher. :*/ | 5 | * out to the Launcher. |
| 6 | :*/ | ||
| 6 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
| 7 | #include <linux/miscdevice.h> | 8 | #include <linux/miscdevice.h> |
| 8 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
| @@ -11,14 +12,41 @@ | |||
| 11 | #include <linux/file.h> | 12 | #include <linux/file.h> |
| 12 | #include "lg.h" | 13 | #include "lg.h" |
| 13 | 14 | ||
| 15 | /*L:056 | ||
| 16 | * Before we move on, let's jump ahead and look at what the kernel does when | ||
| 17 | * it needs to look up the eventfds. That will complete our picture of how we | ||
| 18 | * use RCU. | ||
| 19 | * | ||
| 20 | * The notification value is in cpu->pending_notify: we return true if it went | ||
| 21 | * to an eventfd. | ||
| 22 | */ | ||
| 14 | bool send_notify_to_eventfd(struct lg_cpu *cpu) | 23 | bool send_notify_to_eventfd(struct lg_cpu *cpu) |
| 15 | { | 24 | { |
| 16 | unsigned int i; | 25 | unsigned int i; |
| 17 | struct lg_eventfd_map *map; | 26 | struct lg_eventfd_map *map; |
| 18 | 27 | ||
| 19 | /* lg->eventfds is RCU-protected */ | 28 | /* |
| 29 | * This "rcu_read_lock()" helps track when someone is still looking at | ||
| 30 | * the (RCU-using) eventfds array. It's not actually a lock at all; | ||
| 31 | * indeed it's a noop in many configurations. (You didn't expect me to | ||
| 32 | * explain all the RCU secrets here, did you?) | ||
| 33 | */ | ||
| 20 | rcu_read_lock(); | 34 | rcu_read_lock(); |
| 35 | /* | ||
| 36 | * rcu_dereference is the counter-side of rcu_assign_pointer(); it | ||
| 37 | * makes sure we don't access the memory pointed to by | ||
| 38 | * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, | ||
| 39 | * but Alpha allows this! Paul McKenney points out that a really | ||
| 40 | * aggressive compiler could have the same effect: | ||
| 41 | * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html | ||
| 42 | * | ||
| 43 | * So play safe, use rcu_dereference to get the rcu-protected pointer: | ||
| 44 | */ | ||
| 21 | map = rcu_dereference(cpu->lg->eventfds); | 45 | map = rcu_dereference(cpu->lg->eventfds); |
| 46 | /* | ||
| 47 | * Simple array search: even if they add an eventfd while we do this, | ||
| 48 | * we'll continue to use the old array and just won't see the new one. | ||
| 49 | */ | ||
| 22 | for (i = 0; i < map->num; i++) { | 50 | for (i = 0; i < map->num; i++) { |
| 23 | if (map->map[i].addr == cpu->pending_notify) { | 51 | if (map->map[i].addr == cpu->pending_notify) { |
| 24 | eventfd_signal(map->map[i].event, 1); | 52 | eventfd_signal(map->map[i].event, 1); |
| @@ -26,19 +54,50 @@ bool send_notify_to_eventfd(struct lg_cpu *cpu) | |||
| 26 | break; | 54 | break; |
| 27 | } | 55 | } |
| 28 | } | 56 | } |
| 57 | /* We're done with the rcu-protected variable cpu->lg->eventfds. */ | ||
| 29 | rcu_read_unlock(); | 58 | rcu_read_unlock(); |
| 59 | |||
| 60 | /* If we cleared the notification, it's because we found a match. */ | ||
| 30 | return cpu->pending_notify == 0; | 61 | return cpu->pending_notify == 0; |
| 31 | } | 62 | } |
| 32 | 63 | ||
| 64 | /*L:055 | ||
| 65 | * One of the more tricksy tricks in the Linux Kernel is a technique called | ||
| 66 | * Read Copy Update. Since one point of lguest is to teach lguest journeyers | ||
| 67 | * about kernel coding, I use it here. (In case you're curious, other purposes | ||
| 68 | * include learning about virtualization and instilling a deep appreciation for | ||
| 69 | * simplicity and puppies). | ||
| 70 | * | ||
| 71 | * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we | ||
| 72 | * add new eventfds without ever blocking readers from accessing the array. | ||
| 73 | * The current Launcher only does this during boot, so that never happens. But | ||
| 74 | * Read Copy Update is cool, and adding a lock risks damaging even more puppies | ||
| 75 | * than this code does. | ||
| 76 | * | ||
| 77 | * We allocate a brand new one-larger array, copy the old one and add our new | ||
| 78 | * element. Then we make the lg eventfd pointer point to the new array. | ||
| 79 | * That's the easy part: now we need to free the old one, but we need to make | ||
| 80 | * sure no slow CPU somewhere is still looking at it. That's what | ||
| 81 | * synchronize_rcu does for us: waits until every CPU has indicated that it has | ||
| 82 | * moved on, so we know it's no longer using the old one. | ||
| 83 | * | ||
| 84 | * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. | ||
| 85 | */ | ||
| 33 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | 86 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) |
| 34 | { | 87 | { |
| 35 | struct lg_eventfd_map *new, *old = lg->eventfds; | 88 | struct lg_eventfd_map *new, *old = lg->eventfds; |
| 36 | 89 | ||
| 90 | /* | ||
| 91 | * We don't allow notifications on value 0 anyway (pending_notify of | ||
| 92 | * 0 means "nothing pending"). | ||
| 93 | */ | ||
| 37 | if (!addr) | 94 | if (!addr) |
| 38 | return -EINVAL; | 95 | return -EINVAL; |
| 39 | 96 | ||
| 40 | /* Replace the old array with the new one, carefully: others can | 97 | /* |
| 41 | * be accessing it at the same time */ | 98 | * Replace the old array with the new one, carefully: others can |
| 99 | * be accessing it at the same time. | ||
| 100 | */ | ||
| 42 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), | 101 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), |
| 43 | GFP_KERNEL); | 102 | GFP_KERNEL); |
| 44 | if (!new) | 103 | if (!new) |
| @@ -52,22 +111,41 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | |||
| 52 | new->map[new->num].addr = addr; | 111 | new->map[new->num].addr = addr; |
| 53 | new->map[new->num].event = eventfd_ctx_fdget(fd); | 112 | new->map[new->num].event = eventfd_ctx_fdget(fd); |
| 54 | if (IS_ERR(new->map[new->num].event)) { | 113 | if (IS_ERR(new->map[new->num].event)) { |
| 114 | int err = PTR_ERR(new->map[new->num].event); | ||
| 55 | kfree(new); | 115 | kfree(new); |
| 56 | return PTR_ERR(new->map[new->num].event); | 116 | return err; |
| 57 | } | 117 | } |
| 58 | new->num++; | 118 | new->num++; |
| 59 | 119 | ||
| 60 | /* Now put new one in place. */ | 120 | /* |
| 121 | * Now put new one in place: rcu_assign_pointer() is a fancy way of | ||
| 122 | * doing "lg->eventfds = new", but it uses memory barriers to make | ||
| 123 | * absolutely sure that the contents of "new" written above is nailed | ||
| 124 | * down before we actually do the assignment. | ||
| 125 | * | ||
| 126 | * We have to think about these kinds of things when we're operating on | ||
| 127 | * live data without locks. | ||
| 128 | */ | ||
| 61 | rcu_assign_pointer(lg->eventfds, new); | 129 | rcu_assign_pointer(lg->eventfds, new); |
| 62 | 130 | ||
| 63 | /* We're not in a big hurry. Wait until no one's looking at old | 131 | /* |
| 64 | * version, then delete it. */ | 132 | * We're not in a big hurry. Wait until no one's looking at old |
| 133 | * version, then free it. | ||
| 134 | */ | ||
| 65 | synchronize_rcu(); | 135 | synchronize_rcu(); |
| 66 | kfree(old); | 136 | kfree(old); |
| 67 | 137 | ||
| 68 | return 0; | 138 | return 0; |
| 69 | } | 139 | } |
| 70 | 140 | ||
| 141 | /*L:052 | ||
| 142 | * Receiving notifications from the Guest is usually done by attaching a | ||
| 143 | * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will | ||
| 144 | * become readable when the Guest does an LHCALL_NOTIFY with that value. | ||
| 145 | * | ||
| 146 | * This is really convenient for processing each virtqueue in a separate | ||
| 147 | * thread. | ||
| 148 | */ | ||
| 71 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | 149 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) |
| 72 | { | 150 | { |
| 73 | unsigned long addr, fd; | 151 | unsigned long addr, fd; |
| @@ -79,15 +157,22 @@ static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | |||
| 79 | if (get_user(fd, input) != 0) | 157 | if (get_user(fd, input) != 0) |
| 80 | return -EFAULT; | 158 | return -EFAULT; |
| 81 | 159 | ||
| 160 | /* | ||
| 161 | * Just make sure two callers don't add eventfds at once. We really | ||
| 162 | * only need to lock against callers adding to the same Guest, so using | ||
| 163 | * the Big Lguest Lock is overkill. But this is setup, not a fast path. | ||
| 164 | */ | ||
| 82 | mutex_lock(&lguest_lock); | 165 | mutex_lock(&lguest_lock); |
| 83 | err = add_eventfd(lg, addr, fd); | 166 | err = add_eventfd(lg, addr, fd); |
| 84 | mutex_unlock(&lguest_lock); | 167 | mutex_unlock(&lguest_lock); |
| 85 | 168 | ||
| 86 | return 0; | 169 | return err; |
| 87 | } | 170 | } |
| 88 | 171 | ||
| 89 | /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt | 172 | /*L:050 |
| 90 | * number to /dev/lguest. */ | 173 | * Sending an interrupt is done by writing LHREQ_IRQ and an interrupt |
| 174 | * number to /dev/lguest. | ||
| 175 | */ | ||
| 91 | static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) | 176 | static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) |
| 92 | { | 177 | { |
| 93 | unsigned long irq; | 178 | unsigned long irq; |
| @@ -97,12 +182,18 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) | |||
| 97 | if (irq >= LGUEST_IRQS) | 182 | if (irq >= LGUEST_IRQS) |
| 98 | return -EINVAL; | 183 | return -EINVAL; |
| 99 | 184 | ||
| 185 | /* | ||
| 186 | * Next time the Guest runs, the core code will see if it can deliver | ||
| 187 | * this interrupt. | ||
| 188 | */ | ||
| 100 | set_interrupt(cpu, irq); | 189 | set_interrupt(cpu, irq); |
| 101 | return 0; | 190 | return 0; |
| 102 | } | 191 | } |
| 103 | 192 | ||
| 104 | /*L:040 Once our Guest is initialized, the Launcher makes it run by reading | 193 | /*L:040 |
| 105 | * from /dev/lguest. */ | 194 | * Once our Guest is initialized, the Launcher makes it run by reading |
| 195 | * from /dev/lguest. | ||
| 196 | */ | ||
| 106 | static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | 197 | static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) |
| 107 | { | 198 | { |
| 108 | struct lguest *lg = file->private_data; | 199 | struct lguest *lg = file->private_data; |
| @@ -138,8 +229,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
| 138 | return len; | 229 | return len; |
| 139 | } | 230 | } |
| 140 | 231 | ||
| 141 | /* If we returned from read() last time because the Guest sent I/O, | 232 | /* |
| 142 | * clear the flag. */ | 233 | * If we returned from read() last time because the Guest sent I/O, |
| 234 | * clear the flag. | ||
| 235 | */ | ||
| 143 | if (cpu->pending_notify) | 236 | if (cpu->pending_notify) |
| 144 | cpu->pending_notify = 0; | 237 | cpu->pending_notify = 0; |
| 145 | 238 | ||
| @@ -147,8 +240,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
| 147 | return run_guest(cpu, (unsigned long __user *)user); | 240 | return run_guest(cpu, (unsigned long __user *)user); |
| 148 | } | 241 | } |
| 149 | 242 | ||
| 150 | /*L:025 This actually initializes a CPU. For the moment, a Guest is only | 243 | /*L:025 |
| 151 | * uniprocessor, so "id" is always 0. */ | 244 | * This actually initializes a CPU. For the moment, a Guest is only |
| 245 | * uniprocessor, so "id" is always 0. | ||
| 246 | */ | ||
| 152 | static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | 247 | static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) |
| 153 | { | 248 | { |
| 154 | /* We have a limited number of CPUs in the lguest struct. */ | 249 | /* We have a limited number of CPUs in the lguest struct. */ |
| @@ -163,8 +258,10 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | |||
| 163 | /* Each CPU has a timer it can set. */ | 258 | /* Each CPU has a timer it can set. */ |
| 164 | init_clockdev(cpu); | 259 | init_clockdev(cpu); |
| 165 | 260 | ||
| 166 | /* We need a complete page for the Guest registers: they are accessible | 261 | /* |
| 167 | * to the Guest and we can only grant it access to whole pages. */ | 262 | * We need a complete page for the Guest registers: they are accessible |
| 263 | * to the Guest and we can only grant it access to whole pages. | ||
| 264 | */ | ||
| 168 | cpu->regs_page = get_zeroed_page(GFP_KERNEL); | 265 | cpu->regs_page = get_zeroed_page(GFP_KERNEL); |
| 169 | if (!cpu->regs_page) | 266 | if (!cpu->regs_page) |
| 170 | return -ENOMEM; | 267 | return -ENOMEM; |
| @@ -172,29 +269,38 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | |||
| 172 | /* We actually put the registers at the bottom of the page. */ | 269 | /* We actually put the registers at the bottom of the page. */ |
| 173 | cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); | 270 | cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); |
| 174 | 271 | ||
| 175 | /* Now we initialize the Guest's registers, handing it the start | 272 | /* |
| 176 | * address. */ | 273 | * Now we initialize the Guest's registers, handing it the start |
| 274 | * address. | ||
| 275 | */ | ||
| 177 | lguest_arch_setup_regs(cpu, start_ip); | 276 | lguest_arch_setup_regs(cpu, start_ip); |
| 178 | 277 | ||
| 179 | /* We keep a pointer to the Launcher task (ie. current task) for when | 278 | /* |
| 180 | * other Guests want to wake this one (eg. console input). */ | 279 | * We keep a pointer to the Launcher task (ie. current task) for when |
| 280 | * other Guests want to wake this one (eg. console input). | ||
| 281 | */ | ||
| 181 | cpu->tsk = current; | 282 | cpu->tsk = current; |
| 182 | 283 | ||
| 183 | /* We need to keep a pointer to the Launcher's memory map, because if | 284 | /* |
| 285 | * We need to keep a pointer to the Launcher's memory map, because if | ||
| 184 | * the Launcher dies we need to clean it up. If we don't keep a | 286 | * the Launcher dies we need to clean it up. If we don't keep a |
| 185 | * reference, it is destroyed before close() is called. */ | 287 | * reference, it is destroyed before close() is called. |
| 288 | */ | ||
| 186 | cpu->mm = get_task_mm(cpu->tsk); | 289 | cpu->mm = get_task_mm(cpu->tsk); |
| 187 | 290 | ||
| 188 | /* We remember which CPU's pages this Guest used last, for optimization | 291 | /* |
| 189 | * when the same Guest runs on the same CPU twice. */ | 292 | * We remember which CPU's pages this Guest used last, for optimization |
| 293 | * when the same Guest runs on the same CPU twice. | ||
| 294 | */ | ||
| 190 | cpu->last_pages = NULL; | 295 | cpu->last_pages = NULL; |
| 191 | 296 | ||
| 192 | /* No error == success. */ | 297 | /* No error == success. */ |
| 193 | return 0; | 298 | return 0; |
| 194 | } | 299 | } |
| 195 | 300 | ||
| 196 | /*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit) | 301 | /*L:020 |
| 197 | * values (in addition to the LHREQ_INITIALIZE value). These are: | 302 | * The initialization write supplies 3 pointer sized (32 or 64 bit) values (in |
| 303 | * addition to the LHREQ_INITIALIZE value). These are: | ||
| 198 | * | 304 | * |
| 199 | * base: The start of the Guest-physical memory inside the Launcher memory. | 305 | * base: The start of the Guest-physical memory inside the Launcher memory. |
| 200 | * | 306 | * |
| @@ -206,14 +312,15 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | |||
| 206 | */ | 312 | */ |
| 207 | static int initialize(struct file *file, const unsigned long __user *input) | 313 | static int initialize(struct file *file, const unsigned long __user *input) |
| 208 | { | 314 | { |
| 209 | /* "struct lguest" contains everything we (the Host) know about a | 315 | /* "struct lguest" contains all we (the Host) know about a Guest. */ |
| 210 | * Guest. */ | ||
| 211 | struct lguest *lg; | 316 | struct lguest *lg; |
| 212 | int err; | 317 | int err; |
| 213 | unsigned long args[3]; | 318 | unsigned long args[3]; |
| 214 | 319 | ||
| 215 | /* We grab the Big Lguest lock, which protects against multiple | 320 | /* |
| 216 | * simultaneous initializations. */ | 321 | * We grab the Big Lguest lock, which protects against multiple |
| 322 | * simultaneous initializations. | ||
| 323 | */ | ||
| 217 | mutex_lock(&lguest_lock); | 324 | mutex_lock(&lguest_lock); |
| 218 | /* You can't initialize twice! Close the device and start again... */ | 325 | /* You can't initialize twice! Close the device and start again... */ |
| 219 | if (file->private_data) { | 326 | if (file->private_data) { |
| @@ -248,8 +355,10 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
| 248 | if (err) | 355 | if (err) |
| 249 | goto free_eventfds; | 356 | goto free_eventfds; |
| 250 | 357 | ||
| 251 | /* Initialize the Guest's shadow page tables, using the toplevel | 358 | /* |
| 252 | * address the Launcher gave us. This allocates memory, so can fail. */ | 359 | * Initialize the Guest's shadow page tables, using the toplevel |
| 360 | * address the Launcher gave us. This allocates memory, so can fail. | ||
| 361 | */ | ||
| 253 | err = init_guest_pagetable(lg); | 362 | err = init_guest_pagetable(lg); |
| 254 | if (err) | 363 | if (err) |
| 255 | goto free_regs; | 364 | goto free_regs; |
| @@ -274,20 +383,24 @@ unlock: | |||
| 274 | return err; | 383 | return err; |
| 275 | } | 384 | } |
| 276 | 385 | ||
| 277 | /*L:010 The first operation the Launcher does must be a write. All writes | 386 | /*L:010 |
| 387 | * The first operation the Launcher does must be a write. All writes | ||
| 278 | * start with an unsigned long number: for the first write this must be | 388 | * start with an unsigned long number: for the first write this must be |
| 279 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use | 389 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use |
| 280 | * writes of other values to send interrupts. | 390 | * writes of other values to send interrupts or set up receipt of notifications. |
| 281 | * | 391 | * |
| 282 | * Note that we overload the "offset" in the /dev/lguest file to indicate what | 392 | * Note that we overload the "offset" in the /dev/lguest file to indicate what |
| 283 | * CPU number we're dealing with. Currently this is always 0, since we only | 393 | * CPU number we're dealing with. Currently this is always 0 since we only |
| 284 | * support uniprocessor Guests, but you can see the beginnings of SMP support | 394 | * support uniprocessor Guests, but you can see the beginnings of SMP support |
| 285 | * here. */ | 395 | * here. |
| 396 | */ | ||
| 286 | static ssize_t write(struct file *file, const char __user *in, | 397 | static ssize_t write(struct file *file, const char __user *in, |
| 287 | size_t size, loff_t *off) | 398 | size_t size, loff_t *off) |
| 288 | { | 399 | { |
| 289 | /* Once the Guest is initialized, we hold the "struct lguest" in the | 400 | /* |
| 290 | * file private data. */ | 401 | * Once the Guest is initialized, we hold the "struct lguest" in the |
| 402 | * file private data. | ||
| 403 | */ | ||
| 291 | struct lguest *lg = file->private_data; | 404 | struct lguest *lg = file->private_data; |
| 292 | const unsigned long __user *input = (const unsigned long __user *)in; | 405 | const unsigned long __user *input = (const unsigned long __user *)in; |
| 293 | unsigned long req; | 406 | unsigned long req; |
| @@ -322,13 +435,15 @@ static ssize_t write(struct file *file, const char __user *in, | |||
| 322 | } | 435 | } |
| 323 | } | 436 | } |
| 324 | 437 | ||
| 325 | /*L:060 The final piece of interface code is the close() routine. It reverses | 438 | /*L:060 |
| 439 | * The final piece of interface code is the close() routine. It reverses | ||
| 326 | * everything done in initialize(). This is usually called because the | 440 | * everything done in initialize(). This is usually called because the |
| 327 | * Launcher exited. | 441 | * Launcher exited. |
| 328 | * | 442 | * |
| 329 | * Note that the close routine returns 0 or a negative error number: it can't | 443 | * Note that the close routine returns 0 or a negative error number: it can't |
| 330 | * really fail, but it can whine. I blame Sun for this wart, and K&R C for | 444 | * really fail, but it can whine. I blame Sun for this wart, and K&R C for |
| 331 | * letting them do it. :*/ | 445 | * letting them do it. |
| 446 | :*/ | ||
| 332 | static int close(struct inode *inode, struct file *file) | 447 | static int close(struct inode *inode, struct file *file) |
| 333 | { | 448 | { |
| 334 | struct lguest *lg = file->private_data; | 449 | struct lguest *lg = file->private_data; |
| @@ -338,8 +453,10 @@ static int close(struct inode *inode, struct file *file) | |||
| 338 | if (!lg) | 453 | if (!lg) |
| 339 | return 0; | 454 | return 0; |
| 340 | 455 | ||
| 341 | /* We need the big lock, to protect from inter-guest I/O and other | 456 | /* |
| 342 | * Launchers initializing guests. */ | 457 | * We need the big lock, to protect from inter-guest I/O and other |
| 458 | * Launchers initializing guests. | ||
| 459 | */ | ||
| 343 | mutex_lock(&lguest_lock); | 460 | mutex_lock(&lguest_lock); |
| 344 | 461 | ||
| 345 | /* Free up the shadow page tables for the Guest. */ | 462 | /* Free up the shadow page tables for the Guest. */ |
| @@ -350,8 +467,10 @@ static int close(struct inode *inode, struct file *file) | |||
| 350 | hrtimer_cancel(&lg->cpus[i].hrt); | 467 | hrtimer_cancel(&lg->cpus[i].hrt); |
| 351 | /* We can free up the register page we allocated. */ | 468 | /* We can free up the register page we allocated. */ |
| 352 | free_page(lg->cpus[i].regs_page); | 469 | free_page(lg->cpus[i].regs_page); |
| 353 | /* Now all the memory cleanups are done, it's safe to release | 470 | /* |
| 354 | * the Launcher's memory management structure. */ | 471 | * Now all the memory cleanups are done, it's safe to release |
| 472 | * the Launcher's memory management structure. | ||
| 473 | */ | ||
| 355 | mmput(lg->cpus[i].mm); | 474 | mmput(lg->cpus[i].mm); |
| 356 | } | 475 | } |
| 357 | 476 | ||
| @@ -360,8 +479,10 @@ static int close(struct inode *inode, struct file *file) | |||
| 360 | eventfd_ctx_put(lg->eventfds->map[i].event); | 479 | eventfd_ctx_put(lg->eventfds->map[i].event); |
| 361 | kfree(lg->eventfds); | 480 | kfree(lg->eventfds); |
| 362 | 481 | ||
| 363 | /* If lg->dead doesn't contain an error code it will be NULL or a | 482 | /* |
| 364 | * kmalloc()ed string, either of which is ok to hand to kfree(). */ | 483 | * If lg->dead doesn't contain an error code it will be NULL or a |
| 484 | * kmalloc()ed string, either of which is ok to hand to kfree(). | ||
| 485 | */ | ||
| 365 | if (!IS_ERR(lg->dead)) | 486 | if (!IS_ERR(lg->dead)) |
| 366 | kfree(lg->dead); | 487 | kfree(lg->dead); |
| 367 | /* Free the memory allocated to the lguest_struct */ | 488 | /* Free the memory allocated to the lguest_struct */ |
| @@ -385,7 +506,8 @@ static int close(struct inode *inode, struct file *file) | |||
| 385 | * | 506 | * |
| 386 | * We begin our understanding with the Host kernel interface which the Launcher | 507 | * We begin our understanding with the Host kernel interface which the Launcher |
| 387 | * uses: reading and writing a character device called /dev/lguest. All the | 508 | * uses: reading and writing a character device called /dev/lguest. All the |
| 388 | * work happens in the read(), write() and close() routines: */ | 509 | * work happens in the read(), write() and close() routines: |
| 510 | */ | ||
| 389 | static struct file_operations lguest_fops = { | 511 | static struct file_operations lguest_fops = { |
| 390 | .owner = THIS_MODULE, | 512 | .owner = THIS_MODULE, |
| 391 | .release = close, | 513 | .release = close, |
| @@ -393,8 +515,10 @@ static struct file_operations lguest_fops = { | |||
| 393 | .read = read, | 515 | .read = read, |
| 394 | }; | 516 | }; |
| 395 | 517 | ||
| 396 | /* This is a textbook example of a "misc" character device. Populate a "struct | 518 | /* |
| 397 | * miscdevice" and register it with misc_register(). */ | 519 | * This is a textbook example of a "misc" character device. Populate a "struct |
| 520 | * miscdevice" and register it with misc_register(). | ||
| 521 | */ | ||
| 398 | static struct miscdevice lguest_dev = { | 522 | static struct miscdevice lguest_dev = { |
| 399 | .minor = MISC_DYNAMIC_MINOR, | 523 | .minor = MISC_DYNAMIC_MINOR, |
| 400 | .name = "lguest", | 524 | .name = "lguest", |
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index a6fe1abda240..a8d0aee3bc0e 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
| @@ -1,9 +1,11 @@ | |||
| 1 | /*P:700 The pagetable code, on the other hand, still shows the scars of | 1 | /*P:700 |
| 2 | * The pagetable code, on the other hand, still shows the scars of | ||
| 2 | * previous encounters. It's functional, and as neat as it can be in the | 3 | * previous encounters. It's functional, and as neat as it can be in the |
| 3 | * circumstances, but be wary, for these things are subtle and break easily. | 4 | * circumstances, but be wary, for these things are subtle and break easily. |
| 4 | * The Guest provides a virtual to physical mapping, but we can neither trust | 5 | * The Guest provides a virtual to physical mapping, but we can neither trust |
| 5 | * it nor use it: we verify and convert it here then point the CPU to the | 6 | * it nor use it: we verify and convert it here then point the CPU to the |
| 6 | * converted Guest pages when running the Guest. :*/ | 7 | * converted Guest pages when running the Guest. |
| 8 | :*/ | ||
| 7 | 9 | ||
| 8 | /* Copyright (C) Rusty Russell IBM Corporation 2006. | 10 | /* Copyright (C) Rusty Russell IBM Corporation 2006. |
| 9 | * GPL v2 and any later version */ | 11 | * GPL v2 and any later version */ |
| @@ -17,18 +19,20 @@ | |||
| 17 | #include <asm/bootparam.h> | 19 | #include <asm/bootparam.h> |
| 18 | #include "lg.h" | 20 | #include "lg.h" |
| 19 | 21 | ||
| 20 | /*M:008 We hold references to pages, which prevents them from being swapped. | 22 | /*M:008 |
| 23 | * We hold references to pages, which prevents them from being swapped. | ||
| 21 | * It'd be nice to have a callback in the "struct mm_struct" when Linux wants | 24 | * It'd be nice to have a callback in the "struct mm_struct" when Linux wants |
| 22 | * to swap out. If we had this, and a shrinker callback to trim PTE pages, we | 25 | * to swap out. If we had this, and a shrinker callback to trim PTE pages, we |
| 23 | * could probably consider launching Guests as non-root. :*/ | 26 | * could probably consider launching Guests as non-root. |
| 27 | :*/ | ||
| 24 | 28 | ||
| 25 | /*H:300 | 29 | /*H:300 |
| 26 | * The Page Table Code | 30 | * The Page Table Code |
| 27 | * | 31 | * |
| 28 | * We use two-level page tables for the Guest. If you're not entirely | 32 | * We use two-level page tables for the Guest, or three-level with PAE. If |
| 29 | * comfortable with virtual addresses, physical addresses and page tables then | 33 | * you're not entirely comfortable with virtual addresses, physical addresses |
| 30 | * I recommend you review arch/x86/lguest/boot.c's "Page Table Handling" (with | 34 | * and page tables then I recommend you review arch/x86/lguest/boot.c's "Page |
| 31 | * diagrams!). | 35 | * Table Handling" (with diagrams!). |
| 32 | * | 36 | * |
| 33 | * The Guest keeps page tables, but we maintain the actual ones here: these are | 37 | * The Guest keeps page tables, but we maintain the actual ones here: these are |
| 34 | * called "shadow" page tables. Which is a very Guest-centric name: these are | 38 | * called "shadow" page tables. Which is a very Guest-centric name: these are |
| @@ -45,16 +49,18 @@ | |||
| 45 | * (v) Flushing (throwing away) page tables, | 49 | * (v) Flushing (throwing away) page tables, |
| 46 | * (vi) Mapping the Switcher when the Guest is about to run, | 50 | * (vi) Mapping the Switcher when the Guest is about to run, |
| 47 | * (vii) Setting up the page tables initially. | 51 | * (vii) Setting up the page tables initially. |
| 48 | :*/ | 52 | :*/ |
| 49 | 53 | ||
| 50 | 54 | /* | |
| 51 | /* 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is | 55 | * The Switcher uses the complete top PTE page. That's 1024 PTE entries (4MB) |
| 52 | * conveniently placed at the top 4MB, so it uses a separate, complete PTE | 56 | * or 512 PTE entries with PAE (2MB). |
| 53 | * page. */ | 57 | */ |
| 54 | #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1) | 58 | #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1) |
| 55 | 59 | ||
| 56 | /* For PAE we need the PMD index as well. We use the last 2MB, so we | 60 | /* |
| 57 | * will need the last pmd entry of the last pmd page. */ | 61 | * For PAE we need the PMD index as well. We use the last 2MB, so we |
| 62 | * will need the last pmd entry of the last pmd page. | ||
| 63 | */ | ||
| 58 | #ifdef CONFIG_X86_PAE | 64 | #ifdef CONFIG_X86_PAE |
| 59 | #define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) | 65 | #define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) |
| 60 | #define RESERVE_MEM 2U | 66 | #define RESERVE_MEM 2U |
| @@ -64,14 +70,18 @@ | |||
| 64 | #define CHECK_GPGD_MASK _PAGE_TABLE | 70 | #define CHECK_GPGD_MASK _PAGE_TABLE |
| 65 | #endif | 71 | #endif |
| 66 | 72 | ||
| 67 | /* We actually need a separate PTE page for each CPU. Remember that after the | 73 | /* |
| 74 | * We actually need a separate PTE page for each CPU. Remember that after the | ||
| 68 | * Switcher code itself comes two pages for each CPU, and we don't want this | 75 | * Switcher code itself comes two pages for each CPU, and we don't want this |
| 69 | * CPU's guest to see the pages of any other CPU. */ | 76 | * CPU's guest to see the pages of any other CPU. |
| 77 | */ | ||
| 70 | static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); | 78 | static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); |
| 71 | #define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) | 79 | #define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) |
| 72 | 80 | ||
| 73 | /*H:320 The page table code is curly enough to need helper functions to keep it | 81 | /*H:320 |
| 74 | * clear and clean. | 82 | * The page table code is curly enough to need helper functions to keep it |
| 83 | * clear and clean. The kernel itself provides many of them; one advantage | ||
| 84 | * of insisting that the Guest and Host use the same CONFIG_X86_PAE setting. | ||
| 75 | * | 85 | * |
| 76 | * There are two functions which return pointers to the shadow (aka "real") | 86 | * There are two functions which return pointers to the shadow (aka "real") |
| 77 | * page tables. | 87 | * page tables. |
| @@ -79,7 +89,8 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); | |||
| 79 | * spgd_addr() takes the virtual address and returns a pointer to the top-level | 89 | * spgd_addr() takes the virtual address and returns a pointer to the top-level |
| 80 | * page directory entry (PGD) for that address. Since we keep track of several | 90 | * page directory entry (PGD) for that address. Since we keep track of several |
| 81 | * page tables, the "i" argument tells us which one we're interested in (it's | 91 | * page tables, the "i" argument tells us which one we're interested in (it's |
| 82 | * usually the current one). */ | 92 | * usually the current one). |
| 93 | */ | ||
| 83 | static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) | 94 | static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) |
| 84 | { | 95 | { |
| 85 | unsigned int index = pgd_index(vaddr); | 96 | unsigned int index = pgd_index(vaddr); |
| @@ -96,9 +107,11 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) | |||
| 96 | } | 107 | } |
| 97 | 108 | ||
| 98 | #ifdef CONFIG_X86_PAE | 109 | #ifdef CONFIG_X86_PAE |
| 99 | /* This routine then takes the PGD entry given above, which contains the | 110 | /* |
| 111 | * This routine then takes the PGD entry given above, which contains the | ||
| 100 | * address of the PMD page. It then returns a pointer to the PMD entry for the | 112 | * address of the PMD page. It then returns a pointer to the PMD entry for the |
| 101 | * given address. */ | 113 | * given address. |
| 114 | */ | ||
| 102 | static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) | 115 | static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) |
| 103 | { | 116 | { |
| 104 | unsigned int index = pmd_index(vaddr); | 117 | unsigned int index = pmd_index(vaddr); |
| @@ -119,9 +132,11 @@ static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) | |||
| 119 | } | 132 | } |
| 120 | #endif | 133 | #endif |
| 121 | 134 | ||
| 122 | /* This routine then takes the page directory entry returned above, which | 135 | /* |
| 136 | * This routine then takes the page directory entry returned above, which | ||
| 123 | * contains the address of the page table entry (PTE) page. It then returns a | 137 | * contains the address of the page table entry (PTE) page. It then returns a |
| 124 | * pointer to the PTE entry for the given address. */ | 138 | * pointer to the PTE entry for the given address. |
| 139 | */ | ||
| 125 | static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) | 140 | static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) |
| 126 | { | 141 | { |
| 127 | #ifdef CONFIG_X86_PAE | 142 | #ifdef CONFIG_X86_PAE |
| @@ -139,8 +154,10 @@ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) | |||
| 139 | return &page[pte_index(vaddr)]; | 154 | return &page[pte_index(vaddr)]; |
| 140 | } | 155 | } |
| 141 | 156 | ||
| 142 | /* These two functions just like the above two, except they access the Guest | 157 | /* |
| 143 | * page tables. Hence they return a Guest address. */ | 158 | * These functions are just like the above two, except they access the Guest |
| 159 | * page tables. Hence they return a Guest address. | ||
| 160 | */ | ||
| 144 | static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) | 161 | static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) |
| 145 | { | 162 | { |
| 146 | unsigned int index = vaddr >> (PGDIR_SHIFT); | 163 | unsigned int index = vaddr >> (PGDIR_SHIFT); |
| @@ -148,6 +165,7 @@ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) | |||
| 148 | } | 165 | } |
| 149 | 166 | ||
| 150 | #ifdef CONFIG_X86_PAE | 167 | #ifdef CONFIG_X86_PAE |
| 168 | /* Follow the PGD to the PMD. */ | ||
| 151 | static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr) | 169 | static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr) |
| 152 | { | 170 | { |
| 153 | unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; | 171 | unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; |
| @@ -155,6 +173,7 @@ static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr) | |||
| 155 | return gpage + pmd_index(vaddr) * sizeof(pmd_t); | 173 | return gpage + pmd_index(vaddr) * sizeof(pmd_t); |
| 156 | } | 174 | } |
| 157 | 175 | ||
| 176 | /* Follow the PMD to the PTE. */ | ||
| 158 | static unsigned long gpte_addr(struct lg_cpu *cpu, | 177 | static unsigned long gpte_addr(struct lg_cpu *cpu, |
| 159 | pmd_t gpmd, unsigned long vaddr) | 178 | pmd_t gpmd, unsigned long vaddr) |
| 160 | { | 179 | { |
| @@ -164,6 +183,7 @@ static unsigned long gpte_addr(struct lg_cpu *cpu, | |||
| 164 | return gpage + pte_index(vaddr) * sizeof(pte_t); | 183 | return gpage + pte_index(vaddr) * sizeof(pte_t); |
| 165 | } | 184 | } |
| 166 | #else | 185 | #else |
| 186 | /* Follow the PGD to the PTE (no mid-level for !PAE). */ | ||
| 167 | static unsigned long gpte_addr(struct lg_cpu *cpu, | 187 | static unsigned long gpte_addr(struct lg_cpu *cpu, |
| 168 | pgd_t gpgd, unsigned long vaddr) | 188 | pgd_t gpgd, unsigned long vaddr) |
| 169 | { | 189 | { |
| @@ -175,17 +195,21 @@ static unsigned long gpte_addr(struct lg_cpu *cpu, | |||
| 175 | #endif | 195 | #endif |
| 176 | /*:*/ | 196 | /*:*/ |
| 177 | 197 | ||
| 178 | /*M:014 get_pfn is slow: we could probably try to grab batches of pages here as | 198 | /*M:014 |
| 179 | * an optimization (ie. pre-faulting). :*/ | 199 | * get_pfn is slow: we could probably try to grab batches of pages here as |
| 200 | * an optimization (ie. pre-faulting). | ||
| 201 | :*/ | ||
| 180 | 202 | ||
| 181 | /*H:350 This routine takes a page number given by the Guest and converts it to | 203 | /*H:350 |
| 204 | * This routine takes a page number given by the Guest and converts it to | ||
| 182 | * an actual, physical page number. It can fail for several reasons: the | 205 | * an actual, physical page number. It can fail for several reasons: the |
| 183 | * virtual address might not be mapped by the Launcher, the write flag is set | 206 | * virtual address might not be mapped by the Launcher, the write flag is set |
| 184 | * and the page is read-only, or the write flag was set and the page was | 207 | * and the page is read-only, or the write flag was set and the page was |
| 185 | * shared so had to be copied, but we ran out of memory. | 208 | * shared so had to be copied, but we ran out of memory. |
| 186 | * | 209 | * |
| 187 | * This holds a reference to the page, so release_pte() is careful to put that | 210 | * This holds a reference to the page, so release_pte() is careful to put that |
| 188 | * back. */ | 211 | * back. |
| 212 | */ | ||
| 189 | static unsigned long get_pfn(unsigned long virtpfn, int write) | 213 | static unsigned long get_pfn(unsigned long virtpfn, int write) |
| 190 | { | 214 | { |
| 191 | struct page *page; | 215 | struct page *page; |
| @@ -198,33 +222,41 @@ static unsigned long get_pfn(unsigned long virtpfn, int write) | |||
| 198 | return -1UL; | 222 | return -1UL; |
| 199 | } | 223 | } |
| 200 | 224 | ||
| 201 | /*H:340 Converting a Guest page table entry to a shadow (ie. real) page table | 225 | /*H:340 |
| 226 | * Converting a Guest page table entry to a shadow (ie. real) page table | ||
| 202 | * entry can be a little tricky. The flags are (almost) the same, but the | 227 | * entry can be a little tricky. The flags are (almost) the same, but the |
| 203 | * Guest PTE contains a virtual page number: the CPU needs the real page | 228 | * Guest PTE contains a virtual page number: the CPU needs the real page |
| 204 | * number. */ | 229 | * number. |
| 230 | */ | ||
| 205 | static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) | 231 | static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) |
| 206 | { | 232 | { |
| 207 | unsigned long pfn, base, flags; | 233 | unsigned long pfn, base, flags; |
| 208 | 234 | ||
| 209 | /* The Guest sets the global flag, because it thinks that it is using | 235 | /* |
| 236 | * The Guest sets the global flag, because it thinks that it is using | ||
| 210 | * PGE. We only told it to use PGE so it would tell us whether it was | 237 | * PGE. We only told it to use PGE so it would tell us whether it was |
| 211 | * flushing a kernel mapping or a userspace mapping. We don't actually | 238 | * flushing a kernel mapping or a userspace mapping. We don't actually |
| 212 | * use the global bit, so throw it away. */ | 239 | * use the global bit, so throw it away. |
| 240 | */ | ||
| 213 | flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); | 241 | flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); |
| 214 | 242 | ||
| 215 | /* The Guest's pages are offset inside the Launcher. */ | 243 | /* The Guest's pages are offset inside the Launcher. */ |
| 216 | base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; | 244 | base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; |
| 217 | 245 | ||
| 218 | /* We need a temporary "unsigned long" variable to hold the answer from | 246 | /* |
| 247 | * We need a temporary "unsigned long" variable to hold the answer from | ||
| 219 | * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't | 248 | * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't |
| 220 | * fit in spte.pfn. get_pfn() finds the real physical number of the | 249 | * fit in spte.pfn. get_pfn() finds the real physical number of the |
| 221 | * page, given the virtual number. */ | 250 | * page, given the virtual number. |
| 251 | */ | ||
| 222 | pfn = get_pfn(base + pte_pfn(gpte), write); | 252 | pfn = get_pfn(base + pte_pfn(gpte), write); |
| 223 | if (pfn == -1UL) { | 253 | if (pfn == -1UL) { |
| 224 | kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); | 254 | kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); |
| 225 | /* When we destroy the Guest, we'll go through the shadow page | 255 | /* |
| 256 | * When we destroy the Guest, we'll go through the shadow page | ||
| 226 | * tables and release_pte() them. Make sure we don't think | 257 | * tables and release_pte() them. Make sure we don't think |
| 227 | * this one is valid! */ | 258 | * this one is valid! |
| 259 | */ | ||
| 228 | flags = 0; | 260 | flags = 0; |
| 229 | } | 261 | } |
| 230 | /* Now we assemble our shadow PTE from the page number and flags. */ | 262 | /* Now we assemble our shadow PTE from the page number and flags. */ |
| @@ -234,8 +266,10 @@ static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) | |||
| 234 | /*H:460 And to complete the chain, release_pte() looks like this: */ | 266 | /*H:460 And to complete the chain, release_pte() looks like this: */ |
| 235 | static void release_pte(pte_t pte) | 267 | static void release_pte(pte_t pte) |
| 236 | { | 268 | { |
| 237 | /* Remember that get_user_pages_fast() took a reference to the page, in | 269 | /* |
| 238 | * get_pfn()? We have to put it back now. */ | 270 | * Remember that get_user_pages_fast() took a reference to the page, in |
| 271 | * get_pfn()? We have to put it back now. | ||
| 272 | */ | ||
| 239 | if (pte_flags(pte) & _PAGE_PRESENT) | 273 | if (pte_flags(pte) & _PAGE_PRESENT) |
| 240 | put_page(pte_page(pte)); | 274 | put_page(pte_page(pte)); |
| 241 | } | 275 | } |
| @@ -273,7 +307,8 @@ static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd) | |||
| 273 | * and return to the Guest without it knowing. | 307 | * and return to the Guest without it knowing. |
| 274 | * | 308 | * |
| 275 | * If we fixed up the fault (ie. we mapped the address), this routine returns | 309 | * If we fixed up the fault (ie. we mapped the address), this routine returns |
| 276 | * true. Otherwise, it was a real fault and we need to tell the Guest. */ | 310 | * true. Otherwise, it was a real fault and we need to tell the Guest. |
| 311 | */ | ||
| 277 | bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | 312 | bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) |
| 278 | { | 313 | { |
| 279 | pgd_t gpgd; | 314 | pgd_t gpgd; |
| @@ -282,6 +317,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
| 282 | pte_t gpte; | 317 | pte_t gpte; |
| 283 | pte_t *spte; | 318 | pte_t *spte; |
| 284 | 319 | ||
| 320 | /* Mid level for PAE. */ | ||
| 285 | #ifdef CONFIG_X86_PAE | 321 | #ifdef CONFIG_X86_PAE |
| 286 | pmd_t *spmd; | 322 | pmd_t *spmd; |
| 287 | pmd_t gpmd; | 323 | pmd_t gpmd; |
| @@ -298,22 +334,26 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
| 298 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { | 334 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { |
| 299 | /* No shadow entry: allocate a new shadow PTE page. */ | 335 | /* No shadow entry: allocate a new shadow PTE page. */ |
| 300 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); | 336 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); |
| 301 | /* This is not really the Guest's fault, but killing it is | 337 | /* |
| 302 | * simple for this corner case. */ | 338 | * This is not really the Guest's fault, but killing it is |
| 339 | * simple for this corner case. | ||
| 340 | */ | ||
| 303 | if (!ptepage) { | 341 | if (!ptepage) { |
| 304 | kill_guest(cpu, "out of memory allocating pte page"); | 342 | kill_guest(cpu, "out of memory allocating pte page"); |
| 305 | return false; | 343 | return false; |
| 306 | } | 344 | } |
| 307 | /* We check that the Guest pgd is OK. */ | 345 | /* We check that the Guest pgd is OK. */ |
| 308 | check_gpgd(cpu, gpgd); | 346 | check_gpgd(cpu, gpgd); |
| 309 | /* And we copy the flags to the shadow PGD entry. The page | 347 | /* |
| 310 | * number in the shadow PGD is the page we just allocated. */ | 348 | * And we copy the flags to the shadow PGD entry. The page |
| 349 | * number in the shadow PGD is the page we just allocated. | ||
| 350 | */ | ||
| 311 | set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd))); | 351 | set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd))); |
| 312 | } | 352 | } |
| 313 | 353 | ||
| 314 | #ifdef CONFIG_X86_PAE | 354 | #ifdef CONFIG_X86_PAE |
| 315 | gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t); | 355 | gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t); |
| 316 | /* middle level not present? We can't map it in. */ | 356 | /* Middle level not present? We can't map it in. */ |
| 317 | if (!(pmd_flags(gpmd) & _PAGE_PRESENT)) | 357 | if (!(pmd_flags(gpmd) & _PAGE_PRESENT)) |
| 318 | return false; | 358 | return false; |
| 319 | 359 | ||
| @@ -324,8 +364,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
| 324 | /* No shadow entry: allocate a new shadow PTE page. */ | 364 | /* No shadow entry: allocate a new shadow PTE page. */ |
| 325 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); | 365 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); |
| 326 | 366 | ||
| 327 | /* This is not really the Guest's fault, but killing it is | 367 | /* |
| 328 | * simple for this corner case. */ | 368 | * This is not really the Guest's fault, but killing it is |
| 369 | * simple for this corner case. | ||
| 370 | */ | ||
| 329 | if (!ptepage) { | 371 | if (!ptepage) { |
| 330 | kill_guest(cpu, "out of memory allocating pte page"); | 372 | kill_guest(cpu, "out of memory allocating pte page"); |
| 331 | return false; | 373 | return false; |
| @@ -334,27 +376,37 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
| 334 | /* We check that the Guest pmd is OK. */ | 376 | /* We check that the Guest pmd is OK. */ |
| 335 | check_gpmd(cpu, gpmd); | 377 | check_gpmd(cpu, gpmd); |
| 336 | 378 | ||
| 337 | /* And we copy the flags to the shadow PMD entry. The page | 379 | /* |
| 338 | * number in the shadow PMD is the page we just allocated. */ | 380 | * And we copy the flags to the shadow PMD entry. The page |
| 381 | * number in the shadow PMD is the page we just allocated. | ||
| 382 | */ | ||
| 339 | native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); | 383 | native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); |
| 340 | } | 384 | } |
| 341 | 385 | ||
| 342 | /* OK, now we look at the lower level in the Guest page table: keep its | 386 | /* |
| 343 | * address, because we might update it later. */ | 387 | * OK, now we look at the lower level in the Guest page table: keep its |
| 388 | * address, because we might update it later. | ||
| 389 | */ | ||
| 344 | gpte_ptr = gpte_addr(cpu, gpmd, vaddr); | 390 | gpte_ptr = gpte_addr(cpu, gpmd, vaddr); |
| 345 | #else | 391 | #else |
| 346 | /* OK, now we look at the lower level in the Guest page table: keep its | 392 | /* |
| 347 | * address, because we might update it later. */ | 393 | * OK, now we look at the lower level in the Guest page table: keep its |
| 394 | * address, because we might update it later. | ||
| 395 | */ | ||
| 348 | gpte_ptr = gpte_addr(cpu, gpgd, vaddr); | 396 | gpte_ptr = gpte_addr(cpu, gpgd, vaddr); |
| 349 | #endif | 397 | #endif |
| 398 | |||
| 399 | /* Read the actual PTE value. */ | ||
| 350 | gpte = lgread(cpu, gpte_ptr, pte_t); | 400 | gpte = lgread(cpu, gpte_ptr, pte_t); |
| 351 | 401 | ||
| 352 | /* If this page isn't in the Guest page tables, we can't page it in. */ | 402 | /* If this page isn't in the Guest page tables, we can't page it in. */ |
| 353 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) | 403 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) |
| 354 | return false; | 404 | return false; |
| 355 | 405 | ||
| 356 | /* Check they're not trying to write to a page the Guest wants | 406 | /* |
| 357 | * read-only (errcode & 2 means write). */ | 407 | * Check they're not trying to write to a page the Guest wants |
| 408 | * read-only (errcode & 2 means write). | ||
| 409 | */ | ||
| 358 | if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW)) | 410 | if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW)) |
| 359 | return false; | 411 | return false; |
| 360 | 412 | ||
| @@ -362,8 +414,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
| 362 | if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) | 414 | if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) |
| 363 | return false; | 415 | return false; |
| 364 | 416 | ||
| 365 | /* Check that the Guest PTE flags are OK, and the page number is below | 417 | /* |
| 366 | * the pfn_limit (ie. not mapping the Launcher binary). */ | 418 | * Check that the Guest PTE flags are OK, and the page number is below |
| 419 | * the pfn_limit (ie. not mapping the Launcher binary). | ||
| 420 | */ | ||
| 367 | check_gpte(cpu, gpte); | 421 | check_gpte(cpu, gpte); |
| 368 | 422 | ||
| 369 | /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ | 423 | /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ |
| @@ -373,29 +427,40 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
| 373 | 427 | ||
| 374 | /* Get the pointer to the shadow PTE entry we're going to set. */ | 428 | /* Get the pointer to the shadow PTE entry we're going to set. */ |
| 375 | spte = spte_addr(cpu, *spgd, vaddr); | 429 | spte = spte_addr(cpu, *spgd, vaddr); |
| 376 | /* If there was a valid shadow PTE entry here before, we release it. | 430 | |
| 377 | * This can happen with a write to a previously read-only entry. */ | 431 | /* |
| 432 | * If there was a valid shadow PTE entry here before, we release it. | ||
| 433 | * This can happen with a write to a previously read-only entry. | ||
| 434 | */ | ||
| 378 | release_pte(*spte); | 435 | release_pte(*spte); |
| 379 | 436 | ||
| 380 | /* If this is a write, we insist that the Guest page is writable (the | 437 | /* |
| 381 | * final arg to gpte_to_spte()). */ | 438 | * If this is a write, we insist that the Guest page is writable (the |
| 439 | * final arg to gpte_to_spte()). | ||
| 440 | */ | ||
| 382 | if (pte_dirty(gpte)) | 441 | if (pte_dirty(gpte)) |
| 383 | *spte = gpte_to_spte(cpu, gpte, 1); | 442 | *spte = gpte_to_spte(cpu, gpte, 1); |
| 384 | else | 443 | else |
| 385 | /* If this is a read, don't set the "writable" bit in the page | 444 | /* |
| 445 | * If this is a read, don't set the "writable" bit in the page | ||
| 386 | * table entry, even if the Guest says it's writable. That way | 446 | * table entry, even if the Guest says it's writable. That way |
| 387 | * we will come back here when a write does actually occur, so | 447 | * we will come back here when a write does actually occur, so |
| 388 | * we can update the Guest's _PAGE_DIRTY flag. */ | 448 | * we can update the Guest's _PAGE_DIRTY flag. |
| 449 | */ | ||
| 389 | native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0)); | 450 | native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0)); |
| 390 | 451 | ||
| 391 | /* Finally, we write the Guest PTE entry back: we've set the | 452 | /* |
| 392 | * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ | 453 | * Finally, we write the Guest PTE entry back: we've set the |
| 454 | * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. | ||
| 455 | */ | ||
| 393 | lgwrite(cpu, gpte_ptr, pte_t, gpte); | 456 | lgwrite(cpu, gpte_ptr, pte_t, gpte); |
| 394 | 457 | ||
| 395 | /* The fault is fixed, the page table is populated, the mapping | 458 | /* |
| 459 | * The fault is fixed, the page table is populated, the mapping | ||
| 396 | * manipulated, the result returned and the code complete. A small | 460 | * manipulated, the result returned and the code complete. A small |
| 397 | * delay and a trace of alliteration are the only indications the Guest | 461 | * delay and a trace of alliteration are the only indications the Guest |
| 398 | * has that a page fault occurred at all. */ | 462 | * has that a page fault occurred at all. |
| 463 | */ | ||
| 399 | return true; | 464 | return true; |
| 400 | } | 465 | } |
| 401 | 466 | ||
| @@ -408,7 +473,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
| 408 | * mapped, so it's overkill. | 473 | * mapped, so it's overkill. |
| 409 | * | 474 | * |
| 410 | * This is a quick version which answers the question: is this virtual address | 475 | * This is a quick version which answers the question: is this virtual address |
| 411 | * mapped by the shadow page tables, and is it writable? */ | 476 | * mapped by the shadow page tables, and is it writable? |
| 477 | */ | ||
| 412 | static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) | 478 | static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) |
| 413 | { | 479 | { |
| 414 | pgd_t *spgd; | 480 | pgd_t *spgd; |
| @@ -428,21 +494,26 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) | |||
| 428 | return false; | 494 | return false; |
| 429 | #endif | 495 | #endif |
| 430 | 496 | ||
| 431 | /* Check the flags on the pte entry itself: it must be present and | 497 | /* |
| 432 | * writable. */ | 498 | * Check the flags on the pte entry itself: it must be present and |
| 499 | * writable. | ||
| 500 | */ | ||
| 433 | flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr))); | 501 | flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr))); |
| 434 | 502 | ||
| 435 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); | 503 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); |
| 436 | } | 504 | } |
| 437 | 505 | ||
| 438 | /* So, when pin_stack_pages() asks us to pin a page, we check if it's already | 506 | /* |
| 507 | * So, when pin_stack_pages() asks us to pin a page, we check if it's already | ||
| 439 | * in the page tables, and if not, we call demand_page() with error code 2 | 508 | * in the page tables, and if not, we call demand_page() with error code 2 |
| 440 | * (meaning "write"). */ | 509 | * (meaning "write"). |
| 510 | */ | ||
| 441 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) | 511 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) |
| 442 | { | 512 | { |
| 443 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) | 513 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) |
| 444 | kill_guest(cpu, "bad stack page %#lx", vaddr); | 514 | kill_guest(cpu, "bad stack page %#lx", vaddr); |
| 445 | } | 515 | } |
| 516 | /*:*/ | ||
| 446 | 517 | ||
| 447 | #ifdef CONFIG_X86_PAE | 518 | #ifdef CONFIG_X86_PAE |
| 448 | static void release_pmd(pmd_t *spmd) | 519 | static void release_pmd(pmd_t *spmd) |
| @@ -479,15 +550,21 @@ static void release_pgd(pgd_t *spgd) | |||
| 479 | } | 550 | } |
| 480 | 551 | ||
| 481 | #else /* !CONFIG_X86_PAE */ | 552 | #else /* !CONFIG_X86_PAE */ |
| 482 | /*H:450 If we chase down the release_pgd() code, it looks like this: */ | 553 | /*H:450 |
| 554 | * If we chase down the release_pgd() code, the non-PAE version looks like | ||
| 555 | * this. The PAE version is almost identical, but instead of calling | ||
| 556 | * release_pte() it calls release_pmd(), which looks much like this. | ||
| 557 | */ | ||
| 483 | static void release_pgd(pgd_t *spgd) | 558 | static void release_pgd(pgd_t *spgd) |
| 484 | { | 559 | { |
| 485 | /* If the entry's not present, there's nothing to release. */ | 560 | /* If the entry's not present, there's nothing to release. */ |
| 486 | if (pgd_flags(*spgd) & _PAGE_PRESENT) { | 561 | if (pgd_flags(*spgd) & _PAGE_PRESENT) { |
| 487 | unsigned int i; | 562 | unsigned int i; |
| 488 | /* Converting the pfn to find the actual PTE page is easy: turn | 563 | /* |
| 564 | * Converting the pfn to find the actual PTE page is easy: turn | ||
| 489 | * the page number into a physical address, then convert to a | 565 | * the page number into a physical address, then convert to a |
| 490 | * virtual address (easy for kernel pages like this one). */ | 566 | * virtual address (easy for kernel pages like this one). |
| 567 | */ | ||
| 491 | pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); | 568 | pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); |
| 492 | /* For each entry in the page, we might need to release it. */ | 569 | /* For each entry in the page, we might need to release it. */ |
| 493 | for (i = 0; i < PTRS_PER_PTE; i++) | 570 | for (i = 0; i < PTRS_PER_PTE; i++) |
| @@ -499,9 +576,12 @@ static void release_pgd(pgd_t *spgd) | |||
| 499 | } | 576 | } |
| 500 | } | 577 | } |
| 501 | #endif | 578 | #endif |
| 502 | /*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings() | 579 | |
| 580 | /*H:445 | ||
| 581 | * We saw flush_user_mappings() twice: once from the flush_user_mappings() | ||
| 503 | * hypercall and once in new_pgdir() when we re-used a top-level pgdir page. | 582 | * hypercall and once in new_pgdir() when we re-used a top-level pgdir page. |
| 504 | * It simply releases every PTE page from 0 up to the Guest's kernel address. */ | 583 | * It simply releases every PTE page from 0 up to the Guest's kernel address. |
| 584 | */ | ||
| 505 | static void flush_user_mappings(struct lguest *lg, int idx) | 585 | static void flush_user_mappings(struct lguest *lg, int idx) |
| 506 | { | 586 | { |
| 507 | unsigned int i; | 587 | unsigned int i; |
| @@ -510,10 +590,12 @@ static void flush_user_mappings(struct lguest *lg, int idx) | |||
| 510 | release_pgd(lg->pgdirs[idx].pgdir + i); | 590 | release_pgd(lg->pgdirs[idx].pgdir + i); |
| 511 | } | 591 | } |
| 512 | 592 | ||
| 513 | /*H:440 (v) Flushing (throwing away) page tables, | 593 | /*H:440 |
| 594 | * (v) Flushing (throwing away) page tables, | ||
| 514 | * | 595 | * |
| 515 | * The Guest has a hypercall to throw away the page tables: it's used when a | 596 | * The Guest has a hypercall to throw away the page tables: it's used when a |
| 516 | * large number of mappings have been changed. */ | 597 | * large number of mappings have been changed. |
| 598 | */ | ||
| 517 | void guest_pagetable_flush_user(struct lg_cpu *cpu) | 599 | void guest_pagetable_flush_user(struct lg_cpu *cpu) |
| 518 | { | 600 | { |
| 519 | /* Drop the userspace part of the current page table. */ | 601 | /* Drop the userspace part of the current page table. */ |
| @@ -551,9 +633,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) | |||
| 551 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); | 633 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); |
| 552 | } | 634 | } |
| 553 | 635 | ||
| 554 | /* We keep several page tables. This is a simple routine to find the page | 636 | /* |
| 637 | * We keep several page tables. This is a simple routine to find the page | ||
| 555 | * table (if any) corresponding to this top-level address the Guest has given | 638 | * table (if any) corresponding to this top-level address the Guest has given |
| 556 | * us. */ | 639 | * us. |
| 640 | */ | ||
| 557 | static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) | 641 | static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) |
| 558 | { | 642 | { |
| 559 | unsigned int i; | 643 | unsigned int i; |
| @@ -563,9 +647,11 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) | |||
| 563 | return i; | 647 | return i; |
| 564 | } | 648 | } |
| 565 | 649 | ||
| 566 | /*H:435 And this is us, creating the new page directory. If we really do | 650 | /*H:435 |
| 651 | * And this is us, creating the new page directory. If we really do | ||
| 567 | * allocate a new one (and so the kernel parts are not there), we set | 652 | * allocate a new one (and so the kernel parts are not there), we set |
| 568 | * blank_pgdir. */ | 653 | * blank_pgdir. |
| 654 | */ | ||
| 569 | static unsigned int new_pgdir(struct lg_cpu *cpu, | 655 | static unsigned int new_pgdir(struct lg_cpu *cpu, |
| 570 | unsigned long gpgdir, | 656 | unsigned long gpgdir, |
| 571 | int *blank_pgdir) | 657 | int *blank_pgdir) |
| @@ -575,8 +661,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, | |||
| 575 | pmd_t *pmd_table; | 661 | pmd_t *pmd_table; |
| 576 | #endif | 662 | #endif |
| 577 | 663 | ||
| 578 | /* We pick one entry at random to throw out. Choosing the Least | 664 | /* |
| 579 | * Recently Used might be better, but this is easy. */ | 665 | * We pick one entry at random to throw out. Choosing the Least |
| 666 | * Recently Used might be better, but this is easy. | ||
| 667 | */ | ||
| 580 | next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); | 668 | next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); |
| 581 | /* If it's never been allocated at all before, try now. */ | 669 | /* If it's never been allocated at all before, try now. */ |
| 582 | if (!cpu->lg->pgdirs[next].pgdir) { | 670 | if (!cpu->lg->pgdirs[next].pgdir) { |
| @@ -587,8 +675,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, | |||
| 587 | next = cpu->cpu_pgd; | 675 | next = cpu->cpu_pgd; |
| 588 | else { | 676 | else { |
| 589 | #ifdef CONFIG_X86_PAE | 677 | #ifdef CONFIG_X86_PAE |
| 590 | /* In PAE mode, allocate a pmd page and populate the | 678 | /* |
| 591 | * last pgd entry. */ | 679 | * In PAE mode, allocate a pmd page and populate the |
| 680 | * last pgd entry. | ||
| 681 | */ | ||
| 592 | pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL); | 682 | pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL); |
| 593 | if (!pmd_table) { | 683 | if (!pmd_table) { |
| 594 | free_page((long)cpu->lg->pgdirs[next].pgdir); | 684 | free_page((long)cpu->lg->pgdirs[next].pgdir); |
| @@ -598,8 +688,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, | |||
| 598 | set_pgd(cpu->lg->pgdirs[next].pgdir + | 688 | set_pgd(cpu->lg->pgdirs[next].pgdir + |
| 599 | SWITCHER_PGD_INDEX, | 689 | SWITCHER_PGD_INDEX, |
| 600 | __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 690 | __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
| 601 | /* This is a blank page, so there are no kernel | 691 | /* |
| 602 | * mappings: caller must map the stack! */ | 692 | * This is a blank page, so there are no kernel |
| 693 | * mappings: caller must map the stack! | ||
| 694 | */ | ||
| 603 | *blank_pgdir = 1; | 695 | *blank_pgdir = 1; |
| 604 | } | 696 | } |
| 605 | #else | 697 | #else |
| @@ -615,19 +707,23 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, | |||
| 615 | return next; | 707 | return next; |
| 616 | } | 708 | } |
| 617 | 709 | ||
| 618 | /*H:430 (iv) Switching page tables | 710 | /*H:430 |
| 711 | * (iv) Switching page tables | ||
| 619 | * | 712 | * |
| 620 | * Now we've seen all the page table setting and manipulation, let's see | 713 | * Now we've seen all the page table setting and manipulation, let's see |
| 621 | * what happens when the Guest changes page tables (ie. changes the top-level | 714 | * what happens when the Guest changes page tables (ie. changes the top-level |
| 622 | * pgdir). This occurs on almost every context switch. */ | 715 | * pgdir). This occurs on almost every context switch. |
| 716 | */ | ||
| 623 | void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) | 717 | void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) |
| 624 | { | 718 | { |
| 625 | int newpgdir, repin = 0; | 719 | int newpgdir, repin = 0; |
| 626 | 720 | ||
| 627 | /* Look to see if we have this one already. */ | 721 | /* Look to see if we have this one already. */ |
| 628 | newpgdir = find_pgdir(cpu->lg, pgtable); | 722 | newpgdir = find_pgdir(cpu->lg, pgtable); |
| 629 | /* If not, we allocate or mug an existing one: if it's a fresh one, | 723 | /* |
| 630 | * repin gets set to 1. */ | 724 | * If not, we allocate or mug an existing one: if it's a fresh one, |
| 725 | * repin gets set to 1. | ||
| 726 | */ | ||
| 631 | if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) | 727 | if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) |
| 632 | newpgdir = new_pgdir(cpu, pgtable, &repin); | 728 | newpgdir = new_pgdir(cpu, pgtable, &repin); |
| 633 | /* Change the current pgd index to the new one. */ | 729 | /* Change the current pgd index to the new one. */ |
| @@ -637,9 +733,11 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) | |||
| 637 | pin_stack_pages(cpu); | 733 | pin_stack_pages(cpu); |
| 638 | } | 734 | } |
| 639 | 735 | ||
| 640 | /*H:470 Finally, a routine which throws away everything: all PGD entries in all | 736 | /*H:470 |
| 737 | * Finally, a routine which throws away everything: all PGD entries in all | ||
| 641 | * the shadow page tables, including the Guest's kernel mappings. This is used | 738 | * the shadow page tables, including the Guest's kernel mappings. This is used |
| 642 | * when we destroy the Guest. */ | 739 | * when we destroy the Guest. |
| 740 | */ | ||
| 643 | static void release_all_pagetables(struct lguest *lg) | 741 | static void release_all_pagetables(struct lguest *lg) |
| 644 | { | 742 | { |
| 645 | unsigned int i, j; | 743 | unsigned int i, j; |
| @@ -656,8 +754,10 @@ static void release_all_pagetables(struct lguest *lg) | |||
| 656 | spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX; | 754 | spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX; |
| 657 | pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); | 755 | pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); |
| 658 | 756 | ||
| 659 | /* And release the pmd entries of that pmd page, | 757 | /* |
| 660 | * except for the switcher pmd. */ | 758 | * And release the pmd entries of that pmd page, |
| 759 | * except for the switcher pmd. | ||
| 760 | */ | ||
| 661 | for (k = 0; k < SWITCHER_PMD_INDEX; k++) | 761 | for (k = 0; k < SWITCHER_PMD_INDEX; k++) |
| 662 | release_pmd(&pmdpage[k]); | 762 | release_pmd(&pmdpage[k]); |
| 663 | #endif | 763 | #endif |
| @@ -667,10 +767,12 @@ static void release_all_pagetables(struct lguest *lg) | |||
| 667 | } | 767 | } |
| 668 | } | 768 | } |
| 669 | 769 | ||
| 670 | /* We also throw away everything when a Guest tells us it's changed a kernel | 770 | /* |
| 771 | * We also throw away everything when a Guest tells us it's changed a kernel | ||
| 671 | * mapping. Since kernel mappings are in every page table, it's easiest to | 772 | * mapping. Since kernel mappings are in every page table, it's easiest to |
| 672 | * throw them all away. This traps the Guest in amber for a while as | 773 | * throw them all away. This traps the Guest in amber for a while as |
| 673 | * everything faults back in, but it's rare. */ | 774 | * everything faults back in, but it's rare. |
| 775 | */ | ||
| 674 | void guest_pagetable_clear_all(struct lg_cpu *cpu) | 776 | void guest_pagetable_clear_all(struct lg_cpu *cpu) |
| 675 | { | 777 | { |
| 676 | release_all_pagetables(cpu->lg); | 778 | release_all_pagetables(cpu->lg); |
| @@ -678,15 +780,19 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu) | |||
| 678 | pin_stack_pages(cpu); | 780 | pin_stack_pages(cpu); |
| 679 | } | 781 | } |
| 680 | /*:*/ | 782 | /*:*/ |
| 681 | /*M:009 Since we throw away all mappings when a kernel mapping changes, our | 783 | |
| 784 | /*M:009 | ||
| 785 | * Since we throw away all mappings when a kernel mapping changes, our | ||
| 682 | * performance sucks for guests using highmem. In fact, a guest with | 786 | * performance sucks for guests using highmem. In fact, a guest with |
| 683 | * PAGE_OFFSET 0xc0000000 (the default) and more than about 700MB of RAM is | 787 | * PAGE_OFFSET 0xc0000000 (the default) and more than about 700MB of RAM is |
| 684 | * usually slower than a Guest with less memory. | 788 | * usually slower than a Guest with less memory. |
| 685 | * | 789 | * |
| 686 | * This, of course, cannot be fixed. It would take some kind of... well, I | 790 | * This, of course, cannot be fixed. It would take some kind of... well, I |
| 687 | * don't know, but the term "puissant code-fu" comes to mind. :*/ | 791 | * don't know, but the term "puissant code-fu" comes to mind. |
| 792 | :*/ | ||
| 688 | 793 | ||
| 689 | /*H:420 This is the routine which actually sets the page table entry for the | 794 | /*H:420 |
| 795 | * This is the routine which actually sets the page table entry for the | ||
| 690 | * "idx"'th shadow page table. | 796 | * "idx"'th shadow page table. |
| 691 | * | 797 | * |
| 692 | * Normally, we can just throw out the old entry and replace it with 0: if they | 798 | * Normally, we can just throw out the old entry and replace it with 0: if they |
| @@ -715,31 +821,36 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, | |||
| 715 | spmd = spmd_addr(cpu, *spgd, vaddr); | 821 | spmd = spmd_addr(cpu, *spgd, vaddr); |
| 716 | if (pmd_flags(*spmd) & _PAGE_PRESENT) { | 822 | if (pmd_flags(*spmd) & _PAGE_PRESENT) { |
| 717 | #endif | 823 | #endif |
| 718 | /* Otherwise, we start by releasing | 824 | /* Otherwise, start by releasing the existing entry. */ |
| 719 | * the existing entry. */ | ||
| 720 | pte_t *spte = spte_addr(cpu, *spgd, vaddr); | 825 | pte_t *spte = spte_addr(cpu, *spgd, vaddr); |
| 721 | release_pte(*spte); | 826 | release_pte(*spte); |
| 722 | 827 | ||
| 723 | /* If they're setting this entry as dirty or accessed, | 828 | /* |
| 724 | * we might as well put that entry they've given us | 829 | * If they're setting this entry as dirty or accessed, |
| 725 | * in now. This shaves 10% off a | 830 | * we might as well put that entry they've given us in |
| 726 | * copy-on-write micro-benchmark. */ | 831 | * now. This shaves 10% off a copy-on-write |
| 832 | * micro-benchmark. | ||
| 833 | */ | ||
| 727 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { | 834 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { |
| 728 | check_gpte(cpu, gpte); | 835 | check_gpte(cpu, gpte); |
| 729 | native_set_pte(spte, | 836 | native_set_pte(spte, |
| 730 | gpte_to_spte(cpu, gpte, | 837 | gpte_to_spte(cpu, gpte, |
| 731 | pte_flags(gpte) & _PAGE_DIRTY)); | 838 | pte_flags(gpte) & _PAGE_DIRTY)); |
| 732 | } else | 839 | } else { |
| 733 | /* Otherwise kill it and we can demand_page() | 840 | /* |
| 734 | * it in later. */ | 841 | * Otherwise kill it and we can demand_page() |
| 842 | * it in later. | ||
| 843 | */ | ||
| 735 | native_set_pte(spte, __pte(0)); | 844 | native_set_pte(spte, __pte(0)); |
| 845 | } | ||
| 736 | #ifdef CONFIG_X86_PAE | 846 | #ifdef CONFIG_X86_PAE |
| 737 | } | 847 | } |
| 738 | #endif | 848 | #endif |
| 739 | } | 849 | } |
| 740 | } | 850 | } |
| 741 | 851 | ||
| 742 | /*H:410 Updating a PTE entry is a little trickier. | 852 | /*H:410 |
| 853 | * Updating a PTE entry is a little trickier. | ||
| 743 | * | 854 | * |
| 744 | * We keep track of several different page tables (the Guest uses one for each | 855 | * We keep track of several different page tables (the Guest uses one for each |
| 745 | * process, so it makes sense to cache at least a few). Each of these have | 856 | * process, so it makes sense to cache at least a few). Each of these have |
| @@ -748,12 +859,15 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, | |||
| 748 | * all the page tables, not just the current one. This is rare. | 859 | * all the page tables, not just the current one. This is rare. |
| 749 | * | 860 | * |
| 750 | * The benefit is that when we have to track a new page table, we can keep all | 861 | * The benefit is that when we have to track a new page table, we can keep all |
| 751 | * the kernel mappings. This speeds up context switch immensely. */ | 862 | * the kernel mappings. This speeds up context switch immensely. |
| 863 | */ | ||
| 752 | void guest_set_pte(struct lg_cpu *cpu, | 864 | void guest_set_pte(struct lg_cpu *cpu, |
| 753 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) | 865 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) |
| 754 | { | 866 | { |
| 755 | /* Kernel mappings must be changed on all top levels. Slow, but doesn't | 867 | /* |
| 756 | * happen often. */ | 868 | * Kernel mappings must be changed on all top levels. Slow, but doesn't |
| 869 | * happen often. | ||
| 870 | */ | ||
| 757 | if (vaddr >= cpu->lg->kernel_address) { | 871 | if (vaddr >= cpu->lg->kernel_address) { |
| 758 | unsigned int i; | 872 | unsigned int i; |
| 759 | for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) | 873 | for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) |
| @@ -795,19 +909,25 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx) | |||
| 795 | /* ... throw it away. */ | 909 | /* ... throw it away. */ |
| 796 | release_pgd(lg->pgdirs[pgdir].pgdir + idx); | 910 | release_pgd(lg->pgdirs[pgdir].pgdir + idx); |
| 797 | } | 911 | } |
| 912 | |||
| 798 | #ifdef CONFIG_X86_PAE | 913 | #ifdef CONFIG_X86_PAE |
| 914 | /* For setting a mid-level, we just throw everything away. It's easy. */ | ||
| 799 | void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) | 915 | void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) |
| 800 | { | 916 | { |
| 801 | guest_pagetable_clear_all(&lg->cpus[0]); | 917 | guest_pagetable_clear_all(&lg->cpus[0]); |
| 802 | } | 918 | } |
| 803 | #endif | 919 | #endif |
| 804 | 920 | ||
| 805 | /* Once we know how much memory we have we can construct simple identity | 921 | /*H:505 |
| 806 | * (which set virtual == physical) and linear mappings | 922 | * To get through boot, we construct simple identity page mappings (which |
| 807 | * which will get the Guest far enough into the boot to create its own. | 923 | * set virtual == physical) and linear mappings which will get the Guest far |
| 924 | * enough into the boot to create its own. The linear mapping means we | ||
| 925 | * simplify the Guest boot, but it makes assumptions about their PAGE_OFFSET, | ||
| 926 | * as you'll see. | ||
| 808 | * | 927 | * |
| 809 | * We lay them out of the way, just below the initrd (which is why we need to | 928 | * We lay them out of the way, just below the initrd (which is why we need to |
| 810 | * know its size here). */ | 929 | * know its size here). |
| 930 | */ | ||
| 811 | static unsigned long setup_pagetables(struct lguest *lg, | 931 | static unsigned long setup_pagetables(struct lguest *lg, |
| 812 | unsigned long mem, | 932 | unsigned long mem, |
| 813 | unsigned long initrd_size) | 933 | unsigned long initrd_size) |
| @@ -825,8 +945,10 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
| 825 | unsigned int phys_linear; | 945 | unsigned int phys_linear; |
| 826 | #endif | 946 | #endif |
| 827 | 947 | ||
| 828 | /* We have mapped_pages frames to map, so we need | 948 | /* |
| 829 | * linear_pages page tables to map them. */ | 949 | * We have mapped_pages frames to map, so we need linear_pages page |
| 950 | * tables to map them. | ||
| 951 | */ | ||
| 830 | mapped_pages = mem / PAGE_SIZE; | 952 | mapped_pages = mem / PAGE_SIZE; |
| 831 | linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; | 953 | linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; |
| 832 | 954 | ||
| @@ -837,10 +959,16 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
| 837 | linear = (void *)pgdir - linear_pages * PAGE_SIZE; | 959 | linear = (void *)pgdir - linear_pages * PAGE_SIZE; |
| 838 | 960 | ||
| 839 | #ifdef CONFIG_X86_PAE | 961 | #ifdef CONFIG_X86_PAE |
| 962 | /* | ||
| 963 | * And the single mid page goes below that. We only use one, but | ||
| 964 | * that's enough to map 1G, which definitely gets us through boot. | ||
| 965 | */ | ||
| 840 | pmds = (void *)linear - PAGE_SIZE; | 966 | pmds = (void *)linear - PAGE_SIZE; |
| 841 | #endif | 967 | #endif |
| 842 | /* Linear mapping is easy: put every page's address into the | 968 | /* |
| 843 | * mapping in order. */ | 969 | * Linear mapping is easy: put every page's address into the |
| 970 | * mapping in order. | ||
| 971 | */ | ||
| 844 | for (i = 0; i < mapped_pages; i++) { | 972 | for (i = 0; i < mapped_pages; i++) { |
| 845 | pte_t pte; | 973 | pte_t pte; |
| 846 | pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); | 974 | pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); |
| @@ -848,11 +976,14 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
| 848 | return -EFAULT; | 976 | return -EFAULT; |
| 849 | } | 977 | } |
| 850 | 978 | ||
| 851 | /* The top level points to the linear page table pages above. | ||
| 852 | * We setup the identity and linear mappings here. */ | ||
| 853 | #ifdef CONFIG_X86_PAE | 979 | #ifdef CONFIG_X86_PAE |
| 980 | /* | ||
| 981 | * Make the Guest PMD entries point to the corresponding place in the | ||
| 982 | * linear mapping (up to one page worth of PMD). | ||
| 983 | */ | ||
| 854 | for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD; | 984 | for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD; |
| 855 | i += PTRS_PER_PTE, j++) { | 985 | i += PTRS_PER_PTE, j++) { |
| 986 | /* FIXME: native_set_pmd is overkill here. */ | ||
| 856 | native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i) | 987 | native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i) |
| 857 | - mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); | 988 | - mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); |
| 858 | 989 | ||
| @@ -860,18 +991,36 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
| 860 | return -EFAULT; | 991 | return -EFAULT; |
| 861 | } | 992 | } |
| 862 | 993 | ||
| 994 | /* One PGD entry, pointing to that PMD page. */ | ||
| 863 | set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT)); | 995 | set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT)); |
| 996 | /* Copy it in as the first PGD entry (ie. addresses 0-1G). */ | ||
| 864 | if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0) | 997 | if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0) |
| 865 | return -EFAULT; | 998 | return -EFAULT; |
| 999 | /* | ||
| 1000 | * And the third PGD entry (ie. addresses 3G-4G). | ||
| 1001 | * | ||
| 1002 | * FIXME: This assumes that PAGE_OFFSET for the Guest is 0xC0000000. | ||
| 1003 | */ | ||
| 866 | if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0) | 1004 | if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0) |
| 867 | return -EFAULT; | 1005 | return -EFAULT; |
| 868 | #else | 1006 | #else |
| 1007 | /* | ||
| 1008 | * The top level points to the linear page table pages above. | ||
| 1009 | * We setup the identity and linear mappings here. | ||
| 1010 | */ | ||
| 869 | phys_linear = (unsigned long)linear - mem_base; | 1011 | phys_linear = (unsigned long)linear - mem_base; |
| 870 | for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { | 1012 | for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { |
| 871 | pgd_t pgd; | 1013 | pgd_t pgd; |
| 1014 | /* | ||
| 1015 | * Create a PGD entry which points to the right part of the | ||
| 1016 | * linear PTE pages. | ||
| 1017 | */ | ||
| 872 | pgd = __pgd((phys_linear + i * sizeof(pte_t)) | | 1018 | pgd = __pgd((phys_linear + i * sizeof(pte_t)) | |
| 873 | (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); | 1019 | (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); |
| 874 | 1020 | ||
| 1021 | /* | ||
| 1022 | * Copy it into the PGD page at 0 and PAGE_OFFSET. | ||
| 1023 | */ | ||
| 875 | if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd)) | 1024 | if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd)) |
| 876 | || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET) | 1025 | || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET) |
| 877 | + i / PTRS_PER_PTE], | 1026 | + i / PTRS_PER_PTE], |
| @@ -880,15 +1029,19 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
| 880 | } | 1029 | } |
| 881 | #endif | 1030 | #endif |
| 882 | 1031 | ||
| 883 | /* We return the top level (guest-physical) address: remember where | 1032 | /* |
| 884 | * this is. */ | 1033 | * We return the top level (guest-physical) address: we remember where |
| 1034 | * this is to write it into lguest_data when the Guest initializes. | ||
| 1035 | */ | ||
| 885 | return (unsigned long)pgdir - mem_base; | 1036 | return (unsigned long)pgdir - mem_base; |
| 886 | } | 1037 | } |
| 887 | 1038 | ||
| 888 | /*H:500 (vii) Setting up the page tables initially. | 1039 | /*H:500 |
| 1040 | * (vii) Setting up the page tables initially. | ||
| 889 | * | 1041 | * |
| 890 | * When a Guest is first created, the Launcher tells us where the toplevel of | 1042 | * When a Guest is first created, the Launcher tells us where the toplevel of |
| 891 | * its first page table is. We set some things up here: */ | 1043 | * its first page table is. We set some things up here: |
| 1044 | */ | ||
| 892 | int init_guest_pagetable(struct lguest *lg) | 1045 | int init_guest_pagetable(struct lguest *lg) |
| 893 | { | 1046 | { |
| 894 | u64 mem; | 1047 | u64 mem; |
| @@ -898,21 +1051,27 @@ int init_guest_pagetable(struct lguest *lg) | |||
| 898 | pgd_t *pgd; | 1051 | pgd_t *pgd; |
| 899 | pmd_t *pmd_table; | 1052 | pmd_t *pmd_table; |
| 900 | #endif | 1053 | #endif |
| 901 | /* Get the Guest memory size and the ramdisk size from the boot header | 1054 | /* |
| 902 | * located at lg->mem_base (Guest address 0). */ | 1055 | * Get the Guest memory size and the ramdisk size from the boot header |
| 1056 | * located at lg->mem_base (Guest address 0). | ||
| 1057 | */ | ||
| 903 | if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) | 1058 | if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) |
| 904 | || get_user(initrd_size, &boot->hdr.ramdisk_size)) | 1059 | || get_user(initrd_size, &boot->hdr.ramdisk_size)) |
| 905 | return -EFAULT; | 1060 | return -EFAULT; |
| 906 | 1061 | ||
| 907 | /* We start on the first shadow page table, and give it a blank PGD | 1062 | /* |
| 908 | * page. */ | 1063 | * We start on the first shadow page table, and give it a blank PGD |
| 1064 | * page. | ||
| 1065 | */ | ||
| 909 | lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); | 1066 | lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); |
| 910 | if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) | 1067 | if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) |
| 911 | return lg->pgdirs[0].gpgdir; | 1068 | return lg->pgdirs[0].gpgdir; |
| 912 | lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); | 1069 | lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); |
| 913 | if (!lg->pgdirs[0].pgdir) | 1070 | if (!lg->pgdirs[0].pgdir) |
| 914 | return -ENOMEM; | 1071 | return -ENOMEM; |
| 1072 | |||
| 915 | #ifdef CONFIG_X86_PAE | 1073 | #ifdef CONFIG_X86_PAE |
| 1074 | /* For PAE, we also create the initial mid-level. */ | ||
| 916 | pgd = lg->pgdirs[0].pgdir; | 1075 | pgd = lg->pgdirs[0].pgdir; |
| 917 | pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL); | 1076 | pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL); |
| 918 | if (!pmd_table) | 1077 | if (!pmd_table) |
| @@ -921,27 +1080,33 @@ int init_guest_pagetable(struct lguest *lg) | |||
| 921 | set_pgd(pgd + SWITCHER_PGD_INDEX, | 1080 | set_pgd(pgd + SWITCHER_PGD_INDEX, |
| 922 | __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 1081 | __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
| 923 | #endif | 1082 | #endif |
| 1083 | |||
| 1084 | /* This is the current page table. */ | ||
| 924 | lg->cpus[0].cpu_pgd = 0; | 1085 | lg->cpus[0].cpu_pgd = 0; |
| 925 | return 0; | 1086 | return 0; |
| 926 | } | 1087 | } |
| 927 | 1088 | ||
| 928 | /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ | 1089 | /*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ |
| 929 | void page_table_guest_data_init(struct lg_cpu *cpu) | 1090 | void page_table_guest_data_init(struct lg_cpu *cpu) |
| 930 | { | 1091 | { |
| 931 | /* We get the kernel address: above this is all kernel memory. */ | 1092 | /* We get the kernel address: above this is all kernel memory. */ |
| 932 | if (get_user(cpu->lg->kernel_address, | 1093 | if (get_user(cpu->lg->kernel_address, |
| 933 | &cpu->lg->lguest_data->kernel_address) | 1094 | &cpu->lg->lguest_data->kernel_address) |
| 934 | /* We tell the Guest that it can't use the top 2 or 4 MB | 1095 | /* |
| 935 | * of virtual addresses used by the Switcher. */ | 1096 | * We tell the Guest that it can't use the top 2 or 4 MB |
| 1097 | * of virtual addresses used by the Switcher. | ||
| 1098 | */ | ||
| 936 | || put_user(RESERVE_MEM * 1024 * 1024, | 1099 | || put_user(RESERVE_MEM * 1024 * 1024, |
| 937 | &cpu->lg->lguest_data->reserve_mem) | 1100 | &cpu->lg->lguest_data->reserve_mem) |
| 938 | || put_user(cpu->lg->pgdirs[0].gpgdir, | 1101 | || put_user(cpu->lg->pgdirs[0].gpgdir, |
| 939 | &cpu->lg->lguest_data->pgdir)) | 1102 | &cpu->lg->lguest_data->pgdir)) |
| 940 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); | 1103 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); |
| 941 | 1104 | ||
| 942 | /* In flush_user_mappings() we loop from 0 to | 1105 | /* |
| 1106 | * In flush_user_mappings() we loop from 0 to | ||
| 943 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the | 1107 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the |
| 944 | * Switcher mappings, so check that now. */ | 1108 | * Switcher mappings, so check that now. |
| 1109 | */ | ||
| 945 | #ifdef CONFIG_X86_PAE | 1110 | #ifdef CONFIG_X86_PAE |
| 946 | if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX && | 1111 | if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX && |
| 947 | pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX) | 1112 | pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX) |
| @@ -964,12 +1129,14 @@ void free_guest_pagetable(struct lguest *lg) | |||
| 964 | free_page((long)lg->pgdirs[i].pgdir); | 1129 | free_page((long)lg->pgdirs[i].pgdir); |
| 965 | } | 1130 | } |
| 966 | 1131 | ||
| 967 | /*H:480 (vi) Mapping the Switcher when the Guest is about to run. | 1132 | /*H:480 |
| 1133 | * (vi) Mapping the Switcher when the Guest is about to run. | ||
| 968 | * | 1134 | * |
| 969 | * The Switcher and the two pages for this CPU need to be visible in the | 1135 | * The Switcher and the two pages for this CPU need to be visible in the |
| 970 | * Guest (and not the pages for other CPUs). We have the appropriate PTE pages | 1136 | * Guest (and not the pages for other CPUs). We have the appropriate PTE pages |
| 971 | * for each CPU already set up, we just need to hook them in now we know which | 1137 | * for each CPU already set up, we just need to hook them in now we know which |
| 972 | * Guest is about to run on this CPU. */ | 1138 | * Guest is about to run on this CPU. |
| 1139 | */ | ||
| 973 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) | 1140 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) |
| 974 | { | 1141 | { |
| 975 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); | 1142 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); |
| @@ -980,30 +1147,38 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) | |||
| 980 | pmd_t switcher_pmd; | 1147 | pmd_t switcher_pmd; |
| 981 | pmd_t *pmd_table; | 1148 | pmd_t *pmd_table; |
| 982 | 1149 | ||
| 1150 | /* FIXME: native_set_pmd is overkill here. */ | ||
| 983 | native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >> | 1151 | native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >> |
| 984 | PAGE_SHIFT, PAGE_KERNEL_EXEC)); | 1152 | PAGE_SHIFT, PAGE_KERNEL_EXEC)); |
| 985 | 1153 | ||
| 1154 | /* Figure out where the pmd page is, by reading the PGD, and converting | ||
| 1155 | * it to a virtual address. */ | ||
| 986 | pmd_table = __va(pgd_pfn(cpu->lg-> | 1156 | pmd_table = __va(pgd_pfn(cpu->lg-> |
| 987 | pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX]) | 1157 | pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX]) |
| 988 | << PAGE_SHIFT); | 1158 | << PAGE_SHIFT); |
| 1159 | /* Now write it into the shadow page table. */ | ||
| 989 | native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); | 1160 | native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); |
| 990 | #else | 1161 | #else |
| 991 | pgd_t switcher_pgd; | 1162 | pgd_t switcher_pgd; |
| 992 | 1163 | ||
| 993 | /* Make the last PGD entry for this Guest point to the Switcher's PTE | 1164 | /* |
| 994 | * page for this CPU (with appropriate flags). */ | 1165 | * Make the last PGD entry for this Guest point to the Switcher's PTE |
| 1166 | * page for this CPU (with appropriate flags). | ||
| 1167 | */ | ||
| 995 | switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC); | 1168 | switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC); |
| 996 | 1169 | ||
| 997 | cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; | 1170 | cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; |
| 998 | 1171 | ||
| 999 | #endif | 1172 | #endif |
| 1000 | /* We also change the Switcher PTE page. When we're running the Guest, | 1173 | /* |
| 1174 | * We also change the Switcher PTE page. When we're running the Guest, | ||
| 1001 | * we want the Guest's "regs" page to appear where the first Switcher | 1175 | * we want the Guest's "regs" page to appear where the first Switcher |
| 1002 | * page for this CPU is. This is an optimization: when the Switcher | 1176 | * page for this CPU is. This is an optimization: when the Switcher |
| 1003 | * saves the Guest registers, it saves them into the first page of this | 1177 | * saves the Guest registers, it saves them into the first page of this |
| 1004 | * CPU's "struct lguest_pages": if we make sure the Guest's register | 1178 | * CPU's "struct lguest_pages": if we make sure the Guest's register |
| 1005 | * page is already mapped there, we don't have to copy them out | 1179 | * page is already mapped there, we don't have to copy them out |
| 1006 | * again. */ | 1180 | * again. |
| 1181 | */ | ||
| 1007 | pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; | 1182 | pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; |
| 1008 | native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL)); | 1183 | native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL)); |
| 1009 | native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], | 1184 | native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], |
| @@ -1019,10 +1194,12 @@ static void free_switcher_pte_pages(void) | |||
| 1019 | free_page((long)switcher_pte_page(i)); | 1194 | free_page((long)switcher_pte_page(i)); |
| 1020 | } | 1195 | } |
| 1021 | 1196 | ||
| 1022 | /*H:520 Setting up the Switcher PTE page for given CPU is fairly easy, given | 1197 | /*H:520 |
| 1198 | * Setting up the Switcher PTE page for given CPU is fairly easy, given | ||
| 1023 | * the CPU number and the "struct page"s for the Switcher code itself. | 1199 | * the CPU number and the "struct page"s for the Switcher code itself. |
| 1024 | * | 1200 | * |
| 1025 | * Currently the Switcher is less than a page long, so "pages" is always 1. */ | 1201 | * Currently the Switcher is less than a page long, so "pages" is always 1. |
| 1202 | */ | ||
| 1026 | static __init void populate_switcher_pte_page(unsigned int cpu, | 1203 | static __init void populate_switcher_pte_page(unsigned int cpu, |
| 1027 | struct page *switcher_page[], | 1204 | struct page *switcher_page[], |
| 1028 | unsigned int pages) | 1205 | unsigned int pages) |
| @@ -1043,13 +1220,16 @@ static __init void populate_switcher_pte_page(unsigned int cpu, | |||
| 1043 | native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), | 1220 | native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), |
| 1044 | __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); | 1221 | __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); |
| 1045 | 1222 | ||
| 1046 | /* The second page contains the "struct lguest_ro_state", and is | 1223 | /* |
| 1047 | * read-only. */ | 1224 | * The second page contains the "struct lguest_ro_state", and is |
| 1225 | * read-only. | ||
| 1226 | */ | ||
| 1048 | native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), | 1227 | native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), |
| 1049 | __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); | 1228 | __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); |
| 1050 | } | 1229 | } |
| 1051 | 1230 | ||
| 1052 | /* We've made it through the page table code. Perhaps our tired brains are | 1231 | /* |
| 1232 | * We've made it through the page table code. Perhaps our tired brains are | ||
| 1053 | * still processing the details, or perhaps we're simply glad it's over. | 1233 | * still processing the details, or perhaps we're simply glad it's over. |
| 1054 | * | 1234 | * |
| 1055 | * If nothing else, note that all this complexity in juggling shadow page tables | 1235 | * If nothing else, note that all this complexity in juggling shadow page tables |
| @@ -1058,10 +1238,13 @@ static __init void populate_switcher_pte_page(unsigned int cpu, | |||
| 1058 | * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD | 1238 | * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD |
| 1059 | * have implemented shadow page table support directly into hardware. | 1239 | * have implemented shadow page table support directly into hardware. |
| 1060 | * | 1240 | * |
| 1061 | * There is just one file remaining in the Host. */ | 1241 | * There is just one file remaining in the Host. |
| 1242 | */ | ||
| 1062 | 1243 | ||
| 1063 | /*H:510 At boot or module load time, init_pagetables() allocates and populates | 1244 | /*H:510 |
| 1064 | * the Switcher PTE page for each CPU. */ | 1245 | * At boot or module load time, init_pagetables() allocates and populates |
| 1246 | * the Switcher PTE page for each CPU. | ||
| 1247 | */ | ||
| 1065 | __init int init_pagetables(struct page **switcher_page, unsigned int pages) | 1248 | __init int init_pagetables(struct page **switcher_page, unsigned int pages) |
| 1066 | { | 1249 | { |
| 1067 | unsigned int i; | 1250 | unsigned int i; |
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 482ed5a18750..951c57b0a7e0 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | /*P:600 The x86 architecture has segments, which involve a table of descriptors | 1 | /*P:600 |
| 2 | * The x86 architecture has segments, which involve a table of descriptors | ||
| 2 | * which can be used to do funky things with virtual address interpretation. | 3 | * which can be used to do funky things with virtual address interpretation. |
| 3 | * We originally used to use segments so the Guest couldn't alter the | 4 | * We originally used to use segments so the Guest couldn't alter the |
| 4 | * Guest<->Host Switcher, and then we had to trim Guest segments, and restore | 5 | * Guest<->Host Switcher, and then we had to trim Guest segments, and restore |
| @@ -8,7 +9,8 @@ | |||
| 8 | * | 9 | * |
| 9 | * In these modern times, the segment handling code consists of simple sanity | 10 | * In these modern times, the segment handling code consists of simple sanity |
| 10 | * checks, and the worst you'll experience reading this code is butterfly-rash | 11 | * checks, and the worst you'll experience reading this code is butterfly-rash |
| 11 | * from frolicking through its parklike serenity. :*/ | 12 | * from frolicking through its parklike serenity. |
| 13 | :*/ | ||
| 12 | #include "lg.h" | 14 | #include "lg.h" |
| 13 | 15 | ||
| 14 | /*H:600 | 16 | /*H:600 |
| @@ -41,10 +43,12 @@ | |||
| 41 | * begin. | 43 | * begin. |
| 42 | */ | 44 | */ |
| 43 | 45 | ||
| 44 | /* There are several entries we don't let the Guest set. The TSS entry is the | 46 | /* |
| 47 | * There are several entries we don't let the Guest set. The TSS entry is the | ||
| 45 | * "Task State Segment" which controls all kinds of delicate things. The | 48 | * "Task State Segment" which controls all kinds of delicate things. The |
| 46 | * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the | 49 | * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the |
| 47 | * Guest can't be trusted to deal with double faults. */ | 50 | * Guest can't be trusted to deal with double faults. |
| 51 | */ | ||
| 48 | static bool ignored_gdt(unsigned int num) | 52 | static bool ignored_gdt(unsigned int num) |
| 49 | { | 53 | { |
| 50 | return (num == GDT_ENTRY_TSS | 54 | return (num == GDT_ENTRY_TSS |
| @@ -53,42 +57,52 @@ static bool ignored_gdt(unsigned int num) | |||
| 53 | || num == GDT_ENTRY_DOUBLEFAULT_TSS); | 57 | || num == GDT_ENTRY_DOUBLEFAULT_TSS); |
| 54 | } | 58 | } |
| 55 | 59 | ||
| 56 | /*H:630 Once the Guest gave us new GDT entries, we fix them up a little. We | 60 | /*H:630 |
| 61 | * Once the Guest gave us new GDT entries, we fix them up a little. We | ||
| 57 | * don't care if they're invalid: the worst that can happen is a General | 62 | * don't care if they're invalid: the worst that can happen is a General |
| 58 | * Protection Fault in the Switcher when it restores a Guest segment register | 63 | * Protection Fault in the Switcher when it restores a Guest segment register |
| 59 | * which tries to use that entry. Then we kill the Guest for causing such a | 64 | * which tries to use that entry. Then we kill the Guest for causing such a |
| 60 | * mess: the message will be "unhandled trap 256". */ | 65 | * mess: the message will be "unhandled trap 256". |
| 66 | */ | ||
| 61 | static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) | 67 | static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) |
| 62 | { | 68 | { |
| 63 | unsigned int i; | 69 | unsigned int i; |
| 64 | 70 | ||
| 65 | for (i = start; i < end; i++) { | 71 | for (i = start; i < end; i++) { |
| 66 | /* We never copy these ones to real GDT, so we don't care what | 72 | /* |
| 67 | * they say */ | 73 | * We never copy these ones to real GDT, so we don't care what |
| 74 | * they say | ||
| 75 | */ | ||
| 68 | if (ignored_gdt(i)) | 76 | if (ignored_gdt(i)) |
| 69 | continue; | 77 | continue; |
| 70 | 78 | ||
| 71 | /* Segment descriptors contain a privilege level: the Guest is | 79 | /* |
| 80 | * Segment descriptors contain a privilege level: the Guest is | ||
| 72 | * sometimes careless and leaves this as 0, even though it's | 81 | * sometimes careless and leaves this as 0, even though it's |
| 73 | * running at privilege level 1. If so, we fix it here. */ | 82 | * running at privilege level 1. If so, we fix it here. |
| 83 | */ | ||
| 74 | if ((cpu->arch.gdt[i].b & 0x00006000) == 0) | 84 | if ((cpu->arch.gdt[i].b & 0x00006000) == 0) |
| 75 | cpu->arch.gdt[i].b |= (GUEST_PL << 13); | 85 | cpu->arch.gdt[i].b |= (GUEST_PL << 13); |
| 76 | 86 | ||
| 77 | /* Each descriptor has an "accessed" bit. If we don't set it | 87 | /* |
| 88 | * Each descriptor has an "accessed" bit. If we don't set it | ||
| 78 | * now, the CPU will try to set it when the Guest first loads | 89 | * now, the CPU will try to set it when the Guest first loads |
| 79 | * that entry into a segment register. But the GDT isn't | 90 | * that entry into a segment register. But the GDT isn't |
| 80 | * writable by the Guest, so bad things can happen. */ | 91 | * writable by the Guest, so bad things can happen. |
| 92 | */ | ||
| 81 | cpu->arch.gdt[i].b |= 0x00000100; | 93 | cpu->arch.gdt[i].b |= 0x00000100; |
| 82 | } | 94 | } |
| 83 | } | 95 | } |
| 84 | 96 | ||
| 85 | /*H:610 Like the IDT, we never simply use the GDT the Guest gives us. We keep | 97 | /*H:610 |
| 98 | * Like the IDT, we never simply use the GDT the Guest gives us. We keep | ||
| 86 | * a GDT for each CPU, and copy across the Guest's entries each time we want to | 99 | * a GDT for each CPU, and copy across the Guest's entries each time we want to |
| 87 | * run the Guest on that CPU. | 100 | * run the Guest on that CPU. |
| 88 | * | 101 | * |
| 89 | * This routine is called at boot or modprobe time for each CPU to set up the | 102 | * This routine is called at boot or modprobe time for each CPU to set up the |
| 90 | * constant GDT entries: the ones which are the same no matter what Guest we're | 103 | * constant GDT entries: the ones which are the same no matter what Guest we're |
| 91 | * running. */ | 104 | * running. |
| 105 | */ | ||
| 92 | void setup_default_gdt_entries(struct lguest_ro_state *state) | 106 | void setup_default_gdt_entries(struct lguest_ro_state *state) |
| 93 | { | 107 | { |
| 94 | struct desc_struct *gdt = state->guest_gdt; | 108 | struct desc_struct *gdt = state->guest_gdt; |
| @@ -98,30 +112,37 @@ void setup_default_gdt_entries(struct lguest_ro_state *state) | |||
| 98 | gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; | 112 | gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; |
| 99 | gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; | 113 | gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; |
| 100 | 114 | ||
| 101 | /* The TSS segment refers to the TSS entry for this particular CPU. | 115 | /* |
| 116 | * The TSS segment refers to the TSS entry for this particular CPU. | ||
| 102 | * Forgive the magic flags: the 0x8900 means the entry is Present, it's | 117 | * Forgive the magic flags: the 0x8900 means the entry is Present, it's |
| 103 | * privilege level 0 Available 386 TSS system segment, and the 0x67 | 118 | * privilege level 0 Available 386 TSS system segment, and the 0x67 |
| 104 | * means Saturn is eclipsed by Mercury in the twelfth house. */ | 119 | * means Saturn is eclipsed by Mercury in the twelfth house. |
| 120 | */ | ||
| 105 | gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); | 121 | gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); |
| 106 | gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) | 122 | gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) |
| 107 | | ((tss >> 16) & 0x000000FF); | 123 | | ((tss >> 16) & 0x000000FF); |
| 108 | } | 124 | } |
| 109 | 125 | ||
| 110 | /* This routine sets up the initial Guest GDT for booting. All entries start | 126 | /* |
| 111 | * as 0 (unusable). */ | 127 | * This routine sets up the initial Guest GDT for booting. All entries start |
| 128 | * as 0 (unusable). | ||
| 129 | */ | ||
| 112 | void setup_guest_gdt(struct lg_cpu *cpu) | 130 | void setup_guest_gdt(struct lg_cpu *cpu) |
| 113 | { | 131 | { |
| 114 | /* Start with full 0-4G segments... */ | 132 | /* |
| 133 | * Start with full 0-4G segments...except the Guest is allowed to use | ||
| 134 | * them, so set the privilege level appropriately in the flags. | ||
| 135 | */ | ||
| 115 | cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; | 136 | cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; |
| 116 | cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; | 137 | cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; |
| 117 | /* ...except the Guest is allowed to use them, so set the privilege | ||
| 118 | * level appropriately in the flags. */ | ||
| 119 | cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); | 138 | cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); |
| 120 | cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); | 139 | cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); |
| 121 | } | 140 | } |
| 122 | 141 | ||
| 123 | /*H:650 An optimization of copy_gdt(), for just the three "thread-local storage" | 142 | /*H:650 |
| 124 | * entries. */ | 143 | * An optimization of copy_gdt(), for just the three "thread-local storage" |
| 144 | * entries. | ||
| 145 | */ | ||
| 125 | void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) | 146 | void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) |
| 126 | { | 147 | { |
| 127 | unsigned int i; | 148 | unsigned int i; |
| @@ -130,26 +151,34 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) | |||
| 130 | gdt[i] = cpu->arch.gdt[i]; | 151 | gdt[i] = cpu->arch.gdt[i]; |
| 131 | } | 152 | } |
| 132 | 153 | ||
| 133 | /*H:640 When the Guest is run on a different CPU, or the GDT entries have | 154 | /*H:640 |
| 134 | * changed, copy_gdt() is called to copy the Guest's GDT entries across to this | 155 | * When the Guest is run on a different CPU, or the GDT entries have changed, |
| 135 | * CPU's GDT. */ | 156 | * copy_gdt() is called to copy the Guest's GDT entries across to this CPU's |
| 157 | * GDT. | ||
| 158 | */ | ||
| 136 | void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) | 159 | void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) |
| 137 | { | 160 | { |
| 138 | unsigned int i; | 161 | unsigned int i; |
| 139 | 162 | ||
| 140 | /* The default entries from setup_default_gdt_entries() are not | 163 | /* |
| 141 | * replaced. See ignored_gdt() above. */ | 164 | * The default entries from setup_default_gdt_entries() are not |
| 165 | * replaced. See ignored_gdt() above. | ||
| 166 | */ | ||
| 142 | for (i = 0; i < GDT_ENTRIES; i++) | 167 | for (i = 0; i < GDT_ENTRIES; i++) |
| 143 | if (!ignored_gdt(i)) | 168 | if (!ignored_gdt(i)) |
| 144 | gdt[i] = cpu->arch.gdt[i]; | 169 | gdt[i] = cpu->arch.gdt[i]; |
| 145 | } | 170 | } |
| 146 | 171 | ||
| 147 | /*H:620 This is where the Guest asks us to load a new GDT entry | 172 | /*H:620 |
| 148 | * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. */ | 173 | * This is where the Guest asks us to load a new GDT entry |
| 174 | * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. | ||
| 175 | */ | ||
| 149 | void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) | 176 | void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) |
| 150 | { | 177 | { |
| 151 | /* We assume the Guest has the same number of GDT entries as the | 178 | /* |
| 152 | * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ | 179 | * We assume the Guest has the same number of GDT entries as the |
| 180 | * Host, otherwise we'd have to dynamically allocate the Guest GDT. | ||
| 181 | */ | ||
| 153 | if (num >= ARRAY_SIZE(cpu->arch.gdt)) | 182 | if (num >= ARRAY_SIZE(cpu->arch.gdt)) |
| 154 | kill_guest(cpu, "too many gdt entries %i", num); | 183 | kill_guest(cpu, "too many gdt entries %i", num); |
| 155 | 184 | ||
| @@ -157,15 +186,19 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) | |||
| 157 | cpu->arch.gdt[num].a = lo; | 186 | cpu->arch.gdt[num].a = lo; |
| 158 | cpu->arch.gdt[num].b = hi; | 187 | cpu->arch.gdt[num].b = hi; |
| 159 | fixup_gdt_table(cpu, num, num+1); | 188 | fixup_gdt_table(cpu, num, num+1); |
| 160 | /* Mark that the GDT changed so the core knows it has to copy it again, | 189 | /* |
| 161 | * even if the Guest is run on the same CPU. */ | 190 | * Mark that the GDT changed so the core knows it has to copy it again, |
| 191 | * even if the Guest is run on the same CPU. | ||
| 192 | */ | ||
| 162 | cpu->changed |= CHANGED_GDT; | 193 | cpu->changed |= CHANGED_GDT; |
| 163 | } | 194 | } |
| 164 | 195 | ||
| 165 | /* This is the fast-track version for just changing the three TLS entries. | 196 | /* |
| 197 | * This is the fast-track version for just changing the three TLS entries. | ||
| 166 | * Remember that this happens on every context switch, so it's worth | 198 | * Remember that this happens on every context switch, so it's worth |
| 167 | * optimizing. But wouldn't it be neater to have a single hypercall to cover | 199 | * optimizing. But wouldn't it be neater to have a single hypercall to cover |
| 168 | * both cases? */ | 200 | * both cases? |
| 201 | */ | ||
| 169 | void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) | 202 | void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) |
| 170 | { | 203 | { |
| 171 | struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN]; | 204 | struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN]; |
| @@ -175,7 +208,6 @@ void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) | |||
| 175 | /* Note that just the TLS entries have changed. */ | 208 | /* Note that just the TLS entries have changed. */ |
| 176 | cpu->changed |= CHANGED_GDT_TLS; | 209 | cpu->changed |= CHANGED_GDT_TLS; |
| 177 | } | 210 | } |
| 178 | /*:*/ | ||
| 179 | 211 | ||
| 180 | /*H:660 | 212 | /*H:660 |
| 181 | * With this, we have finished the Host. | 213 | * With this, we have finished the Host. |
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index eaf722fe309a..6ae388849a3b 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
| @@ -17,13 +17,15 @@ | |||
| 17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
| 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 19 | */ | 19 | */ |
| 20 | /*P:450 This file contains the x86-specific lguest code. It used to be all | 20 | /*P:450 |
| 21 | * This file contains the x86-specific lguest code. It used to be all | ||
| 21 | * mixed in with drivers/lguest/core.c but several foolhardy code slashers | 22 | * mixed in with drivers/lguest/core.c but several foolhardy code slashers |
| 22 | * wrestled most of the dependencies out to here in preparation for porting | 23 | * wrestled most of the dependencies out to here in preparation for porting |
| 23 | * lguest to other architectures (see what I mean by foolhardy?). | 24 | * lguest to other architectures (see what I mean by foolhardy?). |
| 24 | * | 25 | * |
| 25 | * This also contains a couple of non-obvious setup and teardown pieces which | 26 | * This also contains a couple of non-obvious setup and teardown pieces which |
| 26 | * were implemented after days of debugging pain. :*/ | 27 | * were implemented after days of debugging pain. |
| 28 | :*/ | ||
| 27 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
| 28 | #include <linux/start_kernel.h> | 30 | #include <linux/start_kernel.h> |
| 29 | #include <linux/string.h> | 31 | #include <linux/string.h> |
| @@ -82,25 +84,33 @@ static DEFINE_PER_CPU(struct lg_cpu *, last_cpu); | |||
| 82 | */ | 84 | */ |
| 83 | static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) | 85 | static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) |
| 84 | { | 86 | { |
| 85 | /* Copying all this data can be quite expensive. We usually run the | 87 | /* |
| 88 | * Copying all this data can be quite expensive. We usually run the | ||
| 86 | * same Guest we ran last time (and that Guest hasn't run anywhere else | 89 | * same Guest we ran last time (and that Guest hasn't run anywhere else |
| 87 | * meanwhile). If that's not the case, we pretend everything in the | 90 | * meanwhile). If that's not the case, we pretend everything in the |
| 88 | * Guest has changed. */ | 91 | * Guest has changed. |
| 92 | */ | ||
| 89 | if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) { | 93 | if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) { |
| 90 | __get_cpu_var(last_cpu) = cpu; | 94 | __get_cpu_var(last_cpu) = cpu; |
| 91 | cpu->last_pages = pages; | 95 | cpu->last_pages = pages; |
| 92 | cpu->changed = CHANGED_ALL; | 96 | cpu->changed = CHANGED_ALL; |
| 93 | } | 97 | } |
| 94 | 98 | ||
| 95 | /* These copies are pretty cheap, so we do them unconditionally: */ | 99 | /* These copies are pretty cheap, so we do them unconditionally: */ |
| 96 | /* Save the current Host top-level page directory. */ | 100 | /* |
| 101 | * Save the current Host top-level page directory. | ||
| 102 | */ | ||
| 97 | pages->state.host_cr3 = __pa(current->mm->pgd); | 103 | pages->state.host_cr3 = __pa(current->mm->pgd); |
| 98 | /* Set up the Guest's page tables to see this CPU's pages (and no | 104 | /* |
| 99 | * other CPU's pages). */ | 105 | * Set up the Guest's page tables to see this CPU's pages (and no |
| 106 | * other CPU's pages). | ||
| 107 | */ | ||
| 100 | map_switcher_in_guest(cpu, pages); | 108 | map_switcher_in_guest(cpu, pages); |
| 101 | /* Set up the two "TSS" members which tell the CPU what stack to use | 109 | /* |
| 110 | * Set up the two "TSS" members which tell the CPU what stack to use | ||
| 102 | * for traps which go directly into the Guest (ie. traps at privilege | 111 | * for traps which go directly into the Guest (ie. traps at privilege |
| 103 | * level 1). */ | 112 | * level 1). |
| 113 | */ | ||
| 104 | pages->state.guest_tss.sp1 = cpu->esp1; | 114 | pages->state.guest_tss.sp1 = cpu->esp1; |
| 105 | pages->state.guest_tss.ss1 = cpu->ss1; | 115 | pages->state.guest_tss.ss1 = cpu->ss1; |
| 106 | 116 | ||
| @@ -125,97 +135,126 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) | |||
| 125 | /* This is a dummy value we need for GCC's sake. */ | 135 | /* This is a dummy value we need for GCC's sake. */ |
| 126 | unsigned int clobber; | 136 | unsigned int clobber; |
| 127 | 137 | ||
| 128 | /* Copy the guest-specific information into this CPU's "struct | 138 | /* |
| 129 | * lguest_pages". */ | 139 | * Copy the guest-specific information into this CPU's "struct |
| 140 | * lguest_pages". | ||
| 141 | */ | ||
| 130 | copy_in_guest_info(cpu, pages); | 142 | copy_in_guest_info(cpu, pages); |
| 131 | 143 | ||
| 132 | /* Set the trap number to 256 (impossible value). If we fault while | 144 | /* |
| 145 | * Set the trap number to 256 (impossible value). If we fault while | ||
| 133 | * switching to the Guest (bad segment registers or bug), this will | 146 | * switching to the Guest (bad segment registers or bug), this will |
| 134 | * cause us to abort the Guest. */ | 147 | * cause us to abort the Guest. |
| 148 | */ | ||
| 135 | cpu->regs->trapnum = 256; | 149 | cpu->regs->trapnum = 256; |
| 136 | 150 | ||
| 137 | /* Now: we push the "eflags" register on the stack, then do an "lcall". | 151 | /* |
| 152 | * Now: we push the "eflags" register on the stack, then do an "lcall". | ||
| 138 | * This is how we change from using the kernel code segment to using | 153 | * This is how we change from using the kernel code segment to using |
| 139 | * the dedicated lguest code segment, as well as jumping into the | 154 | * the dedicated lguest code segment, as well as jumping into the |
| 140 | * Switcher. | 155 | * Switcher. |
| 141 | * | 156 | * |
| 142 | * The lcall also pushes the old code segment (KERNEL_CS) onto the | 157 | * The lcall also pushes the old code segment (KERNEL_CS) onto the |
| 143 | * stack, then the address of this call. This stack layout happens to | 158 | * stack, then the address of this call. This stack layout happens to |
| 144 | * exactly match the stack layout created by an interrupt... */ | 159 | * exactly match the stack layout created by an interrupt... |
| 160 | */ | ||
| 145 | asm volatile("pushf; lcall *lguest_entry" | 161 | asm volatile("pushf; lcall *lguest_entry" |
| 146 | /* This is how we tell GCC that %eax ("a") and %ebx ("b") | 162 | /* |
| 147 | * are changed by this routine. The "=" means output. */ | 163 | * This is how we tell GCC that %eax ("a") and %ebx ("b") |
| 164 | * are changed by this routine. The "=" means output. | ||
| 165 | */ | ||
| 148 | : "=a"(clobber), "=b"(clobber) | 166 | : "=a"(clobber), "=b"(clobber) |
| 149 | /* %eax contains the pages pointer. ("0" refers to the | 167 | /* |
| 168 | * %eax contains the pages pointer. ("0" refers to the | ||
| 150 | * 0-th argument above, ie "a"). %ebx contains the | 169 | * 0-th argument above, ie "a"). %ebx contains the |
| 151 | * physical address of the Guest's top-level page | 170 | * physical address of the Guest's top-level page |
| 152 | * directory. */ | 171 | * directory. |
| 172 | */ | ||
| 153 | : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)) | 173 | : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)) |
| 154 | /* We tell gcc that all these registers could change, | 174 | /* |
| 175 | * We tell gcc that all these registers could change, | ||
| 155 | * which means we don't have to save and restore them in | 176 | * which means we don't have to save and restore them in |
| 156 | * the Switcher. */ | 177 | * the Switcher. |
| 178 | */ | ||
| 157 | : "memory", "%edx", "%ecx", "%edi", "%esi"); | 179 | : "memory", "%edx", "%ecx", "%edi", "%esi"); |
| 158 | } | 180 | } |
| 159 | /*:*/ | 181 | /*:*/ |
| 160 | 182 | ||
| 161 | /*M:002 There are hooks in the scheduler which we can register to tell when we | 183 | /*M:002 |
| 184 | * There are hooks in the scheduler which we can register to tell when we | ||
| 162 | * get kicked off the CPU (preempt_notifier_register()). This would allow us | 185 | * get kicked off the CPU (preempt_notifier_register()). This would allow us |
| 163 | * to lazily disable SYSENTER which would regain some performance, and should | 186 | * to lazily disable SYSENTER which would regain some performance, and should |
| 164 | * also simplify copy_in_guest_info(). Note that we'd still need to restore | 187 | * also simplify copy_in_guest_info(). Note that we'd still need to restore |
| 165 | * things when we exit to Launcher userspace, but that's fairly easy. | 188 | * things when we exit to Launcher userspace, but that's fairly easy. |
| 166 | * | 189 | * |
| 167 | * We could also try using this hooks for PGE, but that might be too expensive. | 190 | * We could also try using these hooks for PGE, but that might be too expensive. |
| 168 | * | 191 | * |
| 169 | * The hooks were designed for KVM, but we can also put them to good use. :*/ | 192 | * The hooks were designed for KVM, but we can also put them to good use. |
| 193 | :*/ | ||
| 170 | 194 | ||
| 171 | /*H:040 This is the i386-specific code to setup and run the Guest. Interrupts | 195 | /*H:040 |
| 172 | * are disabled: we own the CPU. */ | 196 | * This is the i386-specific code to setup and run the Guest. Interrupts |
| 197 | * are disabled: we own the CPU. | ||
| 198 | */ | ||
| 173 | void lguest_arch_run_guest(struct lg_cpu *cpu) | 199 | void lguest_arch_run_guest(struct lg_cpu *cpu) |
| 174 | { | 200 | { |
| 175 | /* Remember the awfully-named TS bit? If the Guest has asked to set it | 201 | /* |
| 202 | * Remember the awfully-named TS bit? If the Guest has asked to set it | ||
| 176 | * we set it now, so we can trap and pass that trap to the Guest if it | 203 | * we set it now, so we can trap and pass that trap to the Guest if it |
| 177 | * uses the FPU. */ | 204 | * uses the FPU. |
| 205 | */ | ||
| 178 | if (cpu->ts) | 206 | if (cpu->ts) |
| 179 | unlazy_fpu(current); | 207 | unlazy_fpu(current); |
| 180 | 208 | ||
| 181 | /* SYSENTER is an optimized way of doing system calls. We can't allow | 209 | /* |
| 210 | * SYSENTER is an optimized way of doing system calls. We can't allow | ||
| 182 | * it because it always jumps to privilege level 0. A normal Guest | 211 | * it because it always jumps to privilege level 0. A normal Guest |
| 183 | * won't try it because we don't advertise it in CPUID, but a malicious | 212 | * won't try it because we don't advertise it in CPUID, but a malicious |
| 184 | * Guest (or malicious Guest userspace program) could, so we tell the | 213 | * Guest (or malicious Guest userspace program) could, so we tell the |
| 185 | * CPU to disable it before running the Guest. */ | 214 | * CPU to disable it before running the Guest. |
| 215 | */ | ||
| 186 | if (boot_cpu_has(X86_FEATURE_SEP)) | 216 | if (boot_cpu_has(X86_FEATURE_SEP)) |
| 187 | wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); | 217 | wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); |
| 188 | 218 | ||
| 189 | /* Now we actually run the Guest. It will return when something | 219 | /* |
| 220 | * Now we actually run the Guest. It will return when something | ||
| 190 | * interesting happens, and we can examine its registers to see what it | 221 | * interesting happens, and we can examine its registers to see what it |
| 191 | * was doing. */ | 222 | * was doing. |
| 223 | */ | ||
| 192 | run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); | 224 | run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); |
| 193 | 225 | ||
| 194 | /* Note that the "regs" structure contains two extra entries which are | 226 | /* |
| 227 | * Note that the "regs" structure contains two extra entries which are | ||
| 195 | * not really registers: a trap number which says what interrupt or | 228 | * not really registers: a trap number which says what interrupt or |
| 196 | * trap made the switcher code come back, and an error code which some | 229 | * trap made the switcher code come back, and an error code which some |
| 197 | * traps set. */ | 230 | * traps set. |
| 231 | */ | ||
| 198 | 232 | ||
| 199 | /* Restore SYSENTER if it's supposed to be on. */ | 233 | /* Restore SYSENTER if it's supposed to be on. */ |
| 200 | if (boot_cpu_has(X86_FEATURE_SEP)) | 234 | if (boot_cpu_has(X86_FEATURE_SEP)) |
| 201 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 235 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
| 202 | 236 | ||
| 203 | /* If the Guest page faulted, then the cr2 register will tell us the | 237 | /* |
| 238 | * If the Guest page faulted, then the cr2 register will tell us the | ||
| 204 | * bad virtual address. We have to grab this now, because once we | 239 | * bad virtual address. We have to grab this now, because once we |
| 205 | * re-enable interrupts an interrupt could fault and thus overwrite | 240 | * re-enable interrupts an interrupt could fault and thus overwrite |
| 206 | * cr2, or we could even move off to a different CPU. */ | 241 | * cr2, or we could even move off to a different CPU. |
| 242 | */ | ||
| 207 | if (cpu->regs->trapnum == 14) | 243 | if (cpu->regs->trapnum == 14) |
| 208 | cpu->arch.last_pagefault = read_cr2(); | 244 | cpu->arch.last_pagefault = read_cr2(); |
| 209 | /* Similarly, if we took a trap because the Guest used the FPU, | 245 | /* |
| 246 | * Similarly, if we took a trap because the Guest used the FPU, | ||
| 210 | * we have to restore the FPU it expects to see. | 247 | * we have to restore the FPU it expects to see. |
| 211 | * math_state_restore() may sleep and we may even move off to | 248 | * math_state_restore() may sleep and we may even move off to |
| 212 | * a different CPU. So all the critical stuff should be done | 249 | * a different CPU. So all the critical stuff should be done |
| 213 | * before this. */ | 250 | * before this. |
| 251 | */ | ||
| 214 | else if (cpu->regs->trapnum == 7) | 252 | else if (cpu->regs->trapnum == 7) |
| 215 | math_state_restore(); | 253 | math_state_restore(); |
| 216 | } | 254 | } |
| 217 | 255 | ||
| 218 | /*H:130 Now we've examined the hypercall code; our Guest can make requests. | 256 | /*H:130 |
| 257 | * Now we've examined the hypercall code; our Guest can make requests. | ||
| 219 | * Our Guest is usually so well behaved; it never tries to do things it isn't | 258 | * Our Guest is usually so well behaved; it never tries to do things it isn't |
| 220 | * allowed to, and uses hypercalls instead. Unfortunately, Linux's paravirtual | 259 | * allowed to, and uses hypercalls instead. Unfortunately, Linux's paravirtual |
| 221 | * infrastructure isn't quite complete, because it doesn't contain replacements | 260 | * infrastructure isn't quite complete, because it doesn't contain replacements |
| @@ -225,26 +264,33 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
| 225 | * | 264 | * |
| 226 | * When the Guest uses one of these instructions, we get a trap (General | 265 | * When the Guest uses one of these instructions, we get a trap (General |
| 227 | * Protection Fault) and come here. We see if it's one of those troublesome | 266 | * Protection Fault) and come here. We see if it's one of those troublesome |
| 228 | * instructions and skip over it. We return true if we did. */ | 267 | * instructions and skip over it. We return true if we did. |
| 268 | */ | ||
| 229 | static int emulate_insn(struct lg_cpu *cpu) | 269 | static int emulate_insn(struct lg_cpu *cpu) |
| 230 | { | 270 | { |
| 231 | u8 insn; | 271 | u8 insn; |
| 232 | unsigned int insnlen = 0, in = 0, shift = 0; | 272 | unsigned int insnlen = 0, in = 0, shift = 0; |
| 233 | /* The eip contains the *virtual* address of the Guest's instruction: | 273 | /* |
| 234 | * guest_pa just subtracts the Guest's page_offset. */ | 274 | * The eip contains the *virtual* address of the Guest's instruction: |
| 275 | * guest_pa just subtracts the Guest's page_offset. | ||
| 276 | */ | ||
| 235 | unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); | 277 | unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); |
| 236 | 278 | ||
| 237 | /* This must be the Guest kernel trying to do something, not userspace! | 279 | /* |
| 280 | * This must be the Guest kernel trying to do something, not userspace! | ||
| 238 | * The bottom two bits of the CS segment register are the privilege | 281 | * The bottom two bits of the CS segment register are the privilege |
| 239 | * level. */ | 282 | * level. |
| 283 | */ | ||
| 240 | if ((cpu->regs->cs & 3) != GUEST_PL) | 284 | if ((cpu->regs->cs & 3) != GUEST_PL) |
| 241 | return 0; | 285 | return 0; |
| 242 | 286 | ||
| 243 | /* Decoding x86 instructions is icky. */ | 287 | /* Decoding x86 instructions is icky. */ |
| 244 | insn = lgread(cpu, physaddr, u8); | 288 | insn = lgread(cpu, physaddr, u8); |
| 245 | 289 | ||
| 246 | /* 0x66 is an "operand prefix". It means it's using the upper 16 bits | 290 | /* |
| 247 | of the eax register. */ | 291 | * 0x66 is an "operand prefix". It means it's using the upper 16 bits |
| 292 | * of the eax register. | ||
| 293 | */ | ||
| 248 | if (insn == 0x66) { | 294 | if (insn == 0x66) { |
| 249 | shift = 16; | 295 | shift = 16; |
| 250 | /* The instruction is 1 byte so far, read the next byte. */ | 296 | /* The instruction is 1 byte so far, read the next byte. */ |
| @@ -252,8 +298,10 @@ static int emulate_insn(struct lg_cpu *cpu) | |||
| 252 | insn = lgread(cpu, physaddr + insnlen, u8); | 298 | insn = lgread(cpu, physaddr + insnlen, u8); |
| 253 | } | 299 | } |
| 254 | 300 | ||
| 255 | /* We can ignore the lower bit for the moment and decode the 4 opcodes | 301 | /* |
| 256 | * we need to emulate. */ | 302 | * We can ignore the lower bit for the moment and decode the 4 opcodes |
| 303 | * we need to emulate. | ||
| 304 | */ | ||
| 257 | switch (insn & 0xFE) { | 305 | switch (insn & 0xFE) { |
| 258 | case 0xE4: /* in <next byte>,%al */ | 306 | case 0xE4: /* in <next byte>,%al */ |
| 259 | insnlen += 2; | 307 | insnlen += 2; |
| @@ -274,9 +322,11 @@ static int emulate_insn(struct lg_cpu *cpu) | |||
| 274 | return 0; | 322 | return 0; |
| 275 | } | 323 | } |
| 276 | 324 | ||
| 277 | /* If it was an "IN" instruction, they expect the result to be read | 325 | /* |
| 326 | * If it was an "IN" instruction, they expect the result to be read | ||
| 278 | * into %eax, so we change %eax. We always return all-ones, which | 327 | * into %eax, so we change %eax. We always return all-ones, which |
| 279 | * traditionally means "there's nothing there". */ | 328 | * traditionally means "there's nothing there". |
| 329 | */ | ||
| 280 | if (in) { | 330 | if (in) { |
| 281 | /* Lower bit tells us whether it's a 16 or 32 bit access */ | 331 | /* Lower bit tells us whether it's a 16 or 32 bit access */ |
| 282 | if (insn & 0x1) | 332 | if (insn & 0x1) |
| @@ -290,7 +340,8 @@ static int emulate_insn(struct lg_cpu *cpu) | |||
| 290 | return 1; | 340 | return 1; |
| 291 | } | 341 | } |
| 292 | 342 | ||
| 293 | /* Our hypercalls mechanism used to be based on direct software interrupts. | 343 | /* |
| 344 | * Our hypercalls mechanism used to be based on direct software interrupts. | ||
| 294 | * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to | 345 | * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to |
| 295 | * change over to using kvm hypercalls. | 346 | * change over to using kvm hypercalls. |
| 296 | * | 347 | * |
| @@ -318,16 +369,20 @@ static int emulate_insn(struct lg_cpu *cpu) | |||
| 318 | */ | 369 | */ |
| 319 | static void rewrite_hypercall(struct lg_cpu *cpu) | 370 | static void rewrite_hypercall(struct lg_cpu *cpu) |
| 320 | { | 371 | { |
| 321 | /* These are the opcodes we use to patch the Guest. The opcode for "int | 372 | /* |
| 373 | * These are the opcodes we use to patch the Guest. The opcode for "int | ||
| 322 | * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we | 374 | * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we |
| 323 | * complete the sequence with a NOP (0x90). */ | 375 | * complete the sequence with a NOP (0x90). |
| 376 | */ | ||
| 324 | u8 insn[3] = {0xcd, 0x1f, 0x90}; | 377 | u8 insn[3] = {0xcd, 0x1f, 0x90}; |
| 325 | 378 | ||
| 326 | __lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn)); | 379 | __lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn)); |
| 327 | /* The above write might have caused a copy of that page to be made | 380 | /* |
| 381 | * The above write might have caused a copy of that page to be made | ||
| 328 | * (if it was read-only). We need to make sure the Guest has | 382 | * (if it was read-only). We need to make sure the Guest has |
| 329 | * up-to-date pagetables. As this doesn't happen often, we can just | 383 | * up-to-date pagetables. As this doesn't happen often, we can just |
| 330 | * drop them all. */ | 384 | * drop them all. |
| 385 | */ | ||
| 331 | guest_pagetable_clear_all(cpu); | 386 | guest_pagetable_clear_all(cpu); |
| 332 | } | 387 | } |
| 333 | 388 | ||
| @@ -335,9 +390,11 @@ static bool is_hypercall(struct lg_cpu *cpu) | |||
| 335 | { | 390 | { |
| 336 | u8 insn[3]; | 391 | u8 insn[3]; |
| 337 | 392 | ||
| 338 | /* This must be the Guest kernel trying to do something. | 393 | /* |
| 394 | * This must be the Guest kernel trying to do something. | ||
| 339 | * The bottom two bits of the CS segment register are the privilege | 395 | * The bottom two bits of the CS segment register are the privilege |
| 340 | * level. */ | 396 | * level. |
| 397 | */ | ||
| 341 | if ((cpu->regs->cs & 3) != GUEST_PL) | 398 | if ((cpu->regs->cs & 3) != GUEST_PL) |
| 342 | return false; | 399 | return false; |
| 343 | 400 | ||
| @@ -351,86 +408,105 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) | |||
| 351 | { | 408 | { |
| 352 | switch (cpu->regs->trapnum) { | 409 | switch (cpu->regs->trapnum) { |
| 353 | case 13: /* We've intercepted a General Protection Fault. */ | 410 | case 13: /* We've intercepted a General Protection Fault. */ |
| 354 | /* Check if this was one of those annoying IN or OUT | 411 | /* |
| 412 | * Check if this was one of those annoying IN or OUT | ||
| 355 | * instructions which we need to emulate. If so, we just go | 413 | * instructions which we need to emulate. If so, we just go |
| 356 | * back into the Guest after we've done it. */ | 414 | * back into the Guest after we've done it. |
| 415 | */ | ||
| 357 | if (cpu->regs->errcode == 0) { | 416 | if (cpu->regs->errcode == 0) { |
| 358 | if (emulate_insn(cpu)) | 417 | if (emulate_insn(cpu)) |
| 359 | return; | 418 | return; |
| 360 | } | 419 | } |
| 361 | /* If KVM is active, the vmcall instruction triggers a | 420 | /* |
| 362 | * General Protection Fault. Normally it triggers an | 421 | * If KVM is active, the vmcall instruction triggers a General |
| 363 | * invalid opcode fault (6): */ | 422 | * Protection Fault. Normally it triggers an invalid opcode |
| 423 | * fault (6): | ||
| 424 | */ | ||
| 364 | case 6: | 425 | case 6: |
| 365 | /* We need to check if ring == GUEST_PL and | 426 | /* |
| 366 | * faulting instruction == vmcall. */ | 427 | * We need to check if ring == GUEST_PL and faulting |
| 428 | * instruction == vmcall. | ||
| 429 | */ | ||
| 367 | if (is_hypercall(cpu)) { | 430 | if (is_hypercall(cpu)) { |
| 368 | rewrite_hypercall(cpu); | 431 | rewrite_hypercall(cpu); |
| 369 | return; | 432 | return; |
| 370 | } | 433 | } |
| 371 | break; | 434 | break; |
| 372 | case 14: /* We've intercepted a Page Fault. */ | 435 | case 14: /* We've intercepted a Page Fault. */ |
| 373 | /* The Guest accessed a virtual address that wasn't mapped. | 436 | /* |
| 437 | * The Guest accessed a virtual address that wasn't mapped. | ||
| 374 | * This happens a lot: we don't actually set up most of the page | 438 | * This happens a lot: we don't actually set up most of the page |
| 375 | * tables for the Guest at all when we start: as it runs it asks | 439 | * tables for the Guest at all when we start: as it runs it asks |
| 376 | * for more and more, and we set them up as required. In this | 440 | * for more and more, and we set them up as required. In this |
| 377 | * case, we don't even tell the Guest that the fault happened. | 441 | * case, we don't even tell the Guest that the fault happened. |
| 378 | * | 442 | * |
| 379 | * The errcode tells whether this was a read or a write, and | 443 | * The errcode tells whether this was a read or a write, and |
| 380 | * whether kernel or userspace code. */ | 444 | * whether kernel or userspace code. |
| 445 | */ | ||
| 381 | if (demand_page(cpu, cpu->arch.last_pagefault, | 446 | if (demand_page(cpu, cpu->arch.last_pagefault, |
| 382 | cpu->regs->errcode)) | 447 | cpu->regs->errcode)) |
| 383 | return; | 448 | return; |
| 384 | 449 | ||
| 385 | /* OK, it's really not there (or not OK): the Guest needs to | 450 | /* |
| 451 | * OK, it's really not there (or not OK): the Guest needs to | ||
| 386 | * know. We write out the cr2 value so it knows where the | 452 | * know. We write out the cr2 value so it knows where the |
| 387 | * fault occurred. | 453 | * fault occurred. |
| 388 | * | 454 | * |
| 389 | * Note that if the Guest were really messed up, this could | 455 | * Note that if the Guest were really messed up, this could |
| 390 | * happen before it's done the LHCALL_LGUEST_INIT hypercall, so | 456 | * happen before it's done the LHCALL_LGUEST_INIT hypercall, so |
| 391 | * lg->lguest_data could be NULL */ | 457 | * lg->lguest_data could be NULL |
| 458 | */ | ||
| 392 | if (cpu->lg->lguest_data && | 459 | if (cpu->lg->lguest_data && |
| 393 | put_user(cpu->arch.last_pagefault, | 460 | put_user(cpu->arch.last_pagefault, |
| 394 | &cpu->lg->lguest_data->cr2)) | 461 | &cpu->lg->lguest_data->cr2)) |
| 395 | kill_guest(cpu, "Writing cr2"); | 462 | kill_guest(cpu, "Writing cr2"); |
| 396 | break; | 463 | break; |
| 397 | case 7: /* We've intercepted a Device Not Available fault. */ | 464 | case 7: /* We've intercepted a Device Not Available fault. */ |
| 398 | /* If the Guest doesn't want to know, we already restored the | 465 | /* |
| 399 | * Floating Point Unit, so we just continue without telling | 466 | * If the Guest doesn't want to know, we already restored the |
| 400 | * it. */ | 467 | * Floating Point Unit, so we just continue without telling it. |
| 468 | */ | ||
| 401 | if (!cpu->ts) | 469 | if (!cpu->ts) |
| 402 | return; | 470 | return; |
| 403 | break; | 471 | break; |
| 404 | case 32 ... 255: | 472 | case 32 ... 255: |
| 405 | /* These values mean a real interrupt occurred, in which case | 473 | /* |
| 474 | * These values mean a real interrupt occurred, in which case | ||
| 406 | * the Host handler has already been run. We just do a | 475 | * the Host handler has already been run. We just do a |
| 407 | * friendly check if another process should now be run, then | 476 | * friendly check if another process should now be run, then |
| 408 | * return to run the Guest again */ | 477 | * return to run the Guest again |
| 478 | */ | ||
| 409 | cond_resched(); | 479 | cond_resched(); |
| 410 | return; | 480 | return; |
| 411 | case LGUEST_TRAP_ENTRY: | 481 | case LGUEST_TRAP_ENTRY: |
| 412 | /* Our 'struct hcall_args' maps directly over our regs: we set | 482 | /* |
| 413 | * up the pointer now to indicate a hypercall is pending. */ | 483 | * Our 'struct hcall_args' maps directly over our regs: we set |
| 484 | * up the pointer now to indicate a hypercall is pending. | ||
| 485 | */ | ||
| 414 | cpu->hcall = (struct hcall_args *)cpu->regs; | 486 | cpu->hcall = (struct hcall_args *)cpu->regs; |
| 415 | return; | 487 | return; |
| 416 | } | 488 | } |
| 417 | 489 | ||
| 418 | /* We didn't handle the trap, so it needs to go to the Guest. */ | 490 | /* We didn't handle the trap, so it needs to go to the Guest. */ |
| 419 | if (!deliver_trap(cpu, cpu->regs->trapnum)) | 491 | if (!deliver_trap(cpu, cpu->regs->trapnum)) |
| 420 | /* If the Guest doesn't have a handler (either it hasn't | 492 | /* |
| 493 | * If the Guest doesn't have a handler (either it hasn't | ||
| 421 | * registered any yet, or it's one of the faults we don't let | 494 | * registered any yet, or it's one of the faults we don't let |
| 422 | * it handle), it dies with this cryptic error message. */ | 495 | * it handle), it dies with this cryptic error message. |
| 496 | */ | ||
| 423 | kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", | 497 | kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", |
| 424 | cpu->regs->trapnum, cpu->regs->eip, | 498 | cpu->regs->trapnum, cpu->regs->eip, |
| 425 | cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault | 499 | cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault |
| 426 | : cpu->regs->errcode); | 500 | : cpu->regs->errcode); |
| 427 | } | 501 | } |
| 428 | 502 | ||
| 429 | /* Now we can look at each of the routines this calls, in increasing order of | 503 | /* |
| 504 | * Now we can look at each of the routines this calls, in increasing order of | ||
| 430 | * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(), | 505 | * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(), |
| 431 | * deliver_trap() and demand_page(). After all those, we'll be ready to | 506 | * deliver_trap() and demand_page(). After all those, we'll be ready to |
| 432 | * examine the Switcher, and our philosophical understanding of the Host/Guest | 507 | * examine the Switcher, and our philosophical understanding of the Host/Guest |
| 433 | * duality will be complete. :*/ | 508 | * duality will be complete. |
| 509 | :*/ | ||
| 434 | static void adjust_pge(void *on) | 510 | static void adjust_pge(void *on) |
| 435 | { | 511 | { |
| 436 | if (on) | 512 | if (on) |
| @@ -439,13 +515,16 @@ static void adjust_pge(void *on) | |||
| 439 | write_cr4(read_cr4() & ~X86_CR4_PGE); | 515 | write_cr4(read_cr4() & ~X86_CR4_PGE); |
| 440 | } | 516 | } |
| 441 | 517 | ||
| 442 | /*H:020 Now the Switcher is mapped and everything else is ready, we need to do | 518 | /*H:020 |
| 443 | * some more i386-specific initialization. */ | 519 | * Now the Switcher is mapped and everything else is ready, we need to do |
| 520 | * some more i386-specific initialization. | ||
| 521 | */ | ||
| 444 | void __init lguest_arch_host_init(void) | 522 | void __init lguest_arch_host_init(void) |
| 445 | { | 523 | { |
| 446 | int i; | 524 | int i; |
| 447 | 525 | ||
| 448 | /* Most of the i386/switcher.S doesn't care that it's been moved; on | 526 | /* |
| 527 | * Most of the i386/switcher.S doesn't care that it's been moved; on | ||
| 449 | * Intel, jumps are relative, and it doesn't access any references to | 528 | * Intel, jumps are relative, and it doesn't access any references to |
| 450 | * external code or data. | 529 | * external code or data. |
| 451 | * | 530 | * |
| @@ -453,7 +532,8 @@ void __init lguest_arch_host_init(void) | |||
| 453 | * addresses are placed in a table (default_idt_entries), so we need to | 532 | * addresses are placed in a table (default_idt_entries), so we need to |
| 454 | * update the table with the new addresses. switcher_offset() is a | 533 | * update the table with the new addresses. switcher_offset() is a |
| 455 | * convenience function which returns the distance between the | 534 | * convenience function which returns the distance between the |
| 456 | * compiled-in switcher code and the high-mapped copy we just made. */ | 535 | * compiled-in switcher code and the high-mapped copy we just made. |
| 536 | */ | ||
| 457 | for (i = 0; i < IDT_ENTRIES; i++) | 537 | for (i = 0; i < IDT_ENTRIES; i++) |
| 458 | default_idt_entries[i] += switcher_offset(); | 538 | default_idt_entries[i] += switcher_offset(); |
| 459 | 539 | ||
| @@ -468,63 +548,81 @@ void __init lguest_arch_host_init(void) | |||
| 468 | for_each_possible_cpu(i) { | 548 | for_each_possible_cpu(i) { |
| 469 | /* lguest_pages() returns this CPU's two pages. */ | 549 | /* lguest_pages() returns this CPU's two pages. */ |
| 470 | struct lguest_pages *pages = lguest_pages(i); | 550 | struct lguest_pages *pages = lguest_pages(i); |
| 471 | /* This is a convenience pointer to make the code fit one | 551 | /* This is a convenience pointer to make the code neater. */ |
| 472 | * statement to a line. */ | ||
| 473 | struct lguest_ro_state *state = &pages->state; | 552 | struct lguest_ro_state *state = &pages->state; |
| 474 | 553 | ||
| 475 | /* The Global Descriptor Table: the Host has a different one | 554 | /* |
| 555 | * The Global Descriptor Table: the Host has a different one | ||
| 476 | * for each CPU. We keep a descriptor for the GDT which says | 556 | * for each CPU. We keep a descriptor for the GDT which says |
| 477 | * where it is and how big it is (the size is actually the last | 557 | * where it is and how big it is (the size is actually the last |
| 478 | * byte, not the size, hence the "-1"). */ | 558 | * byte, not the size, hence the "-1"). |
| 559 | */ | ||
| 479 | state->host_gdt_desc.size = GDT_SIZE-1; | 560 | state->host_gdt_desc.size = GDT_SIZE-1; |
| 480 | state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); | 561 | state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); |
| 481 | 562 | ||
| 482 | /* All CPUs on the Host use the same Interrupt Descriptor | 563 | /* |
| 564 | * All CPUs on the Host use the same Interrupt Descriptor | ||
| 483 | * Table, so we just use store_idt(), which gets this CPU's IDT | 565 | * Table, so we just use store_idt(), which gets this CPU's IDT |
| 484 | * descriptor. */ | 566 | * descriptor. |
| 567 | */ | ||
| 485 | store_idt(&state->host_idt_desc); | 568 | store_idt(&state->host_idt_desc); |
| 486 | 569 | ||
| 487 | /* The descriptors for the Guest's GDT and IDT can be filled | 570 | /* |
| 571 | * The descriptors for the Guest's GDT and IDT can be filled | ||
| 488 | * out now, too. We copy the GDT & IDT into ->guest_gdt and | 572 | * out now, too. We copy the GDT & IDT into ->guest_gdt and |
| 489 | * ->guest_idt before actually running the Guest. */ | 573 | * ->guest_idt before actually running the Guest. |
| 574 | */ | ||
| 490 | state->guest_idt_desc.size = sizeof(state->guest_idt)-1; | 575 | state->guest_idt_desc.size = sizeof(state->guest_idt)-1; |
| 491 | state->guest_idt_desc.address = (long)&state->guest_idt; | 576 | state->guest_idt_desc.address = (long)&state->guest_idt; |
| 492 | state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; | 577 | state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; |
| 493 | state->guest_gdt_desc.address = (long)&state->guest_gdt; | 578 | state->guest_gdt_desc.address = (long)&state->guest_gdt; |
| 494 | 579 | ||
| 495 | /* We know where we want the stack to be when the Guest enters | 580 | /* |
| 581 | * We know where we want the stack to be when the Guest enters | ||
| 496 | * the Switcher: in pages->regs. The stack grows downwards, so | 582 | * the Switcher: in pages->regs. The stack grows downwards, so |
| 497 | * we start it at the end of that structure. */ | 583 | * we start it at the end of that structure. |
| 584 | */ | ||
| 498 | state->guest_tss.sp0 = (long)(&pages->regs + 1); | 585 | state->guest_tss.sp0 = (long)(&pages->regs + 1); |
| 499 | /* And this is the GDT entry to use for the stack: we keep a | 586 | /* |
| 500 | * couple of special LGUEST entries. */ | 587 | * And this is the GDT entry to use for the stack: we keep a |
| 588 | * couple of special LGUEST entries. | ||
| 589 | */ | ||
| 501 | state->guest_tss.ss0 = LGUEST_DS; | 590 | state->guest_tss.ss0 = LGUEST_DS; |
| 502 | 591 | ||
| 503 | /* x86 can have a finegrained bitmap which indicates what I/O | 592 | /* |
| 593 | * x86 can have a finegrained bitmap which indicates what I/O | ||
| 504 | * ports the process can use. We set it to the end of our | 594 | * ports the process can use. We set it to the end of our |
| 505 | * structure, meaning "none". */ | 595 | * structure, meaning "none". |
| 596 | */ | ||
| 506 | state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); | 597 | state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); |
| 507 | 598 | ||
| 508 | /* Some GDT entries are the same across all Guests, so we can | 599 | /* |
| 509 | * set them up now. */ | 600 | * Some GDT entries are the same across all Guests, so we can |
| 601 | * set them up now. | ||
| 602 | */ | ||
| 510 | setup_default_gdt_entries(state); | 603 | setup_default_gdt_entries(state); |
| 511 | /* Most IDT entries are the same for all Guests, too.*/ | 604 | /* Most IDT entries are the same for all Guests, too.*/ |
| 512 | setup_default_idt_entries(state, default_idt_entries); | 605 | setup_default_idt_entries(state, default_idt_entries); |
| 513 | 606 | ||
| 514 | /* The Host needs to be able to use the LGUEST segments on this | 607 | /* |
| 515 | * CPU, too, so put them in the Host GDT. */ | 608 | * The Host needs to be able to use the LGUEST segments on this |
| 609 | * CPU, too, so put them in the Host GDT. | ||
| 610 | */ | ||
| 516 | get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; | 611 | get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; |
| 517 | get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; | 612 | get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; |
| 518 | } | 613 | } |
| 519 | 614 | ||
| 520 | /* In the Switcher, we want the %cs segment register to use the | 615 | /* |
| 616 | * In the Switcher, we want the %cs segment register to use the | ||
| 521 | * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so | 617 | * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so |
| 522 | * it will be undisturbed when we switch. To change %cs and jump we | 618 | * it will be undisturbed when we switch. To change %cs and jump we |
| 523 | * need this structure to feed to Intel's "lcall" instruction. */ | 619 | * need this structure to feed to Intel's "lcall" instruction. |
| 620 | */ | ||
| 524 | lguest_entry.offset = (long)switch_to_guest + switcher_offset(); | 621 | lguest_entry.offset = (long)switch_to_guest + switcher_offset(); |
| 525 | lguest_entry.segment = LGUEST_CS; | 622 | lguest_entry.segment = LGUEST_CS; |
| 526 | 623 | ||
| 527 | /* Finally, we need to turn off "Page Global Enable". PGE is an | 624 | /* |
| 625 | * Finally, we need to turn off "Page Global Enable". PGE is an | ||
| 528 | * optimization where page table entries are specially marked to show | 626 | * optimization where page table entries are specially marked to show |
| 529 | * they never change. The Host kernel marks all the kernel pages this | 627 | * they never change. The Host kernel marks all the kernel pages this |
| 530 | * way because it's always present, even when userspace is running. | 628 | * way because it's always present, even when userspace is running. |
| @@ -534,16 +632,21 @@ void __init lguest_arch_host_init(void) | |||
| 534 | * you'll get really weird bugs that you'll chase for two days. | 632 | * you'll get really weird bugs that you'll chase for two days. |
| 535 | * | 633 | * |
| 536 | * I used to turn PGE off every time we switched to the Guest and back | 634 | * I used to turn PGE off every time we switched to the Guest and back |
| 537 | * on when we return, but that slowed the Switcher down noticeably. */ | 635 | * on when we return, but that slowed the Switcher down noticeably. |
| 636 | */ | ||
| 538 | 637 | ||
| 539 | /* We don't need the complexity of CPUs coming and going while we're | 638 | /* |
| 540 | * doing this. */ | 639 | * We don't need the complexity of CPUs coming and going while we're |
| 640 | * doing this. | ||
| 641 | */ | ||
| 541 | get_online_cpus(); | 642 | get_online_cpus(); |
| 542 | if (cpu_has_pge) { /* We have a broader idea of "global". */ | 643 | if (cpu_has_pge) { /* We have a broader idea of "global". */ |
| 543 | /* Remember that this was originally set (for cleanup). */ | 644 | /* Remember that this was originally set (for cleanup). */ |
| 544 | cpu_had_pge = 1; | 645 | cpu_had_pge = 1; |
| 545 | /* adjust_pge is a helper function which sets or unsets the PGE | 646 | /* |
| 546 | * bit on its CPU, depending on the argument (0 == unset). */ | 647 | * adjust_pge is a helper function which sets or unsets the PGE |
| 648 | * bit on its CPU, depending on the argument (0 == unset). | ||
| 649 | */ | ||
| 547 | on_each_cpu(adjust_pge, (void *)0, 1); | 650 | on_each_cpu(adjust_pge, (void *)0, 1); |
| 548 | /* Turn off the feature in the global feature set. */ | 651 | /* Turn off the feature in the global feature set. */ |
| 549 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); | 652 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); |
| @@ -590,26 +693,32 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) | |||
| 590 | { | 693 | { |
| 591 | u32 tsc_speed; | 694 | u32 tsc_speed; |
| 592 | 695 | ||
| 593 | /* The pointer to the Guest's "struct lguest_data" is the only argument. | 696 | /* |
| 594 | * We check that address now. */ | 697 | * The pointer to the Guest's "struct lguest_data" is the only argument. |
| 698 | * We check that address now. | ||
| 699 | */ | ||
| 595 | if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, | 700 | if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, |
| 596 | sizeof(*cpu->lg->lguest_data))) | 701 | sizeof(*cpu->lg->lguest_data))) |
| 597 | return -EFAULT; | 702 | return -EFAULT; |
| 598 | 703 | ||
| 599 | /* Having checked it, we simply set lg->lguest_data to point straight | 704 | /* |
| 705 | * Having checked it, we simply set lg->lguest_data to point straight | ||
| 600 | * into the Launcher's memory at the right place and then use | 706 | * into the Launcher's memory at the right place and then use |
| 601 | * copy_to_user/from_user from now on, instead of lgread/write. I put | 707 | * copy_to_user/from_user from now on, instead of lgread/write. I put |
| 602 | * this in to show that I'm not immune to writing stupid | 708 | * this in to show that I'm not immune to writing stupid |
| 603 | * optimizations. */ | 709 | * optimizations. |
| 710 | */ | ||
| 604 | cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1; | 711 | cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1; |
| 605 | 712 | ||
| 606 | /* We insist that the Time Stamp Counter exist and doesn't change with | 713 | /* |
| 714 | * We insist that the Time Stamp Counter exist and doesn't change with | ||
| 607 | * cpu frequency. Some devious chip manufacturers decided that TSC | 715 | * cpu frequency. Some devious chip manufacturers decided that TSC |
| 608 | * changes could be handled in software. I decided that time going | 716 | * changes could be handled in software. I decided that time going |
| 609 | * backwards might be good for benchmarks, but it's bad for users. | 717 | * backwards might be good for benchmarks, but it's bad for users. |
| 610 | * | 718 | * |
| 611 | * We also insist that the TSC be stable: the kernel detects unreliable | 719 | * We also insist that the TSC be stable: the kernel detects unreliable |
| 612 | * TSCs for its own purposes, and we use that here. */ | 720 | * TSCs for its own purposes, and we use that here. |
| 721 | */ | ||
| 613 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) | 722 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) |
| 614 | tsc_speed = tsc_khz; | 723 | tsc_speed = tsc_khz; |
| 615 | else | 724 | else |
| @@ -625,38 +734,47 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) | |||
| 625 | } | 734 | } |
| 626 | /*:*/ | 735 | /*:*/ |
| 627 | 736 | ||
| 628 | /*L:030 lguest_arch_setup_regs() | 737 | /*L:030 |
| 738 | * lguest_arch_setup_regs() | ||
| 629 | * | 739 | * |
| 630 | * Most of the Guest's registers are left alone: we used get_zeroed_page() to | 740 | * Most of the Guest's registers are left alone: we used get_zeroed_page() to |
| 631 | * allocate the structure, so they will be 0. */ | 741 | * allocate the structure, so they will be 0. |
| 742 | */ | ||
| 632 | void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) | 743 | void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) |
| 633 | { | 744 | { |
| 634 | struct lguest_regs *regs = cpu->regs; | 745 | struct lguest_regs *regs = cpu->regs; |
| 635 | 746 | ||
| 636 | /* There are four "segment" registers which the Guest needs to boot: | 747 | /* |
| 748 | * There are four "segment" registers which the Guest needs to boot: | ||
| 637 | * The "code segment" register (cs) refers to the kernel code segment | 749 | * The "code segment" register (cs) refers to the kernel code segment |
| 638 | * __KERNEL_CS, and the "data", "extra" and "stack" segment registers | 750 | * __KERNEL_CS, and the "data", "extra" and "stack" segment registers |
| 639 | * refer to the kernel data segment __KERNEL_DS. | 751 | * refer to the kernel data segment __KERNEL_DS. |
| 640 | * | 752 | * |
| 641 | * The privilege level is packed into the lower bits. The Guest runs | 753 | * The privilege level is packed into the lower bits. The Guest runs |
| 642 | * at privilege level 1 (GUEST_PL).*/ | 754 | * at privilege level 1 (GUEST_PL). |
| 755 | */ | ||
| 643 | regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; | 756 | regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; |
| 644 | regs->cs = __KERNEL_CS|GUEST_PL; | 757 | regs->cs = __KERNEL_CS|GUEST_PL; |
| 645 | 758 | ||
| 646 | /* The "eflags" register contains miscellaneous flags. Bit 1 (0x002) | 759 | /* |
| 760 | * The "eflags" register contains miscellaneous flags. Bit 1 (0x002) | ||
| 647 | * is supposed to always be "1". Bit 9 (0x200) controls whether | 761 | * is supposed to always be "1". Bit 9 (0x200) controls whether |
| 648 | * interrupts are enabled. We always leave interrupts enabled while | 762 | * interrupts are enabled. We always leave interrupts enabled while |
| 649 | * running the Guest. */ | 763 | * running the Guest. |
| 764 | */ | ||
| 650 | regs->eflags = X86_EFLAGS_IF | 0x2; | 765 | regs->eflags = X86_EFLAGS_IF | 0x2; |
| 651 | 766 | ||
| 652 | /* The "Extended Instruction Pointer" register says where the Guest is | 767 | /* |
| 653 | * running. */ | 768 | * The "Extended Instruction Pointer" register says where the Guest is |
| 769 | * running. | ||
| 770 | */ | ||
| 654 | regs->eip = start; | 771 | regs->eip = start; |
| 655 | 772 | ||
| 656 | /* %esi points to our boot information, at physical address 0, so don't | 773 | /* |
| 657 | * touch it. */ | 774 | * %esi points to our boot information, at physical address 0, so don't |
| 775 | * touch it. | ||
| 776 | */ | ||
| 658 | 777 | ||
| 659 | /* There are a couple of GDT entries the Guest expects when first | 778 | /* There are a couple of GDT entries the Guest expects at boot. */ |
| 660 | * booting. */ | ||
| 661 | setup_guest_gdt(cpu); | 779 | setup_guest_gdt(cpu); |
| 662 | } | 780 | } |
diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S index 3fc15318a80f..40634b0db9f7 100644 --- a/drivers/lguest/x86/switcher_32.S +++ b/drivers/lguest/x86/switcher_32.S | |||
| @@ -1,12 +1,15 @@ | |||
| 1 | /*P:900 This is the Switcher: code which sits at 0xFFC00000 astride both the | 1 | /*P:900 |
| 2 | * Host and Guest to do the low-level Guest<->Host switch. It is as simple as | 2 | * This is the Switcher: code which sits at 0xFFC00000 (or 0xFFE00000) astride |
| 3 | * it can be made, but it's naturally very specific to x86. | 3 | * both the Host and Guest to do the low-level Guest<->Host switch. It is as |
| 4 | * simple as it can be made, but it's naturally very specific to x86. | ||
| 4 | * | 5 | * |
| 5 | * You have now completed Preparation. If this has whet your appetite; if you | 6 | * You have now completed Preparation. If this has whet your appetite; if you |
| 6 | * are feeling invigorated and refreshed then the next, more challenging stage | 7 | * are feeling invigorated and refreshed then the next, more challenging stage |
| 7 | * can be found in "make Guest". :*/ | 8 | * can be found in "make Guest". |
| 9 | :*/ | ||
| 8 | 10 | ||
| 9 | /*M:012 Lguest is meant to be simple: my rule of thumb is that 1% more LOC must | 11 | /*M:012 |
| 12 | * Lguest is meant to be simple: my rule of thumb is that 1% more LOC must | ||
| 10 | * gain at least 1% more performance. Since neither LOC nor performance can be | 13 | * gain at least 1% more performance. Since neither LOC nor performance can be |
| 11 | * measured beforehand, it generally means implementing a feature then deciding | 14 | * measured beforehand, it generally means implementing a feature then deciding |
| 12 | * if it's worth it. And once it's implemented, who can say no? | 15 | * if it's worth it. And once it's implemented, who can say no? |
| @@ -31,11 +34,14 @@ | |||
| 31 | * Host (which is actually really easy). | 34 | * Host (which is actually really easy). |
| 32 | * | 35 | * |
| 33 | * Two questions remain. Would the performance gain outweigh the complexity? | 36 | * Two questions remain. Would the performance gain outweigh the complexity? |
| 34 | * And who would write the verse documenting it? :*/ | 37 | * And who would write the verse documenting it? |
| 38 | :*/ | ||
| 35 | 39 | ||
| 36 | /*M:011 Lguest64 handles NMI. This gave me NMI envy (until I looked at their | 40 | /*M:011 |
| 41 | * Lguest64 handles NMI. This gave me NMI envy (until I looked at their | ||
| 37 | * code). It's worth doing though, since it would let us use oprofile in the | 42 | * code). It's worth doing though, since it would let us use oprofile in the |
| 38 | * Host when a Guest is running. :*/ | 43 | * Host when a Guest is running. |
| 44 | :*/ | ||
| 39 | 45 | ||
| 40 | /*S:100 | 46 | /*S:100 |
| 41 | * Welcome to the Switcher itself! | 47 | * Welcome to the Switcher itself! |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 5810fa906af0..5fe39c2a3d2b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
| @@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev) | |||
| 220 | mddev->queue->unplug_fn = linear_unplug; | 220 | mddev->queue->unplug_fn = linear_unplug; |
| 221 | mddev->queue->backing_dev_info.congested_fn = linear_congested; | 221 | mddev->queue->backing_dev_info.congested_fn = linear_congested; |
| 222 | mddev->queue->backing_dev_info.congested_data = mddev; | 222 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 223 | md_integrity_register(mddev); | ||
| 223 | return 0; | 224 | return 0; |
| 224 | } | 225 | } |
| 225 | 226 | ||
| @@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 256 | rcu_assign_pointer(mddev->private, newconf); | 257 | rcu_assign_pointer(mddev->private, newconf); |
| 257 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); | 258 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); |
| 258 | set_capacity(mddev->gendisk, mddev->array_sectors); | 259 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 260 | revalidate_disk(mddev->gendisk); | ||
| 259 | call_rcu(&oldconf->rcu, free_conf); | 261 | call_rcu(&oldconf->rcu, free_conf); |
| 260 | return 0; | 262 | return 0; |
| 261 | } | 263 | } |
diff --git a/drivers/md/md.c b/drivers/md/md.c index d4351ff0849f..5b98bea4ff9b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -1308,7 +1308,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1308 | } | 1308 | } |
| 1309 | if (mddev->level != LEVEL_MULTIPATH) { | 1309 | if (mddev->level != LEVEL_MULTIPATH) { |
| 1310 | int role; | 1310 | int role; |
| 1311 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 1311 | if (rdev->desc_nr < 0 || |
| 1312 | rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { | ||
| 1313 | role = 0xffff; | ||
| 1314 | rdev->desc_nr = -1; | ||
| 1315 | } else | ||
| 1316 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | ||
| 1312 | switch(role) { | 1317 | switch(role) { |
| 1313 | case 0xffff: /* spare */ | 1318 | case 0xffff: /* spare */ |
| 1314 | break; | 1319 | break; |
| @@ -1394,8 +1399,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1394 | if (rdev2->desc_nr+1 > max_dev) | 1399 | if (rdev2->desc_nr+1 > max_dev) |
| 1395 | max_dev = rdev2->desc_nr+1; | 1400 | max_dev = rdev2->desc_nr+1; |
| 1396 | 1401 | ||
| 1397 | if (max_dev > le32_to_cpu(sb->max_dev)) | 1402 | if (max_dev > le32_to_cpu(sb->max_dev)) { |
| 1403 | int bmask; | ||
| 1398 | sb->max_dev = cpu_to_le32(max_dev); | 1404 | sb->max_dev = cpu_to_le32(max_dev); |
| 1405 | rdev->sb_size = max_dev * 2 + 256; | ||
| 1406 | bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; | ||
| 1407 | if (rdev->sb_size & bmask) | ||
| 1408 | rdev->sb_size = (rdev->sb_size | bmask) + 1; | ||
| 1409 | } | ||
| 1399 | for (i=0; i<max_dev;i++) | 1410 | for (i=0; i<max_dev;i++) |
| 1400 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1411 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
| 1401 | 1412 | ||
| @@ -1487,37 +1498,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |||
| 1487 | 1498 | ||
| 1488 | static LIST_HEAD(pending_raid_disks); | 1499 | static LIST_HEAD(pending_raid_disks); |
| 1489 | 1500 | ||
| 1490 | static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) | 1501 | /* |
| 1502 | * Try to register data integrity profile for an mddev | ||
| 1503 | * | ||
| 1504 | * This is called when an array is started and after a disk has been kicked | ||
| 1505 | * from the array. It only succeeds if all working and active component devices | ||
| 1506 | * are integrity capable with matching profiles. | ||
| 1507 | */ | ||
| 1508 | int md_integrity_register(mddev_t *mddev) | ||
| 1509 | { | ||
| 1510 | mdk_rdev_t *rdev, *reference = NULL; | ||
| 1511 | |||
| 1512 | if (list_empty(&mddev->disks)) | ||
| 1513 | return 0; /* nothing to do */ | ||
| 1514 | if (blk_get_integrity(mddev->gendisk)) | ||
| 1515 | return 0; /* already registered */ | ||
| 1516 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
| 1517 | /* skip spares and non-functional disks */ | ||
| 1518 | if (test_bit(Faulty, &rdev->flags)) | ||
| 1519 | continue; | ||
| 1520 | if (rdev->raid_disk < 0) | ||
| 1521 | continue; | ||
| 1522 | /* | ||
| 1523 | * If at least one rdev is not integrity capable, we can not | ||
| 1524 | * enable data integrity for the md device. | ||
| 1525 | */ | ||
| 1526 | if (!bdev_get_integrity(rdev->bdev)) | ||
| 1527 | return -EINVAL; | ||
| 1528 | if (!reference) { | ||
| 1529 | /* Use the first rdev as the reference */ | ||
| 1530 | reference = rdev; | ||
| 1531 | continue; | ||
| 1532 | } | ||
| 1533 | /* does this rdev's profile match the reference profile? */ | ||
| 1534 | if (blk_integrity_compare(reference->bdev->bd_disk, | ||
| 1535 | rdev->bdev->bd_disk) < 0) | ||
| 1536 | return -EINVAL; | ||
| 1537 | } | ||
| 1538 | /* | ||
| 1539 | * All component devices are integrity capable and have matching | ||
| 1540 | * profiles, register the common profile for the md device. | ||
| 1541 | */ | ||
| 1542 | if (blk_integrity_register(mddev->gendisk, | ||
| 1543 | bdev_get_integrity(reference->bdev)) != 0) { | ||
| 1544 | printk(KERN_ERR "md: failed to register integrity for %s\n", | ||
| 1545 | mdname(mddev)); | ||
| 1546 | return -EINVAL; | ||
| 1547 | } | ||
| 1548 | printk(KERN_NOTICE "md: data integrity on %s enabled\n", | ||
| 1549 | mdname(mddev)); | ||
| 1550 | return 0; | ||
| 1551 | } | ||
| 1552 | EXPORT_SYMBOL(md_integrity_register); | ||
| 1553 | |||
| 1554 | /* Disable data integrity if non-capable/non-matching disk is being added */ | ||
| 1555 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | ||
| 1491 | { | 1556 | { |
| 1492 | struct mdk_personality *pers = mddev->pers; | ||
| 1493 | struct gendisk *disk = mddev->gendisk; | ||
| 1494 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); | 1557 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); |
| 1495 | struct blk_integrity *bi_mddev = blk_get_integrity(disk); | 1558 | struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); |
| 1496 | 1559 | ||
| 1497 | /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ | 1560 | if (!bi_mddev) /* nothing to do */ |
| 1498 | if (pers && pers->level >= 4 && pers->level <= 6) | ||
| 1499 | return; | 1561 | return; |
| 1500 | 1562 | if (rdev->raid_disk < 0) /* skip spares */ | |
| 1501 | /* If rdev is integrity capable, register profile for mddev */ | ||
| 1502 | if (!bi_mddev && bi_rdev) { | ||
| 1503 | if (blk_integrity_register(disk, bi_rdev)) | ||
| 1504 | printk(KERN_ERR "%s: %s Could not register integrity!\n", | ||
| 1505 | __func__, disk->disk_name); | ||
| 1506 | else | ||
| 1507 | printk(KERN_NOTICE "Enabling data integrity on %s\n", | ||
| 1508 | disk->disk_name); | ||
| 1509 | return; | 1563 | return; |
| 1510 | } | 1564 | if (bi_rdev && blk_integrity_compare(mddev->gendisk, |
| 1511 | 1565 | rdev->bdev->bd_disk) >= 0) | |
| 1512 | /* Check that mddev and rdev have matching profiles */ | 1566 | return; |
| 1513 | if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { | 1567 | printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); |
| 1514 | printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, | 1568 | blk_integrity_unregister(mddev->gendisk); |
| 1515 | disk->disk_name, rdev->bdev->bd_disk->disk_name); | ||
| 1516 | printk(KERN_NOTICE "Disabling data integrity on %s\n", | ||
| 1517 | disk->disk_name); | ||
| 1518 | blk_integrity_unregister(disk); | ||
| 1519 | } | ||
| 1520 | } | 1569 | } |
| 1570 | EXPORT_SYMBOL(md_integrity_add_rdev); | ||
| 1521 | 1571 | ||
| 1522 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | 1572 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) |
| 1523 | { | 1573 | { |
| @@ -1591,7 +1641,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
| 1591 | /* May as well allow recovery to be retried once */ | 1641 | /* May as well allow recovery to be retried once */ |
| 1592 | mddev->recovery_disabled = 0; | 1642 | mddev->recovery_disabled = 0; |
| 1593 | 1643 | ||
| 1594 | md_integrity_check(rdev, mddev); | ||
| 1595 | return 0; | 1644 | return 0; |
| 1596 | 1645 | ||
| 1597 | fail: | 1646 | fail: |
| @@ -2657,6 +2706,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 2657 | ssize_t rv = len; | 2706 | ssize_t rv = len; |
| 2658 | struct mdk_personality *pers; | 2707 | struct mdk_personality *pers; |
| 2659 | void *priv; | 2708 | void *priv; |
| 2709 | mdk_rdev_t *rdev; | ||
| 2660 | 2710 | ||
| 2661 | if (mddev->pers == NULL) { | 2711 | if (mddev->pers == NULL) { |
| 2662 | if (len == 0) | 2712 | if (len == 0) |
| @@ -2736,6 +2786,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 2736 | mddev_suspend(mddev); | 2786 | mddev_suspend(mddev); |
| 2737 | mddev->pers->stop(mddev); | 2787 | mddev->pers->stop(mddev); |
| 2738 | module_put(mddev->pers->owner); | 2788 | module_put(mddev->pers->owner); |
| 2789 | /* Invalidate devices that are now superfluous */ | ||
| 2790 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
| 2791 | if (rdev->raid_disk >= mddev->raid_disks) { | ||
| 2792 | rdev->raid_disk = -1; | ||
| 2793 | clear_bit(In_sync, &rdev->flags); | ||
| 2794 | } | ||
| 2739 | mddev->pers = pers; | 2795 | mddev->pers = pers; |
| 2740 | mddev->private = priv; | 2796 | mddev->private = priv; |
| 2741 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 2797 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
| @@ -3685,17 +3741,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 3685 | 3741 | ||
| 3686 | mddev->array_sectors = sectors; | 3742 | mddev->array_sectors = sectors; |
| 3687 | set_capacity(mddev->gendisk, mddev->array_sectors); | 3743 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 3688 | if (mddev->pers) { | 3744 | if (mddev->pers) |
| 3689 | struct block_device *bdev = bdget_disk(mddev->gendisk, 0); | 3745 | revalidate_disk(mddev->gendisk); |
| 3690 | |||
| 3691 | if (bdev) { | ||
| 3692 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
| 3693 | i_size_write(bdev->bd_inode, | ||
| 3694 | (loff_t)mddev->array_sectors << 9); | ||
| 3695 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
| 3696 | bdput(bdev); | ||
| 3697 | } | ||
| 3698 | } | ||
| 3699 | 3746 | ||
| 3700 | return len; | 3747 | return len; |
| 3701 | } | 3748 | } |
| @@ -4048,10 +4095,6 @@ static int do_md_run(mddev_t * mddev) | |||
| 4048 | } | 4095 | } |
| 4049 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 4096 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
| 4050 | 4097 | ||
| 4051 | if (pers->level >= 4 && pers->level <= 6) | ||
| 4052 | /* Cannot support integrity (yet) */ | ||
| 4053 | blk_integrity_unregister(mddev->gendisk); | ||
| 4054 | |||
| 4055 | if (mddev->reshape_position != MaxSector && | 4098 | if (mddev->reshape_position != MaxSector && |
| 4056 | pers->start_reshape == NULL) { | 4099 | pers->start_reshape == NULL) { |
| 4057 | /* This personality cannot handle reshaping... */ | 4100 | /* This personality cannot handle reshaping... */ |
| @@ -4189,6 +4232,7 @@ static int do_md_run(mddev_t * mddev) | |||
| 4189 | md_wakeup_thread(mddev->thread); | 4232 | md_wakeup_thread(mddev->thread); |
| 4190 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4233 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
| 4191 | 4234 | ||
| 4235 | revalidate_disk(mddev->gendisk); | ||
| 4192 | mddev->changed = 1; | 4236 | mddev->changed = 1; |
| 4193 | md_new_event(mddev); | 4237 | md_new_event(mddev); |
| 4194 | sysfs_notify_dirent(mddev->sysfs_state); | 4238 | sysfs_notify_dirent(mddev->sysfs_state); |
| @@ -5087,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) | |||
| 5087 | return -ENOSPC; | 5131 | return -ENOSPC; |
| 5088 | } | 5132 | } |
| 5089 | rv = mddev->pers->resize(mddev, num_sectors); | 5133 | rv = mddev->pers->resize(mddev, num_sectors); |
| 5090 | if (!rv) { | 5134 | if (!rv) |
| 5091 | struct block_device *bdev; | 5135 | revalidate_disk(mddev->gendisk); |
| 5092 | |||
| 5093 | bdev = bdget_disk(mddev->gendisk, 0); | ||
| 5094 | if (bdev) { | ||
| 5095 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
| 5096 | i_size_write(bdev->bd_inode, | ||
| 5097 | (loff_t)mddev->array_sectors << 9); | ||
| 5098 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
| 5099 | bdput(bdev); | ||
| 5100 | } | ||
| 5101 | } | ||
| 5102 | return rv; | 5136 | return rv; |
| 5103 | } | 5137 | } |
| 5104 | 5138 | ||
diff --git a/drivers/md/md.h b/drivers/md/md.h index 9430a110db93..78f03168baf9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
| @@ -431,5 +431,7 @@ extern int md_allow_write(mddev_t *mddev); | |||
| 431 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 431 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
| 432 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); | 432 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); |
| 433 | extern int md_check_no_bitmap(mddev_t *mddev); | 433 | extern int md_check_no_bitmap(mddev_t *mddev); |
| 434 | extern int md_integrity_register(mddev_t *mddev); | ||
| 435 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | ||
| 434 | 436 | ||
| 435 | #endif /* _MD_MD_H */ | 437 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 237fe3fd235c..7140909f6662 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
| @@ -313,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 313 | set_bit(In_sync, &rdev->flags); | 313 | set_bit(In_sync, &rdev->flags); |
| 314 | rcu_assign_pointer(p->rdev, rdev); | 314 | rcu_assign_pointer(p->rdev, rdev); |
| 315 | err = 0; | 315 | err = 0; |
| 316 | md_integrity_add_rdev(rdev, mddev); | ||
| 316 | break; | 317 | break; |
| 317 | } | 318 | } |
| 318 | 319 | ||
| @@ -345,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number) | |||
| 345 | /* lost the race, try later */ | 346 | /* lost the race, try later */ |
| 346 | err = -EBUSY; | 347 | err = -EBUSY; |
| 347 | p->rdev = rdev; | 348 | p->rdev = rdev; |
| 349 | goto abort; | ||
| 348 | } | 350 | } |
| 351 | md_integrity_register(mddev); | ||
| 349 | } | 352 | } |
| 350 | abort: | 353 | abort: |
| 351 | 354 | ||
| @@ -519,7 +522,7 @@ static int multipath_run (mddev_t *mddev) | |||
| 519 | mddev->queue->unplug_fn = multipath_unplug; | 522 | mddev->queue->unplug_fn = multipath_unplug; |
| 520 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; | 523 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; |
| 521 | mddev->queue->backing_dev_info.congested_data = mddev; | 524 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 522 | 525 | md_integrity_register(mddev); | |
| 523 | return 0; | 526 | return 0; |
| 524 | 527 | ||
| 525 | out_free_conf: | 528 | out_free_conf: |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 335f490dcad6..898e2bdfee47 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
| @@ -351,6 +351,7 @@ static int raid0_run(mddev_t *mddev) | |||
| 351 | 351 | ||
| 352 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); | 352 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); |
| 353 | dump_zones(mddev); | 353 | dump_zones(mddev); |
| 354 | md_integrity_register(mddev); | ||
| 354 | return 0; | 355 | return 0; |
| 355 | } | 356 | } |
| 356 | 357 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0569efba0c02..8726fd7ebce5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1144 | rcu_assign_pointer(p->rdev, rdev); | 1144 | rcu_assign_pointer(p->rdev, rdev); |
| 1145 | break; | 1145 | break; |
| 1146 | } | 1146 | } |
| 1147 | 1147 | md_integrity_add_rdev(rdev, mddev); | |
| 1148 | print_conf(conf); | 1148 | print_conf(conf); |
| 1149 | return err; | 1149 | return err; |
| 1150 | } | 1150 | } |
| @@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number) | |||
| 1178 | /* lost the race, try later */ | 1178 | /* lost the race, try later */ |
| 1179 | err = -EBUSY; | 1179 | err = -EBUSY; |
| 1180 | p->rdev = rdev; | 1180 | p->rdev = rdev; |
| 1181 | goto abort; | ||
| 1181 | } | 1182 | } |
| 1183 | md_integrity_register(mddev); | ||
| 1182 | } | 1184 | } |
| 1183 | abort: | 1185 | abort: |
| 1184 | 1186 | ||
| @@ -2067,7 +2069,7 @@ static int run(mddev_t *mddev) | |||
| 2067 | mddev->queue->unplug_fn = raid1_unplug; | 2069 | mddev->queue->unplug_fn = raid1_unplug; |
| 2068 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | 2070 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; |
| 2069 | mddev->queue->backing_dev_info.congested_data = mddev; | 2071 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 2070 | 2072 | md_integrity_register(mddev); | |
| 2071 | return 0; | 2073 | return 0; |
| 2072 | 2074 | ||
| 2073 | out_no_mem: | 2075 | out_no_mem: |
| @@ -2132,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) | |||
| 2132 | return -EINVAL; | 2134 | return -EINVAL; |
| 2133 | set_capacity(mddev->gendisk, mddev->array_sectors); | 2135 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 2134 | mddev->changed = 1; | 2136 | mddev->changed = 1; |
| 2137 | revalidate_disk(mddev->gendisk); | ||
| 2135 | if (sectors > mddev->dev_sectors && | 2138 | if (sectors > mddev->dev_sectors && |
| 2136 | mddev->recovery_cp == MaxSector) { | 2139 | mddev->recovery_cp == MaxSector) { |
| 2137 | mddev->recovery_cp = mddev->dev_sectors; | 2140 | mddev->recovery_cp = mddev->dev_sectors; |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7298a5e5a183..3d9020cf6f6e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1170 | break; | 1170 | break; |
| 1171 | } | 1171 | } |
| 1172 | 1172 | ||
| 1173 | md_integrity_add_rdev(rdev, mddev); | ||
| 1173 | print_conf(conf); | 1174 | print_conf(conf); |
| 1174 | return err; | 1175 | return err; |
| 1175 | } | 1176 | } |
| @@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number) | |||
| 1203 | /* lost the race, try later */ | 1204 | /* lost the race, try later */ |
| 1204 | err = -EBUSY; | 1205 | err = -EBUSY; |
| 1205 | p->rdev = rdev; | 1206 | p->rdev = rdev; |
| 1207 | goto abort; | ||
| 1206 | } | 1208 | } |
| 1209 | md_integrity_register(mddev); | ||
| 1207 | } | 1210 | } |
| 1208 | abort: | 1211 | abort: |
| 1209 | 1212 | ||
| @@ -2225,6 +2228,7 @@ static int run(mddev_t *mddev) | |||
| 2225 | 2228 | ||
| 2226 | if (conf->near_copies < mddev->raid_disks) | 2229 | if (conf->near_copies < mddev->raid_disks) |
| 2227 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | 2230 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); |
| 2231 | md_integrity_register(mddev); | ||
| 2228 | return 0; | 2232 | return 0; |
| 2229 | 2233 | ||
| 2230 | out_free_conf: | 2234 | out_free_conf: |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37835538b58e..2b521ee67dfa 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -3999,6 +3999,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
| 3999 | return 0; | 3999 | return 0; |
| 4000 | } | 4000 | } |
| 4001 | 4001 | ||
| 4002 | /* Allow raid5_quiesce to complete */ | ||
| 4003 | wait_event(conf->wait_for_overlap, conf->quiesce != 2); | ||
| 4004 | |||
| 4002 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 4005 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
| 4003 | return reshape_request(mddev, sector_nr, skipped); | 4006 | return reshape_request(mddev, sector_nr, skipped); |
| 4004 | 4007 | ||
| @@ -4316,6 +4319,15 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
| 4316 | return sectors * (raid_disks - conf->max_degraded); | 4319 | return sectors * (raid_disks - conf->max_degraded); |
| 4317 | } | 4320 | } |
| 4318 | 4321 | ||
| 4322 | static void free_conf(raid5_conf_t *conf) | ||
| 4323 | { | ||
| 4324 | shrink_stripes(conf); | ||
| 4325 | safe_put_page(conf->spare_page); | ||
| 4326 | kfree(conf->disks); | ||
| 4327 | kfree(conf->stripe_hashtbl); | ||
| 4328 | kfree(conf); | ||
| 4329 | } | ||
| 4330 | |||
| 4319 | static raid5_conf_t *setup_conf(mddev_t *mddev) | 4331 | static raid5_conf_t *setup_conf(mddev_t *mddev) |
| 4320 | { | 4332 | { |
| 4321 | raid5_conf_t *conf; | 4333 | raid5_conf_t *conf; |
| @@ -4447,11 +4459,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4447 | 4459 | ||
| 4448 | abort: | 4460 | abort: |
| 4449 | if (conf) { | 4461 | if (conf) { |
| 4450 | shrink_stripes(conf); | 4462 | free_conf(conf); |
| 4451 | safe_put_page(conf->spare_page); | ||
| 4452 | kfree(conf->disks); | ||
| 4453 | kfree(conf->stripe_hashtbl); | ||
| 4454 | kfree(conf); | ||
| 4455 | return ERR_PTR(-EIO); | 4463 | return ERR_PTR(-EIO); |
| 4456 | } else | 4464 | } else |
| 4457 | return ERR_PTR(-ENOMEM); | 4465 | return ERR_PTR(-ENOMEM); |
| @@ -4629,12 +4637,8 @@ abort: | |||
| 4629 | md_unregister_thread(mddev->thread); | 4637 | md_unregister_thread(mddev->thread); |
| 4630 | mddev->thread = NULL; | 4638 | mddev->thread = NULL; |
| 4631 | if (conf) { | 4639 | if (conf) { |
| 4632 | shrink_stripes(conf); | ||
| 4633 | print_raid5_conf(conf); | 4640 | print_raid5_conf(conf); |
| 4634 | safe_put_page(conf->spare_page); | 4641 | free_conf(conf); |
| 4635 | kfree(conf->disks); | ||
| 4636 | kfree(conf->stripe_hashtbl); | ||
| 4637 | kfree(conf); | ||
| 4638 | } | 4642 | } |
| 4639 | mddev->private = NULL; | 4643 | mddev->private = NULL; |
| 4640 | printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); | 4644 | printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); |
| @@ -4649,13 +4653,10 @@ static int stop(mddev_t *mddev) | |||
| 4649 | 4653 | ||
| 4650 | md_unregister_thread(mddev->thread); | 4654 | md_unregister_thread(mddev->thread); |
| 4651 | mddev->thread = NULL; | 4655 | mddev->thread = NULL; |
| 4652 | shrink_stripes(conf); | ||
| 4653 | kfree(conf->stripe_hashtbl); | ||
| 4654 | mddev->queue->backing_dev_info.congested_fn = NULL; | 4656 | mddev->queue->backing_dev_info.congested_fn = NULL; |
| 4655 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 4657 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
| 4656 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); | 4658 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); |
| 4657 | kfree(conf->disks); | 4659 | free_conf(conf); |
| 4658 | kfree(conf); | ||
| 4659 | mddev->private = NULL; | 4660 | mddev->private = NULL; |
| 4660 | return 0; | 4661 | return 0; |
| 4661 | } | 4662 | } |
| @@ -4857,6 +4858,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
| 4857 | return -EINVAL; | 4858 | return -EINVAL; |
| 4858 | set_capacity(mddev->gendisk, mddev->array_sectors); | 4859 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 4859 | mddev->changed = 1; | 4860 | mddev->changed = 1; |
| 4861 | revalidate_disk(mddev->gendisk); | ||
| 4860 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { | 4862 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { |
| 4861 | mddev->recovery_cp = mddev->dev_sectors; | 4863 | mddev->recovery_cp = mddev->dev_sectors; |
| 4862 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4864 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| @@ -5002,7 +5004,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
| 5002 | spin_unlock_irqrestore(&conf->device_lock, flags); | 5004 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 5003 | } | 5005 | } |
| 5004 | mddev->raid_disks = conf->raid_disks; | 5006 | mddev->raid_disks = conf->raid_disks; |
| 5005 | mddev->reshape_position = 0; | 5007 | mddev->reshape_position = conf->reshape_progress; |
| 5006 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 5008 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
| 5007 | 5009 | ||
| 5008 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 5010 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
| @@ -5057,7 +5059,6 @@ static void end_reshape(raid5_conf_t *conf) | |||
| 5057 | */ | 5059 | */ |
| 5058 | static void raid5_finish_reshape(mddev_t *mddev) | 5060 | static void raid5_finish_reshape(mddev_t *mddev) |
| 5059 | { | 5061 | { |
| 5060 | struct block_device *bdev; | ||
| 5061 | raid5_conf_t *conf = mddev->private; | 5062 | raid5_conf_t *conf = mddev->private; |
| 5062 | 5063 | ||
| 5063 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5064 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
| @@ -5066,15 +5067,7 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
| 5066 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5067 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
| 5067 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5068 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 5068 | mddev->changed = 1; | 5069 | mddev->changed = 1; |
| 5069 | 5070 | revalidate_disk(mddev->gendisk); | |
| 5070 | bdev = bdget_disk(mddev->gendisk, 0); | ||
| 5071 | if (bdev) { | ||
| 5072 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
| 5073 | i_size_write(bdev->bd_inode, | ||
| 5074 | (loff_t)mddev->array_sectors << 9); | ||
| 5075 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
| 5076 | bdput(bdev); | ||
| 5077 | } | ||
| 5078 | } else { | 5071 | } else { |
| 5079 | int d; | 5072 | int d; |
| 5080 | mddev->degraded = conf->raid_disks; | 5073 | mddev->degraded = conf->raid_disks; |
| @@ -5106,12 +5099,18 @@ static void raid5_quiesce(mddev_t *mddev, int state) | |||
| 5106 | 5099 | ||
| 5107 | case 1: /* stop all writes */ | 5100 | case 1: /* stop all writes */ |
| 5108 | spin_lock_irq(&conf->device_lock); | 5101 | spin_lock_irq(&conf->device_lock); |
| 5109 | conf->quiesce = 1; | 5102 | /* '2' tells resync/reshape to pause so that all |
| 5103 | * active stripes can drain | ||
| 5104 | */ | ||
| 5105 | conf->quiesce = 2; | ||
| 5110 | wait_event_lock_irq(conf->wait_for_stripe, | 5106 | wait_event_lock_irq(conf->wait_for_stripe, |
| 5111 | atomic_read(&conf->active_stripes) == 0 && | 5107 | atomic_read(&conf->active_stripes) == 0 && |
| 5112 | atomic_read(&conf->active_aligned_reads) == 0, | 5108 | atomic_read(&conf->active_aligned_reads) == 0, |
| 5113 | conf->device_lock, /* nothing */); | 5109 | conf->device_lock, /* nothing */); |
| 5110 | conf->quiesce = 1; | ||
| 5114 | spin_unlock_irq(&conf->device_lock); | 5111 | spin_unlock_irq(&conf->device_lock); |
| 5112 | /* allow reshape to continue */ | ||
| 5113 | wake_up(&conf->wait_for_overlap); | ||
| 5115 | break; | 5114 | break; |
| 5116 | 5115 | ||
| 5117 | case 0: /* re-enable writes */ | 5116 | case 0: /* re-enable writes */ |
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index bae61b22501c..7d430835655f 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c | |||
| @@ -180,14 +180,9 @@ static struct completion irq_event; | |||
| 180 | static int twl4030_irq_thread(void *data) | 180 | static int twl4030_irq_thread(void *data) |
| 181 | { | 181 | { |
| 182 | long irq = (long)data; | 182 | long irq = (long)data; |
| 183 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 184 | static unsigned i2c_errors; | 183 | static unsigned i2c_errors; |
| 185 | static const unsigned max_i2c_errors = 100; | 184 | static const unsigned max_i2c_errors = 100; |
| 186 | 185 | ||
| 187 | if (!desc) { | ||
| 188 | pr_err("twl4030: Invalid IRQ: %ld\n", irq); | ||
| 189 | return -EINVAL; | ||
| 190 | } | ||
| 191 | 186 | ||
| 192 | current->flags |= PF_NOFREEZE; | 187 | current->flags |= PF_NOFREEZE; |
| 193 | 188 | ||
| @@ -240,7 +235,7 @@ static int twl4030_irq_thread(void *data) | |||
| 240 | } | 235 | } |
| 241 | local_irq_enable(); | 236 | local_irq_enable(); |
| 242 | 237 | ||
| 243 | desc->chip->unmask(irq); | 238 | enable_irq(irq); |
| 244 | } | 239 | } |
| 245 | 240 | ||
| 246 | return 0; | 241 | return 0; |
| @@ -255,25 +250,13 @@ static int twl4030_irq_thread(void *data) | |||
| 255 | * thread. All we do here is acknowledge and mask the interrupt and wakeup | 250 | * thread. All we do here is acknowledge and mask the interrupt and wakeup |
| 256 | * the kernel thread. | 251 | * the kernel thread. |
| 257 | */ | 252 | */ |
| 258 | static void handle_twl4030_pih(unsigned int irq, struct irq_desc *desc) | 253 | static irqreturn_t handle_twl4030_pih(int irq, void *devid) |
| 259 | { | 254 | { |
| 260 | /* Acknowledge, clear *AND* mask the interrupt... */ | 255 | /* Acknowledge, clear *AND* mask the interrupt... */ |
| 261 | desc->chip->ack(irq); | 256 | disable_irq_nosync(irq); |
| 262 | complete(&irq_event); | 257 | complete(devid); |
| 263 | } | 258 | return IRQ_HANDLED; |
| 264 | |||
| 265 | static struct task_struct *start_twl4030_irq_thread(long irq) | ||
| 266 | { | ||
| 267 | struct task_struct *thread; | ||
| 268 | |||
| 269 | init_completion(&irq_event); | ||
| 270 | thread = kthread_run(twl4030_irq_thread, (void *)irq, "twl4030-irq"); | ||
| 271 | if (!thread) | ||
| 272 | pr_err("twl4030: could not create irq %ld thread!\n", irq); | ||
| 273 | |||
| 274 | return thread; | ||
| 275 | } | 259 | } |
| 276 | |||
| 277 | /*----------------------------------------------------------------------*/ | 260 | /*----------------------------------------------------------------------*/ |
| 278 | 261 | ||
| 279 | /* | 262 | /* |
| @@ -734,18 +717,28 @@ int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end) | |||
| 734 | } | 717 | } |
| 735 | 718 | ||
| 736 | /* install an irq handler to demultiplex the TWL4030 interrupt */ | 719 | /* install an irq handler to demultiplex the TWL4030 interrupt */ |
| 737 | task = start_twl4030_irq_thread(irq_num); | ||
| 738 | if (!task) { | ||
| 739 | pr_err("twl4030: irq thread FAIL\n"); | ||
| 740 | status = -ESRCH; | ||
| 741 | goto fail; | ||
| 742 | } | ||
| 743 | 720 | ||
| 744 | set_irq_data(irq_num, task); | ||
| 745 | set_irq_chained_handler(irq_num, handle_twl4030_pih); | ||
| 746 | 721 | ||
| 747 | return status; | 722 | init_completion(&irq_event); |
| 748 | 723 | ||
| 724 | status = request_irq(irq_num, handle_twl4030_pih, IRQF_DISABLED, | ||
| 725 | "TWL4030-PIH", &irq_event); | ||
| 726 | if (status < 0) { | ||
| 727 | pr_err("twl4030: could not claim irq%d: %d\n", irq_num, status); | ||
| 728 | goto fail_rqirq; | ||
| 729 | } | ||
| 730 | |||
| 731 | task = kthread_run(twl4030_irq_thread, (void *)irq_num, "twl4030-irq"); | ||
| 732 | if (IS_ERR(task)) { | ||
| 733 | pr_err("twl4030: could not create irq %d thread!\n", irq_num); | ||
| 734 | status = PTR_ERR(task); | ||
| 735 | goto fail_kthread; | ||
| 736 | } | ||
| 737 | return status; | ||
| 738 | fail_kthread: | ||
| 739 | free_irq(irq_num, &irq_event); | ||
| 740 | fail_rqirq: | ||
| 741 | /* clean up twl4030_sih_setup */ | ||
| 749 | fail: | 742 | fail: |
| 750 | for (i = irq_base; i < irq_end; i++) | 743 | for (i = irq_base; i < irq_end; i++) |
| 751 | set_irq_chip_and_handler(i, NULL, NULL); | 744 | set_irq_chip_and_handler(i, NULL, NULL); |
diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c index d38a7acdb6ec..d019746551f3 100644 --- a/drivers/misc/cb710/sgbuf2.c +++ b/drivers/misc/cb710/sgbuf2.c | |||
| @@ -114,7 +114,6 @@ static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data) | |||
| 114 | if (!left) | 114 | if (!left) |
| 115 | return; | 115 | return; |
| 116 | addr += len; | 116 | addr += len; |
| 117 | flush_kernel_dcache_page(miter->page); | ||
| 118 | } while (sg_dwiter_next(miter)); | 117 | } while (sg_dwiter_next(miter)); |
| 119 | } | 118 | } |
| 120 | 119 | ||
| @@ -142,9 +141,6 @@ void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t da | |||
| 142 | return; | 141 | return; |
| 143 | } else | 142 | } else |
| 144 | sg_dwiter_write_slow(miter, data); | 143 | sg_dwiter_write_slow(miter, data); |
| 145 | |||
| 146 | if (miter->length == miter->consumed) | ||
| 147 | flush_kernel_dcache_page(miter->page); | ||
| 148 | } | 144 | } |
| 149 | EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); | 145 | EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); |
| 150 | 146 | ||
diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c index 11efefb1af51..4e72964a7b43 100644 --- a/drivers/mmc/host/cb710-mmc.c +++ b/drivers/mmc/host/cb710-mmc.c | |||
| @@ -278,7 +278,7 @@ static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data) | |||
| 278 | if (unlikely(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8))) | 278 | if (unlikely(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8))) |
| 279 | return -EINVAL; | 279 | return -EINVAL; |
| 280 | 280 | ||
| 281 | sg_miter_start(&miter, data->sg, data->sg_len, 0); | 281 | sg_miter_start(&miter, data->sg, data->sg_len, SG_MITER_TO_SG); |
| 282 | 282 | ||
| 283 | cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, | 283 | cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, |
| 284 | 15, CB710_MMC_C2_READ_PIO_SIZE_MASK); | 284 | 15, CB710_MMC_C2_READ_PIO_SIZE_MASK); |
| @@ -307,7 +307,7 @@ static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data) | |||
| 307 | goto out; | 307 | goto out; |
| 308 | } | 308 | } |
| 309 | out: | 309 | out: |
| 310 | cb710_sg_miter_stop_writing(&miter); | 310 | sg_miter_stop(&miter); |
| 311 | return err; | 311 | return err; |
| 312 | } | 312 | } |
| 313 | 313 | ||
| @@ -322,7 +322,7 @@ static int cb710_mmc_send(struct cb710_slot *slot, struct mmc_data *data) | |||
| 322 | if (unlikely(data->blocks > 1 && data->blksz & 15)) | 322 | if (unlikely(data->blocks > 1 && data->blksz & 15)) |
| 323 | return -EINVAL; | 323 | return -EINVAL; |
| 324 | 324 | ||
| 325 | sg_miter_start(&miter, data->sg, data->sg_len, 0); | 325 | sg_miter_start(&miter, data->sg, data->sg_len, SG_MITER_FROM_SG); |
| 326 | 326 | ||
| 327 | cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, | 327 | cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, |
| 328 | 0, CB710_MMC_C2_READ_PIO_SIZE_MASK); | 328 | 0, CB710_MMC_C2_READ_PIO_SIZE_MASK); |
diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c index e0be21a4a696..bf98d7cc928a 100644 --- a/drivers/mmc/host/imxmmc.c +++ b/drivers/mmc/host/imxmmc.c | |||
| @@ -652,7 +652,7 @@ static irqreturn_t imxmci_irq(int irq, void *devid) | |||
| 652 | set_bit(IMXMCI_PEND_STARTED_b, &host->pending_events); | 652 | set_bit(IMXMCI_PEND_STARTED_b, &host->pending_events); |
| 653 | tasklet_schedule(&host->tasklet); | 653 | tasklet_schedule(&host->tasklet); |
| 654 | 654 | ||
| 655 | return IRQ_RETVAL(handled);; | 655 | return IRQ_RETVAL(handled); |
| 656 | } | 656 | } |
| 657 | 657 | ||
| 658 | static void imxmci_tasklet_fnc(unsigned long data) | 658 | static void imxmci_tasklet_fnc(unsigned long data) |
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 62041c7e9246..fc96f8cb9c0b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c | |||
| @@ -773,8 +773,14 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) | |||
| 773 | } | 773 | } |
| 774 | 774 | ||
| 775 | if (!(host->flags & SDHCI_REQ_USE_DMA)) { | 775 | if (!(host->flags & SDHCI_REQ_USE_DMA)) { |
| 776 | sg_miter_start(&host->sg_miter, | 776 | int flags; |
| 777 | data->sg, data->sg_len, SG_MITER_ATOMIC); | 777 | |
| 778 | flags = SG_MITER_ATOMIC; | ||
| 779 | if (host->data->flags & MMC_DATA_READ) | ||
| 780 | flags |= SG_MITER_TO_SG; | ||
| 781 | else | ||
| 782 | flags |= SG_MITER_FROM_SG; | ||
| 783 | sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags); | ||
| 778 | host->blocks = data->blocks; | 784 | host->blocks = data->blocks; |
| 779 | } | 785 | } |
| 780 | 786 | ||
diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c index 3e00fa8ea65f..4a7c32895be5 100644 --- a/drivers/net/3c515.c +++ b/drivers/net/3c515.c | |||
| @@ -832,7 +832,9 @@ static int corkscrew_open(struct net_device *dev) | |||
| 832 | skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ | 832 | skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ |
| 833 | vp->rx_ring[i].addr = isa_virt_to_bus(skb->data); | 833 | vp->rx_ring[i].addr = isa_virt_to_bus(skb->data); |
| 834 | } | 834 | } |
| 835 | vp->rx_ring[i - 1].next = isa_virt_to_bus(&vp->rx_ring[0]); /* Wrap the ring. */ | 835 | if (i != 0) |
| 836 | vp->rx_ring[i - 1].next = | ||
| 837 | isa_virt_to_bus(&vp->rx_ring[0]); /* Wrap the ring. */ | ||
| 836 | outl(isa_virt_to_bus(&vp->rx_ring[0]), ioaddr + UpListPtr); | 838 | outl(isa_virt_to_bus(&vp->rx_ring[0]), ioaddr + UpListPtr); |
| 837 | } | 839 | } |
| 838 | if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */ | 840 | if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */ |
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index c34aee91250b..c20416850948 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c | |||
| @@ -2721,13 +2721,15 @@ dump_tx_ring(struct net_device *dev) | |||
| 2721 | &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]); | 2721 | &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]); |
| 2722 | issue_and_wait(dev, DownStall); | 2722 | issue_and_wait(dev, DownStall); |
| 2723 | for (i = 0; i < TX_RING_SIZE; i++) { | 2723 | for (i = 0; i < TX_RING_SIZE; i++) { |
| 2724 | pr_err(" %d: @%p length %8.8x status %8.8x\n", i, | 2724 | unsigned int length; |
| 2725 | &vp->tx_ring[i], | 2725 | |
| 2726 | #if DO_ZEROCOPY | 2726 | #if DO_ZEROCOPY |
| 2727 | le32_to_cpu(vp->tx_ring[i].frag[0].length), | 2727 | length = le32_to_cpu(vp->tx_ring[i].frag[0].length); |
| 2728 | #else | 2728 | #else |
| 2729 | le32_to_cpu(vp->tx_ring[i].length), | 2729 | length = le32_to_cpu(vp->tx_ring[i].length); |
| 2730 | #endif | 2730 | #endif |
| 2731 | pr_err(" %d: @%p length %8.8x status %8.8x\n", | ||
| 2732 | i, &vp->tx_ring[i], length, | ||
| 2731 | le32_to_cpu(vp->tx_ring[i].status)); | 2733 | le32_to_cpu(vp->tx_ring[i].status)); |
| 2732 | } | 2734 | } |
| 2733 | if (!stalled) | 2735 | if (!stalled) |
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index 1686dca28748..1f016d66684a 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c | |||
| @@ -1474,13 +1474,13 @@ static void eexp_hw_init586(struct net_device *dev) | |||
| 1474 | outw(0x0000, ioaddr + 0x800c); | 1474 | outw(0x0000, ioaddr + 0x800c); |
| 1475 | outw(0x0000, ioaddr + 0x800e); | 1475 | outw(0x0000, ioaddr + 0x800e); |
| 1476 | 1476 | ||
| 1477 | for (i = 0; i < (sizeof(start_code)); i+=32) { | 1477 | for (i = 0; i < ARRAY_SIZE(start_code) * 2; i+=32) { |
| 1478 | int j; | 1478 | int j; |
| 1479 | outw(i, ioaddr + SM_PTR); | 1479 | outw(i, ioaddr + SM_PTR); |
| 1480 | for (j = 0; j < 16; j+=2) | 1480 | for (j = 0; j < 16 && (i+j)/2 < ARRAY_SIZE(start_code); j+=2) |
| 1481 | outw(start_code[(i+j)/2], | 1481 | outw(start_code[(i+j)/2], |
| 1482 | ioaddr+0x4000+j); | 1482 | ioaddr+0x4000+j); |
| 1483 | for (j = 0; j < 16; j+=2) | 1483 | for (j = 0; j < 16 && (i+j+16)/2 < ARRAY_SIZE(start_code); j+=2) |
| 1484 | outw(start_code[(i+j+16)/2], | 1484 | outw(start_code[(i+j+16)/2], |
| 1485 | ioaddr+0x8000+j); | 1485 | ioaddr+0x8000+j); |
| 1486 | } | 1486 | } |
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h index 78952f8324e2..fa311a950996 100644 --- a/drivers/net/ehea/ehea.h +++ b/drivers/net/ehea/ehea.h | |||
| @@ -40,7 +40,7 @@ | |||
| 40 | #include <asm/io.h> | 40 | #include <asm/io.h> |
| 41 | 41 | ||
| 42 | #define DRV_NAME "ehea" | 42 | #define DRV_NAME "ehea" |
| 43 | #define DRV_VERSION "EHEA_0101" | 43 | #define DRV_VERSION "EHEA_0102" |
| 44 | 44 | ||
| 45 | /* eHEA capability flags */ | 45 | /* eHEA capability flags */ |
| 46 | #define DLPAR_PORT_ADD_REM 1 | 46 | #define DLPAR_PORT_ADD_REM 1 |
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index e8d46cc1bec2..977c3d358279 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c | |||
| @@ -1545,6 +1545,9 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr) | |||
| 1545 | { | 1545 | { |
| 1546 | int ret, i; | 1546 | int ret, i; |
| 1547 | 1547 | ||
| 1548 | if (pr->qp) | ||
| 1549 | netif_napi_del(&pr->napi); | ||
| 1550 | |||
| 1548 | ret = ehea_destroy_qp(pr->qp); | 1551 | ret = ehea_destroy_qp(pr->qp); |
| 1549 | 1552 | ||
| 1550 | if (!ret) { | 1553 | if (!ret) { |
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c index dbf06e9313cc..2234118eedbb 100644 --- a/drivers/net/gianfar_ethtool.c +++ b/drivers/net/gianfar_ethtool.c | |||
| @@ -366,9 +366,8 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals | |||
| 366 | return -EINVAL; | 366 | return -EINVAL; |
| 367 | } | 367 | } |
| 368 | 368 | ||
| 369 | priv->rxic = mk_ic_value( | 369 | priv->rxic = mk_ic_value(cvals->rx_max_coalesced_frames, |
| 370 | gfar_usecs2ticks(priv, cvals->rx_coalesce_usecs), | 370 | gfar_usecs2ticks(priv, cvals->rx_coalesce_usecs)); |
| 371 | cvals->rx_max_coalesced_frames); | ||
| 372 | 371 | ||
| 373 | /* Set up tx coalescing */ | 372 | /* Set up tx coalescing */ |
| 374 | if ((cvals->tx_coalesce_usecs == 0) || | 373 | if ((cvals->tx_coalesce_usecs == 0) || |
| @@ -390,9 +389,8 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals | |||
| 390 | return -EINVAL; | 389 | return -EINVAL; |
| 391 | } | 390 | } |
| 392 | 391 | ||
| 393 | priv->txic = mk_ic_value( | 392 | priv->txic = mk_ic_value(cvals->tx_max_coalesced_frames, |
| 394 | gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs), | 393 | gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs)); |
| 395 | cvals->tx_max_coalesced_frames); | ||
| 396 | 394 | ||
| 397 | gfar_write(&priv->regs->rxic, 0); | 395 | gfar_write(&priv->regs->rxic, 0); |
| 398 | if (priv->rxcoalescing) | 396 | if (priv->rxcoalescing) |
diff --git a/drivers/net/igbvf/vf.c b/drivers/net/igbvf/vf.c index 2a4faf9ade69..a9a61efa964c 100644 --- a/drivers/net/igbvf/vf.c +++ b/drivers/net/igbvf/vf.c | |||
| @@ -274,6 +274,8 @@ static s32 e1000_set_vfta_vf(struct e1000_hw *hw, u16 vid, bool set) | |||
| 274 | 274 | ||
| 275 | err = mbx->ops.read_posted(hw, msgbuf, 2); | 275 | err = mbx->ops.read_posted(hw, msgbuf, 2); |
| 276 | 276 | ||
| 277 | msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS; | ||
| 278 | |||
| 277 | /* if nacked the vlan was rejected */ | 279 | /* if nacked the vlan was rejected */ |
| 278 | if (!err && (msgbuf[0] == (E1000_VF_SET_VLAN | E1000_VT_MSGTYPE_NACK))) | 280 | if (!err && (msgbuf[0] == (E1000_VF_SET_VLAN | E1000_VT_MSGTYPE_NACK))) |
| 279 | err = -E1000_ERR_MAC_INIT; | 281 | err = -E1000_ERR_MAC_INIT; |
| @@ -317,6 +319,8 @@ static void e1000_rar_set_vf(struct e1000_hw *hw, u8 * addr, u32 index) | |||
| 317 | if (!ret_val) | 319 | if (!ret_val) |
| 318 | ret_val = mbx->ops.read_posted(hw, msgbuf, 3); | 320 | ret_val = mbx->ops.read_posted(hw, msgbuf, 3); |
| 319 | 321 | ||
| 322 | msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS; | ||
| 323 | |||
| 320 | /* if nacked the address was rejected, use "perm_addr" */ | 324 | /* if nacked the address was rejected, use "perm_addr" */ |
| 321 | if (!ret_val && | 325 | if (!ret_val && |
| 322 | (msgbuf[0] == (E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK))) | 326 | (msgbuf[0] == (E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK))) |
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index 1b12c7ba275f..e11d83d5852b 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h | |||
| @@ -96,6 +96,8 @@ | |||
| 96 | #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 | 96 | #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 |
| 97 | #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 | 97 | #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 |
| 98 | 98 | ||
| 99 | #define IXGBE_MAX_RSC_INT_RATE 162760 | ||
| 100 | |||
| 99 | /* wrapper around a pointer to a socket buffer, | 101 | /* wrapper around a pointer to a socket buffer, |
| 100 | * so a DMA handle can be stored along with the buffer */ | 102 | * so a DMA handle can be stored along with the buffer */ |
| 101 | struct ixgbe_tx_buffer { | 103 | struct ixgbe_tx_buffer { |
diff --git a/drivers/net/ixgbe/ixgbe_82598.c b/drivers/net/ixgbe/ixgbe_82598.c index b9923047ce11..522c03bc1dad 100644 --- a/drivers/net/ixgbe/ixgbe_82598.c +++ b/drivers/net/ixgbe/ixgbe_82598.c | |||
| @@ -50,6 +50,51 @@ static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, | |||
| 50 | u8 *eeprom_data); | 50 | u8 *eeprom_data); |
| 51 | 51 | ||
| 52 | /** | 52 | /** |
| 53 | * ixgbe_set_pcie_completion_timeout - set pci-e completion timeout | ||
| 54 | * @hw: pointer to the HW structure | ||
| 55 | * | ||
| 56 | * The defaults for 82598 should be in the range of 50us to 50ms, | ||
| 57 | * however the hardware default for these parts is 500us to 1ms which is less | ||
| 58 | * than the 10ms recommended by the pci-e spec. To address this we need to | ||
| 59 | * increase the value to either 10ms to 250ms for capability version 1 config, | ||
| 60 | * or 16ms to 55ms for version 2. | ||
| 61 | **/ | ||
| 62 | void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw) | ||
| 63 | { | ||
| 64 | struct ixgbe_adapter *adapter = hw->back; | ||
| 65 | u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR); | ||
| 66 | u16 pcie_devctl2; | ||
| 67 | |||
| 68 | /* only take action if timeout value is defaulted to 0 */ | ||
| 69 | if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK) | ||
| 70 | goto out; | ||
| 71 | |||
| 72 | /* | ||
| 73 | * if capabilities version is type 1 we can write the | ||
| 74 | * timeout of 10ms to 250ms through the GCR register | ||
| 75 | */ | ||
| 76 | if (!(gcr & IXGBE_GCR_CAP_VER2)) { | ||
| 77 | gcr |= IXGBE_GCR_CMPL_TMOUT_10ms; | ||
| 78 | goto out; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* | ||
| 82 | * for version 2 capabilities we need to write the config space | ||
| 83 | * directly in order to set the completion timeout value for | ||
| 84 | * 16ms to 55ms | ||
| 85 | */ | ||
| 86 | pci_read_config_word(adapter->pdev, | ||
| 87 | IXGBE_PCI_DEVICE_CONTROL2, &pcie_devctl2); | ||
| 88 | pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms; | ||
| 89 | pci_write_config_word(adapter->pdev, | ||
| 90 | IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2); | ||
| 91 | out: | ||
| 92 | /* disable completion timeout resend */ | ||
| 93 | gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND; | ||
| 94 | IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr); | ||
| 95 | } | ||
| 96 | |||
| 97 | /** | ||
| 53 | * ixgbe_get_pcie_msix_count_82598 - Gets MSI-X vector count | 98 | * ixgbe_get_pcie_msix_count_82598 - Gets MSI-X vector count |
| 54 | * @hw: pointer to hardware structure | 99 | * @hw: pointer to hardware structure |
| 55 | * | 100 | * |
| @@ -153,6 +198,26 @@ out: | |||
| 153 | } | 198 | } |
| 154 | 199 | ||
| 155 | /** | 200 | /** |
| 201 | * ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx | ||
| 202 | * @hw: pointer to hardware structure | ||
| 203 | * | ||
| 204 | * Starts the hardware using the generic start_hw function. | ||
| 205 | * Then set pcie completion timeout | ||
| 206 | **/ | ||
| 207 | s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) | ||
| 208 | { | ||
| 209 | s32 ret_val = 0; | ||
| 210 | |||
| 211 | ret_val = ixgbe_start_hw_generic(hw); | ||
| 212 | |||
| 213 | /* set the completion timeout for interface */ | ||
| 214 | if (ret_val == 0) | ||
| 215 | ixgbe_set_pcie_completion_timeout(hw); | ||
| 216 | |||
| 217 | return ret_val; | ||
| 218 | } | ||
| 219 | |||
| 220 | /** | ||
| 156 | * ixgbe_get_link_capabilities_82598 - Determines link capabilities | 221 | * ixgbe_get_link_capabilities_82598 - Determines link capabilities |
| 157 | * @hw: pointer to hardware structure | 222 | * @hw: pointer to hardware structure |
| 158 | * @speed: pointer to link speed | 223 | * @speed: pointer to link speed |
| @@ -1085,7 +1150,7 @@ out: | |||
| 1085 | static struct ixgbe_mac_operations mac_ops_82598 = { | 1150 | static struct ixgbe_mac_operations mac_ops_82598 = { |
| 1086 | .init_hw = &ixgbe_init_hw_generic, | 1151 | .init_hw = &ixgbe_init_hw_generic, |
| 1087 | .reset_hw = &ixgbe_reset_hw_82598, | 1152 | .reset_hw = &ixgbe_reset_hw_82598, |
| 1088 | .start_hw = &ixgbe_start_hw_generic, | 1153 | .start_hw = &ixgbe_start_hw_82598, |
| 1089 | .clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic, | 1154 | .clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic, |
| 1090 | .get_media_type = &ixgbe_get_media_type_82598, | 1155 | .get_media_type = &ixgbe_get_media_type_82598, |
| 1091 | .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82598, | 1156 | .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82598, |
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 2a978008fd6e..79144e950a34 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c | |||
| @@ -1975,7 +1975,10 @@ static int ixgbe_set_coalesce(struct net_device *netdev, | |||
| 1975 | * any other value means disable eitr, which is best | 1975 | * any other value means disable eitr, which is best |
| 1976 | * served by setting the interrupt rate very high | 1976 | * served by setting the interrupt rate very high |
| 1977 | */ | 1977 | */ |
| 1978 | adapter->eitr_param = IXGBE_MAX_INT_RATE; | 1978 | if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) |
| 1979 | adapter->eitr_param = IXGBE_MAX_RSC_INT_RATE; | ||
| 1980 | else | ||
| 1981 | adapter->eitr_param = IXGBE_MAX_INT_RATE; | ||
| 1979 | adapter->itr_setting = 0; | 1982 | adapter->itr_setting = 0; |
| 1980 | } | 1983 | } |
| 1981 | 1984 | ||
| @@ -1999,13 +2002,13 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data) | |||
| 1999 | 2002 | ||
| 2000 | ethtool_op_set_flags(netdev, data); | 2003 | ethtool_op_set_flags(netdev, data); |
| 2001 | 2004 | ||
| 2002 | if (!(adapter->flags & IXGBE_FLAG2_RSC_CAPABLE)) | 2005 | if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)) |
| 2003 | return 0; | 2006 | return 0; |
| 2004 | 2007 | ||
| 2005 | /* if state changes we need to update adapter->flags and reset */ | 2008 | /* if state changes we need to update adapter->flags and reset */ |
| 2006 | if ((!!(data & ETH_FLAG_LRO)) != | 2009 | if ((!!(data & ETH_FLAG_LRO)) != |
| 2007 | (!!(adapter->flags & IXGBE_FLAG2_RSC_ENABLED))) { | 2010 | (!!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED))) { |
| 2008 | adapter->flags ^= IXGBE_FLAG2_RSC_ENABLED; | 2011 | adapter->flags2 ^= IXGBE_FLAG2_RSC_ENABLED; |
| 2009 | if (netif_running(netdev)) | 2012 | if (netif_running(netdev)) |
| 2010 | ixgbe_reinit_locked(adapter); | 2013 | ixgbe_reinit_locked(adapter); |
| 2011 | else | 2014 | else |
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 200454f30f6a..110c65ab5cb5 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c | |||
| @@ -780,7 +780,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, | |||
| 780 | prefetch(next_rxd); | 780 | prefetch(next_rxd); |
| 781 | cleaned_count++; | 781 | cleaned_count++; |
| 782 | 782 | ||
| 783 | if (adapter->flags & IXGBE_FLAG2_RSC_CAPABLE) | 783 | if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) |
| 784 | rsc_count = ixgbe_get_rsc_count(rx_desc); | 784 | rsc_count = ixgbe_get_rsc_count(rx_desc); |
| 785 | 785 | ||
| 786 | if (rsc_count) { | 786 | if (rsc_count) { |
| @@ -2036,7 +2036,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) | |||
| 2036 | IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); | 2036 | IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); |
| 2037 | } | 2037 | } |
| 2038 | } else { | 2038 | } else { |
| 2039 | if (!(adapter->flags & IXGBE_FLAG2_RSC_ENABLED) && | 2039 | if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) && |
| 2040 | (netdev->mtu <= ETH_DATA_LEN)) | 2040 | (netdev->mtu <= ETH_DATA_LEN)) |
| 2041 | rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; | 2041 | rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; |
| 2042 | else | 2042 | else |
| @@ -2165,7 +2165,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) | |||
| 2165 | IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); | 2165 | IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); |
| 2166 | } | 2166 | } |
| 2167 | 2167 | ||
| 2168 | if (adapter->flags & IXGBE_FLAG2_RSC_ENABLED) { | 2168 | if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { |
| 2169 | /* Enable 82599 HW-RSC */ | 2169 | /* Enable 82599 HW-RSC */ |
| 2170 | for (i = 0; i < adapter->num_rx_queues; i++) { | 2170 | for (i = 0; i < adapter->num_rx_queues; i++) { |
| 2171 | j = adapter->rx_ring[i].reg_idx; | 2171 | j = adapter->rx_ring[i].reg_idx; |
| @@ -3812,8 +3812,8 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) | |||
| 3812 | adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82598; | 3812 | adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82598; |
| 3813 | } else if (hw->mac.type == ixgbe_mac_82599EB) { | 3813 | } else if (hw->mac.type == ixgbe_mac_82599EB) { |
| 3814 | adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82599; | 3814 | adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82599; |
| 3815 | adapter->flags |= IXGBE_FLAG2_RSC_CAPABLE; | 3815 | adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; |
| 3816 | adapter->flags |= IXGBE_FLAG2_RSC_ENABLED; | 3816 | adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; |
| 3817 | adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; | 3817 | adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; |
| 3818 | adapter->ring_feature[RING_F_FDIR].indices = | 3818 | adapter->ring_feature[RING_F_FDIR].indices = |
| 3819 | IXGBE_MAX_FDIR_INDICES; | 3819 | IXGBE_MAX_FDIR_INDICES; |
| @@ -5360,12 +5360,19 @@ static int ixgbe_del_sanmac_netdev(struct net_device *dev) | |||
| 5360 | static void ixgbe_netpoll(struct net_device *netdev) | 5360 | static void ixgbe_netpoll(struct net_device *netdev) |
| 5361 | { | 5361 | { |
| 5362 | struct ixgbe_adapter *adapter = netdev_priv(netdev); | 5362 | struct ixgbe_adapter *adapter = netdev_priv(netdev); |
| 5363 | int i; | ||
| 5363 | 5364 | ||
| 5364 | disable_irq(adapter->pdev->irq); | ||
| 5365 | adapter->flags |= IXGBE_FLAG_IN_NETPOLL; | 5365 | adapter->flags |= IXGBE_FLAG_IN_NETPOLL; |
| 5366 | ixgbe_intr(adapter->pdev->irq, netdev); | 5366 | if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { |
| 5367 | int num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; | ||
| 5368 | for (i = 0; i < num_q_vectors; i++) { | ||
| 5369 | struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; | ||
| 5370 | ixgbe_msix_clean_many(0, q_vector); | ||
| 5371 | } | ||
| 5372 | } else { | ||
| 5373 | ixgbe_intr(adapter->pdev->irq, netdev); | ||
| 5374 | } | ||
| 5367 | adapter->flags &= ~IXGBE_FLAG_IN_NETPOLL; | 5375 | adapter->flags &= ~IXGBE_FLAG_IN_NETPOLL; |
| 5368 | enable_irq(adapter->pdev->irq); | ||
| 5369 | } | 5376 | } |
| 5370 | #endif | 5377 | #endif |
| 5371 | 5378 | ||
| @@ -5611,7 +5618,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, | |||
| 5611 | if (pci_using_dac) | 5618 | if (pci_using_dac) |
| 5612 | netdev->features |= NETIF_F_HIGHDMA; | 5619 | netdev->features |= NETIF_F_HIGHDMA; |
| 5613 | 5620 | ||
| 5614 | if (adapter->flags & IXGBE_FLAG2_RSC_ENABLED) | 5621 | if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) |
| 5615 | netdev->features |= NETIF_F_LRO; | 5622 | netdev->features |= NETIF_F_LRO; |
| 5616 | 5623 | ||
| 5617 | /* make sure the EEPROM is good */ | 5624 | /* make sure the EEPROM is good */ |
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index fa87309dc087..be90eb4575f6 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h | |||
| @@ -718,6 +718,12 @@ | |||
| 718 | #define IXGBE_ECC_STATUS_82599 0x110E0 | 718 | #define IXGBE_ECC_STATUS_82599 0x110E0 |
| 719 | #define IXGBE_BAR_CTRL_82599 0x110F4 | 719 | #define IXGBE_BAR_CTRL_82599 0x110F4 |
| 720 | 720 | ||
| 721 | /* PCI Express Control */ | ||
| 722 | #define IXGBE_GCR_CMPL_TMOUT_MASK 0x0000F000 | ||
| 723 | #define IXGBE_GCR_CMPL_TMOUT_10ms 0x00001000 | ||
| 724 | #define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000 | ||
| 725 | #define IXGBE_GCR_CAP_VER2 0x00040000 | ||
| 726 | |||
| 721 | /* Time Sync Registers */ | 727 | /* Time Sync Registers */ |
| 722 | #define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ | 728 | #define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ |
| 723 | #define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ | 729 | #define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ |
| @@ -1521,6 +1527,7 @@ | |||
| 1521 | 1527 | ||
| 1522 | /* PCI Bus Info */ | 1528 | /* PCI Bus Info */ |
| 1523 | #define IXGBE_PCI_LINK_STATUS 0xB2 | 1529 | #define IXGBE_PCI_LINK_STATUS 0xB2 |
| 1530 | #define IXGBE_PCI_DEVICE_CONTROL2 0xC8 | ||
| 1524 | #define IXGBE_PCI_LINK_WIDTH 0x3F0 | 1531 | #define IXGBE_PCI_LINK_WIDTH 0x3F0 |
| 1525 | #define IXGBE_PCI_LINK_WIDTH_1 0x10 | 1532 | #define IXGBE_PCI_LINK_WIDTH_1 0x10 |
| 1526 | #define IXGBE_PCI_LINK_WIDTH_2 0x20 | 1533 | #define IXGBE_PCI_LINK_WIDTH_2 0x20 |
| @@ -1531,6 +1538,7 @@ | |||
| 1531 | #define IXGBE_PCI_LINK_SPEED_5000 0x2 | 1538 | #define IXGBE_PCI_LINK_SPEED_5000 0x2 |
| 1532 | #define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E | 1539 | #define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E |
| 1533 | #define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 | 1540 | #define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 |
| 1541 | #define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 | ||
| 1534 | 1542 | ||
| 1535 | /* Number of 100 microseconds we wait for PCI Express master disable */ | 1543 | /* Number of 100 microseconds we wait for PCI Express master disable */ |
| 1536 | #define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 | 1544 | #define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 |
diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 08c43f2ae72b..5a88b3f57693 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c | |||
| @@ -249,6 +249,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
| 249 | pci_unmap_page(mdev->pdev, | 249 | pci_unmap_page(mdev->pdev, |
| 250 | (dma_addr_t) be64_to_cpu(data->addr), | 250 | (dma_addr_t) be64_to_cpu(data->addr), |
| 251 | frag->size, PCI_DMA_TODEVICE); | 251 | frag->size, PCI_DMA_TODEVICE); |
| 252 | ++data; | ||
| 252 | } | 253 | } |
| 253 | } | 254 | } |
| 254 | /* Stamp the freed descriptor */ | 255 | /* Stamp the freed descriptor */ |
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 637ac8b89bac..3cd8cfcf627b 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c | |||
| @@ -221,7 +221,7 @@ netxen_napi_disable(struct netxen_adapter *adapter) | |||
| 221 | } | 221 | } |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | static int nx_set_dma_mask(struct netxen_adapter *adapter, uint8_t revision_id) | 224 | static int nx_set_dma_mask(struct netxen_adapter *adapter) |
| 225 | { | 225 | { |
| 226 | struct pci_dev *pdev = adapter->pdev; | 226 | struct pci_dev *pdev = adapter->pdev; |
| 227 | uint64_t mask, cmask; | 227 | uint64_t mask, cmask; |
| @@ -229,19 +229,17 @@ static int nx_set_dma_mask(struct netxen_adapter *adapter, uint8_t revision_id) | |||
| 229 | adapter->pci_using_dac = 0; | 229 | adapter->pci_using_dac = 0; |
| 230 | 230 | ||
| 231 | mask = DMA_BIT_MASK(32); | 231 | mask = DMA_BIT_MASK(32); |
| 232 | /* | ||
| 233 | * Consistent DMA mask is set to 32 bit because it cannot be set to | ||
| 234 | * 35 bits. For P3 also leave it at 32 bits for now. Only the rings | ||
| 235 | * come off this pool. | ||
| 236 | */ | ||
| 237 | cmask = DMA_BIT_MASK(32); | 232 | cmask = DMA_BIT_MASK(32); |
| 238 | 233 | ||
| 234 | if (NX_IS_REVISION_P2(adapter->ahw.revision_id)) { | ||
| 239 | #ifndef CONFIG_IA64 | 235 | #ifndef CONFIG_IA64 |
| 240 | if (revision_id >= NX_P3_B0) | ||
| 241 | mask = DMA_BIT_MASK(39); | ||
| 242 | else if (revision_id == NX_P2_C1) | ||
| 243 | mask = DMA_BIT_MASK(35); | 236 | mask = DMA_BIT_MASK(35); |
| 244 | #endif | 237 | #endif |
| 238 | } else { | ||
| 239 | mask = DMA_BIT_MASK(39); | ||
| 240 | cmask = mask; | ||
| 241 | } | ||
| 242 | |||
| 245 | if (pci_set_dma_mask(pdev, mask) == 0 && | 243 | if (pci_set_dma_mask(pdev, mask) == 0 && |
| 246 | pci_set_consistent_dma_mask(pdev, cmask) == 0) { | 244 | pci_set_consistent_dma_mask(pdev, cmask) == 0) { |
| 247 | adapter->pci_using_dac = 1; | 245 | adapter->pci_using_dac = 1; |
| @@ -256,7 +254,7 @@ static int | |||
| 256 | nx_update_dma_mask(struct netxen_adapter *adapter) | 254 | nx_update_dma_mask(struct netxen_adapter *adapter) |
| 257 | { | 255 | { |
| 258 | int change, shift, err; | 256 | int change, shift, err; |
| 259 | uint64_t mask, old_mask; | 257 | uint64_t mask, old_mask, old_cmask; |
| 260 | struct pci_dev *pdev = adapter->pdev; | 258 | struct pci_dev *pdev = adapter->pdev; |
| 261 | 259 | ||
| 262 | change = 0; | 260 | change = 0; |
| @@ -272,14 +270,29 @@ nx_update_dma_mask(struct netxen_adapter *adapter) | |||
| 272 | 270 | ||
| 273 | if (change) { | 271 | if (change) { |
| 274 | old_mask = pdev->dma_mask; | 272 | old_mask = pdev->dma_mask; |
| 273 | old_cmask = pdev->dev.coherent_dma_mask; | ||
| 274 | |||
| 275 | mask = (1ULL<<(32+shift)) - 1; | 275 | mask = (1ULL<<(32+shift)) - 1; |
| 276 | 276 | ||
| 277 | err = pci_set_dma_mask(pdev, mask); | 277 | err = pci_set_dma_mask(pdev, mask); |
| 278 | if (err) | 278 | if (err) |
| 279 | return pci_set_dma_mask(pdev, old_mask); | 279 | goto err_out; |
| 280 | |||
| 281 | if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) { | ||
| 282 | |||
| 283 | err = pci_set_consistent_dma_mask(pdev, mask); | ||
| 284 | if (err) | ||
| 285 | goto err_out; | ||
| 286 | } | ||
| 287 | dev_info(&pdev->dev, "using %d-bit dma mask\n", 32+shift); | ||
| 280 | } | 288 | } |
| 281 | 289 | ||
| 282 | return 0; | 290 | return 0; |
| 291 | |||
| 292 | err_out: | ||
| 293 | pci_set_dma_mask(pdev, old_mask); | ||
| 294 | pci_set_consistent_dma_mask(pdev, old_cmask); | ||
| 295 | return err; | ||
| 283 | } | 296 | } |
| 284 | 297 | ||
| 285 | static void netxen_check_options(struct netxen_adapter *adapter) | 298 | static void netxen_check_options(struct netxen_adapter *adapter) |
| @@ -1006,7 +1019,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
| 1006 | revision_id = pdev->revision; | 1019 | revision_id = pdev->revision; |
| 1007 | adapter->ahw.revision_id = revision_id; | 1020 | adapter->ahw.revision_id = revision_id; |
| 1008 | 1021 | ||
| 1009 | err = nx_set_dma_mask(adapter, revision_id); | 1022 | err = nx_set_dma_mask(adapter); |
| 1010 | if (err) | 1023 | if (err) |
| 1011 | goto err_out_free_netdev; | 1024 | goto err_out_free_netdev; |
| 1012 | 1025 | ||
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 28368157dac4..a646a445fda9 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c | |||
| @@ -1611,8 +1611,11 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
| 1611 | if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 | 1611 | if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 |
| 1612 | && pcnet32_dwio_check(ioaddr)) { | 1612 | && pcnet32_dwio_check(ioaddr)) { |
| 1613 | a = &pcnet32_dwio; | 1613 | a = &pcnet32_dwio; |
| 1614 | } else | 1614 | } else { |
| 1615 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1616 | printk(KERN_ERR PFX "No access methods\n"); | ||
| 1615 | goto err_release_region; | 1617 | goto err_release_region; |
| 1618 | } | ||
| 1616 | } | 1619 | } |
| 1617 | 1620 | ||
| 1618 | chip_version = | 1621 | chip_version = |
| @@ -1719,7 +1722,9 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
| 1719 | ret = -ENOMEM; | 1722 | ret = -ENOMEM; |
| 1720 | goto err_release_region; | 1723 | goto err_release_region; |
| 1721 | } | 1724 | } |
| 1722 | SET_NETDEV_DEV(dev, &pdev->dev); | 1725 | |
| 1726 | if (pdev) | ||
| 1727 | SET_NETDEV_DEV(dev, &pdev->dev); | ||
| 1723 | 1728 | ||
| 1724 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1729 | if (pcnet32_debug & NETIF_MSG_PROBE) |
| 1725 | printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); | 1730 | printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); |
| @@ -1818,7 +1823,6 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
| 1818 | 1823 | ||
| 1819 | spin_lock_init(&lp->lock); | 1824 | spin_lock_init(&lp->lock); |
| 1820 | 1825 | ||
| 1821 | SET_NETDEV_DEV(dev, &pdev->dev); | ||
| 1822 | lp->name = chipname; | 1826 | lp->name = chipname; |
| 1823 | lp->shared_irq = shared; | 1827 | lp->shared_irq = shared; |
| 1824 | lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ | 1828 | lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ |
| @@ -1852,12 +1856,6 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
| 1852 | ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) | 1856 | ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) |
| 1853 | lp->options |= PCNET32_PORT_FD; | 1857 | lp->options |= PCNET32_PORT_FD; |
| 1854 | 1858 | ||
| 1855 | if (!a) { | ||
| 1856 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
| 1857 | printk(KERN_ERR PFX "No access methods\n"); | ||
| 1858 | ret = -ENODEV; | ||
| 1859 | goto err_free_consistent; | ||
| 1860 | } | ||
| 1861 | lp->a = *a; | 1859 | lp->a = *a; |
| 1862 | 1860 | ||
| 1863 | /* prior to register_netdev, dev->name is not yet correct */ | 1861 | /* prior to register_netdev, dev->name is not yet correct */ |
| @@ -1973,14 +1971,13 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
| 1973 | 1971 | ||
| 1974 | return 0; | 1972 | return 0; |
| 1975 | 1973 | ||
| 1976 | err_free_ring: | 1974 | err_free_ring: |
| 1977 | pcnet32_free_ring(dev); | 1975 | pcnet32_free_ring(dev); |
| 1978 | err_free_consistent: | ||
| 1979 | pci_free_consistent(lp->pci_dev, sizeof(*lp->init_block), | 1976 | pci_free_consistent(lp->pci_dev, sizeof(*lp->init_block), |
| 1980 | lp->init_block, lp->init_dma_addr); | 1977 | lp->init_block, lp->init_dma_addr); |
| 1981 | err_free_netdev: | 1978 | err_free_netdev: |
| 1982 | free_netdev(dev); | 1979 | free_netdev(dev); |
| 1983 | err_release_region: | 1980 | err_release_region: |
| 1984 | release_region(ioaddr, PCNET32_TOTAL_SIZE); | 1981 | release_region(ioaddr, PCNET32_TOTAL_SIZE); |
| 1985 | return ret; | 1982 | return ret; |
| 1986 | } | 1983 | } |
| @@ -2089,6 +2086,7 @@ static void pcnet32_free_ring(struct net_device *dev) | |||
| 2089 | static int pcnet32_open(struct net_device *dev) | 2086 | static int pcnet32_open(struct net_device *dev) |
| 2090 | { | 2087 | { |
| 2091 | struct pcnet32_private *lp = netdev_priv(dev); | 2088 | struct pcnet32_private *lp = netdev_priv(dev); |
| 2089 | struct pci_dev *pdev = lp->pci_dev; | ||
| 2092 | unsigned long ioaddr = dev->base_addr; | 2090 | unsigned long ioaddr = dev->base_addr; |
| 2093 | u16 val; | 2091 | u16 val; |
| 2094 | int i; | 2092 | int i; |
| @@ -2149,9 +2147,9 @@ static int pcnet32_open(struct net_device *dev) | |||
| 2149 | lp->a.write_csr(ioaddr, 124, val); | 2147 | lp->a.write_csr(ioaddr, 124, val); |
| 2150 | 2148 | ||
| 2151 | /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ | 2149 | /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ |
| 2152 | if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT && | 2150 | if (pdev && pdev->subsystem_vendor == PCI_VENDOR_ID_AT && |
| 2153 | (lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || | 2151 | (pdev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || |
| 2154 | lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { | 2152 | pdev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { |
| 2155 | if (lp->options & PCNET32_PORT_ASEL) { | 2153 | if (lp->options & PCNET32_PORT_ASEL) { |
| 2156 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; | 2154 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; |
| 2157 | if (netif_msg_link(lp)) | 2155 | if (netif_msg_link(lp)) |
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 639d11bc444e..cd37d739ac74 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c | |||
| @@ -1384,7 +1384,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) | |||
| 1384 | 1384 | ||
| 1385 | /* create a fragment for each channel */ | 1385 | /* create a fragment for each channel */ |
| 1386 | bits = B; | 1386 | bits = B; |
| 1387 | while (nfree > 0 && len > 0) { | 1387 | while (len > 0) { |
| 1388 | list = list->next; | 1388 | list = list->next; |
| 1389 | if (list == &ppp->channels) { | 1389 | if (list == &ppp->channels) { |
| 1390 | i = 0; | 1390 | i = 0; |
| @@ -1431,29 +1431,31 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) | |||
| 1431 | *otherwise divide it according to the speed | 1431 | *otherwise divide it according to the speed |
| 1432 | *of the channel we are going to transmit on | 1432 | *of the channel we are going to transmit on |
| 1433 | */ | 1433 | */ |
| 1434 | if (pch->speed == 0) { | 1434 | if (nfree > 0) { |
| 1435 | flen = totlen/nfree ; | 1435 | if (pch->speed == 0) { |
| 1436 | if (nbigger > 0) { | 1436 | flen = totlen/nfree ; |
| 1437 | flen++; | 1437 | if (nbigger > 0) { |
| 1438 | nbigger--; | 1438 | flen++; |
| 1439 | } | 1439 | nbigger--; |
| 1440 | } else { | 1440 | } |
| 1441 | flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) / | 1441 | } else { |
| 1442 | ((totspeed*totfree)/pch->speed)) - hdrlen; | 1442 | flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) / |
| 1443 | if (nbigger > 0) { | 1443 | ((totspeed*totfree)/pch->speed)) - hdrlen; |
| 1444 | flen += ((totfree - nzero)*pch->speed)/totspeed; | 1444 | if (nbigger > 0) { |
| 1445 | nbigger -= ((totfree - nzero)*pch->speed)/ | 1445 | flen += ((totfree - nzero)*pch->speed)/totspeed; |
| 1446 | nbigger -= ((totfree - nzero)*pch->speed)/ | ||
| 1446 | totspeed; | 1447 | totspeed; |
| 1448 | } | ||
| 1447 | } | 1449 | } |
| 1450 | nfree--; | ||
| 1448 | } | 1451 | } |
| 1449 | nfree--; | ||
| 1450 | 1452 | ||
| 1451 | /* | 1453 | /* |
| 1452 | *check if we are on the last channel or | 1454 | *check if we are on the last channel or |
| 1453 | *we exceeded the length of the data to | 1455 | *we exceeded the length of the data to |
| 1454 | *fragment | 1456 | *fragment |
| 1455 | */ | 1457 | */ |
| 1456 | if ((nfree == 0) || (flen > len)) | 1458 | if ((nfree <= 0) || (flen > len)) |
| 1457 | flen = len; | 1459 | flen = len; |
| 1458 | /* | 1460 | /* |
| 1459 | *it is not worth to tx on slow channels: | 1461 | *it is not worth to tx on slow channels: |
| @@ -1467,7 +1469,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) | |||
| 1467 | continue; | 1469 | continue; |
| 1468 | } | 1470 | } |
| 1469 | 1471 | ||
| 1470 | mtu = pch->chan->mtu + 2 - hdrlen; | 1472 | mtu = pch->chan->mtu - hdrlen; |
| 1471 | if (mtu < 4) | 1473 | if (mtu < 4) |
| 1472 | mtu = 4; | 1474 | mtu = 4; |
| 1473 | if (flen > mtu) | 1475 | if (flen > mtu) |
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index f0031f1f97e5..5f2090233d7b 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c | |||
| @@ -1063,6 +1063,7 @@ static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
| 1063 | else { | 1063 | else { |
| 1064 | int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); | 1064 | int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); |
| 1065 | 1065 | ||
| 1066 | po = NULL; | ||
| 1066 | while (++hash < PPPOE_HASH_SIZE) { | 1067 | while (++hash < PPPOE_HASH_SIZE) { |
| 1067 | po = pn->hash_table[hash]; | 1068 | po = pn->hash_table[hash]; |
| 1068 | if (po) | 1069 | if (po) |
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index e7935d09c896..e0f9219a0aea 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c | |||
| @@ -2680,6 +2680,7 @@ out_unregister_pppol2tp_proto: | |||
| 2680 | static void __exit pppol2tp_exit(void) | 2680 | static void __exit pppol2tp_exit(void) |
| 2681 | { | 2681 | { |
| 2682 | unregister_pppox_proto(PX_PROTO_OL2TP); | 2682 | unregister_pppox_proto(PX_PROTO_OL2TP); |
| 2683 | unregister_pernet_gen_device(pppol2tp_net_id, &pppol2tp_net_ops); | ||
| 2683 | proto_unregister(&pppol2tp_sk_proto); | 2684 | proto_unregister(&pppol2tp_sk_proto); |
| 2684 | } | 2685 | } |
| 2685 | 2686 | ||
diff --git a/drivers/net/s6gmac.c b/drivers/net/s6gmac.c index 5345e47b35ac..4525cbe8dd69 100644 --- a/drivers/net/s6gmac.c +++ b/drivers/net/s6gmac.c | |||
| @@ -793,7 +793,7 @@ static inline int s6gmac_phy_start(struct net_device *dev) | |||
| 793 | struct s6gmac *pd = netdev_priv(dev); | 793 | struct s6gmac *pd = netdev_priv(dev); |
| 794 | int i = 0; | 794 | int i = 0; |
| 795 | struct phy_device *p = NULL; | 795 | struct phy_device *p = NULL; |
| 796 | while ((!(p = pd->mii.bus->phy_map[i])) && (i < PHY_MAX_ADDR)) | 796 | while ((i < PHY_MAX_ADDR) && (!(p = pd->mii.bus->phy_map[i]))) |
| 797 | i++; | 797 | i++; |
| 798 | p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, 0, | 798 | p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, 0, |
| 799 | PHY_INTERFACE_MODE_RGMII); | 799 | PHY_INTERFACE_MODE_RGMII); |
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 3550c5dcd93c..0a551d8f5d95 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c | |||
| @@ -1488,6 +1488,8 @@ static int sky2_up(struct net_device *dev) | |||
| 1488 | sky2_set_vlan_mode(hw, port, sky2->vlgrp != NULL); | 1488 | sky2_set_vlan_mode(hw, port, sky2->vlgrp != NULL); |
| 1489 | #endif | 1489 | #endif |
| 1490 | 1490 | ||
| 1491 | sky2->restarting = 0; | ||
| 1492 | |||
| 1491 | err = sky2_rx_start(sky2); | 1493 | err = sky2_rx_start(sky2); |
| 1492 | if (err) | 1494 | if (err) |
| 1493 | goto err_out; | 1495 | goto err_out; |
| @@ -1500,6 +1502,9 @@ static int sky2_up(struct net_device *dev) | |||
| 1500 | 1502 | ||
| 1501 | sky2_set_multicast(dev); | 1503 | sky2_set_multicast(dev); |
| 1502 | 1504 | ||
| 1505 | /* wake queue in case we are restarting */ | ||
| 1506 | netif_wake_queue(dev); | ||
| 1507 | |||
| 1503 | if (netif_msg_ifup(sky2)) | 1508 | if (netif_msg_ifup(sky2)) |
| 1504 | printk(KERN_INFO PFX "%s: enabling interface\n", dev->name); | 1509 | printk(KERN_INFO PFX "%s: enabling interface\n", dev->name); |
| 1505 | return 0; | 1510 | return 0; |
| @@ -1533,6 +1538,8 @@ static inline int tx_dist(unsigned tail, unsigned head) | |||
| 1533 | /* Number of list elements available for next tx */ | 1538 | /* Number of list elements available for next tx */ |
| 1534 | static inline int tx_avail(const struct sky2_port *sky2) | 1539 | static inline int tx_avail(const struct sky2_port *sky2) |
| 1535 | { | 1540 | { |
| 1541 | if (unlikely(sky2->restarting)) | ||
| 1542 | return 0; | ||
| 1536 | return sky2->tx_pending - tx_dist(sky2->tx_cons, sky2->tx_prod); | 1543 | return sky2->tx_pending - tx_dist(sky2->tx_cons, sky2->tx_prod); |
| 1537 | } | 1544 | } |
| 1538 | 1545 | ||
| @@ -1818,6 +1825,10 @@ static int sky2_down(struct net_device *dev) | |||
| 1818 | if (netif_msg_ifdown(sky2)) | 1825 | if (netif_msg_ifdown(sky2)) |
| 1819 | printk(KERN_INFO PFX "%s: disabling interface\n", dev->name); | 1826 | printk(KERN_INFO PFX "%s: disabling interface\n", dev->name); |
| 1820 | 1827 | ||
| 1828 | /* explicitly shut off tx in case we're restarting */ | ||
| 1829 | sky2->restarting = 1; | ||
| 1830 | netif_tx_disable(dev); | ||
| 1831 | |||
| 1821 | /* Force flow control off */ | 1832 | /* Force flow control off */ |
| 1822 | sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_PAUSE_OFF); | 1833 | sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_PAUSE_OFF); |
| 1823 | 1834 | ||
| @@ -2359,7 +2370,7 @@ static inline void sky2_tx_done(struct net_device *dev, u16 last) | |||
| 2359 | { | 2370 | { |
| 2360 | struct sky2_port *sky2 = netdev_priv(dev); | 2371 | struct sky2_port *sky2 = netdev_priv(dev); |
| 2361 | 2372 | ||
| 2362 | if (netif_running(dev)) { | 2373 | if (likely(netif_running(dev) && !sky2->restarting)) { |
| 2363 | netif_tx_lock(dev); | 2374 | netif_tx_lock(dev); |
| 2364 | sky2_tx_complete(sky2, last); | 2375 | sky2_tx_complete(sky2, last); |
| 2365 | netif_tx_unlock(dev); | 2376 | netif_tx_unlock(dev); |
| @@ -4283,6 +4294,7 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, | |||
| 4283 | spin_lock_init(&sky2->phy_lock); | 4294 | spin_lock_init(&sky2->phy_lock); |
| 4284 | sky2->tx_pending = TX_DEF_PENDING; | 4295 | sky2->tx_pending = TX_DEF_PENDING; |
| 4285 | sky2->rx_pending = RX_DEF_PENDING; | 4296 | sky2->rx_pending = RX_DEF_PENDING; |
| 4297 | sky2->restarting = 0; | ||
| 4286 | 4298 | ||
| 4287 | hw->dev[port] = dev; | 4299 | hw->dev[port] = dev; |
| 4288 | 4300 | ||
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index b5549c9e5107..4486b066b43f 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h | |||
| @@ -2051,6 +2051,7 @@ struct sky2_port { | |||
| 2051 | u8 duplex; /* DUPLEX_HALF, DUPLEX_FULL */ | 2051 | u8 duplex; /* DUPLEX_HALF, DUPLEX_FULL */ |
| 2052 | u8 rx_csum; | 2052 | u8 rx_csum; |
| 2053 | u8 wol; | 2053 | u8 wol; |
| 2054 | u8 restarting; | ||
| 2054 | enum flow_control flow_mode; | 2055 | enum flow_control flow_mode; |
| 2055 | enum flow_control flow_status; | 2056 | enum flow_control flow_status; |
| 2056 | 2057 | ||
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index eb72d2e9ab3d..acfdccd44567 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c | |||
| @@ -5059,7 +5059,7 @@ mii_get_phy(struct net_device *dev) | |||
| 5059 | if ((id == 0) || (id == 65535)) continue; /* Valid ID? */ | 5059 | if ((id == 0) || (id == 65535)) continue; /* Valid ID? */ |
| 5060 | for (j=0; j<limit; j++) { /* Search PHY table */ | 5060 | for (j=0; j<limit; j++) { /* Search PHY table */ |
| 5061 | if (id != phy_info[j].id) continue; /* ID match? */ | 5061 | if (id != phy_info[j].id) continue; /* ID match? */ |
| 5062 | for (k=0; lp->phy[k].id && (k < DE4X5_MAX_PHY); k++); | 5062 | for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); |
| 5063 | if (k < DE4X5_MAX_PHY) { | 5063 | if (k < DE4X5_MAX_PHY) { |
| 5064 | memcpy((char *)&lp->phy[k], | 5064 | memcpy((char *)&lp->phy[k], |
| 5065 | (char *)&phy_info[j], sizeof(struct phy_table)); | 5065 | (char *)&phy_info[j], sizeof(struct phy_table)); |
| @@ -5072,7 +5072,7 @@ mii_get_phy(struct net_device *dev) | |||
| 5072 | break; | 5072 | break; |
| 5073 | } | 5073 | } |
| 5074 | if ((j == limit) && (i < DE4X5_MAX_MII)) { | 5074 | if ((j == limit) && (i < DE4X5_MAX_MII)) { |
| 5075 | for (k=0; lp->phy[k].id && (k < DE4X5_MAX_PHY); k++); | 5075 | for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); |
| 5076 | lp->phy[k].addr = i; | 5076 | lp->phy[k].addr = i; |
| 5077 | lp->phy[k].id = id; | 5077 | lp->phy[k].id = id; |
| 5078 | lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ | 5078 | lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ |
| @@ -5091,7 +5091,7 @@ mii_get_phy(struct net_device *dev) | |||
| 5091 | purgatory: | 5091 | purgatory: |
| 5092 | lp->active = 0; | 5092 | lp->active = 0; |
| 5093 | if (lp->phy[0].id) { /* Reset the PHY devices */ | 5093 | if (lp->phy[0].id) { /* Reset the PHY devices */ |
| 5094 | for (k=0; lp->phy[k].id && (k < DE4X5_MAX_PHY); k++) { /*For each PHY*/ | 5094 | for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++) { /*For each PHY*/ |
| 5095 | mii_wr(MII_CR_RST, MII_CR, lp->phy[k].addr, DE4X5_MII); | 5095 | mii_wr(MII_CR_RST, MII_CR, lp->phy[k].addr, DE4X5_MII); |
| 5096 | while (mii_rd(MII_CR, lp->phy[k].addr, DE4X5_MII) & MII_CR_RST); | 5096 | while (mii_rd(MII_CR, lp->phy[k].addr, DE4X5_MII) & MII_CR_RST); |
| 5097 | 5097 | ||
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index c70604f0329e..8ce5e4cee168 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c | |||
| @@ -5918,20 +5918,19 @@ static int airo_set_essid(struct net_device *dev, | |||
| 5918 | readSsidRid(local, &SSID_rid); | 5918 | readSsidRid(local, &SSID_rid); |
| 5919 | 5919 | ||
| 5920 | /* Check if we asked for `any' */ | 5920 | /* Check if we asked for `any' */ |
| 5921 | if(dwrq->flags == 0) { | 5921 | if (dwrq->flags == 0) { |
| 5922 | /* Just send an empty SSID list */ | 5922 | /* Just send an empty SSID list */ |
| 5923 | memset(&SSID_rid, 0, sizeof(SSID_rid)); | 5923 | memset(&SSID_rid, 0, sizeof(SSID_rid)); |
| 5924 | } else { | 5924 | } else { |
| 5925 | int index = (dwrq->flags & IW_ENCODE_INDEX) - 1; | 5925 | unsigned index = (dwrq->flags & IW_ENCODE_INDEX) - 1; |
| 5926 | 5926 | ||
| 5927 | /* Check the size of the string */ | 5927 | /* Check the size of the string */ |
| 5928 | if(dwrq->length > IW_ESSID_MAX_SIZE) { | 5928 | if (dwrq->length > IW_ESSID_MAX_SIZE) |
| 5929 | return -E2BIG ; | 5929 | return -E2BIG ; |
| 5930 | } | 5930 | |
| 5931 | /* Check if index is valid */ | 5931 | /* Check if index is valid */ |
| 5932 | if((index < 0) || (index >= 4)) { | 5932 | if (index >= ARRAY_SIZE(SSID_rid.ssids)) |
| 5933 | return -EINVAL; | 5933 | return -EINVAL; |
| 5934 | } | ||
| 5935 | 5934 | ||
| 5936 | /* Set the SSID */ | 5935 | /* Set the SSID */ |
| 5937 | memset(SSID_rid.ssids[index].ssid, 0, | 5936 | memset(SSID_rid.ssids[index].ssid, 0, |
| @@ -6819,7 +6818,7 @@ static int airo_set_txpow(struct net_device *dev, | |||
| 6819 | return -EINVAL; | 6818 | return -EINVAL; |
| 6820 | } | 6819 | } |
| 6821 | clear_bit (FLAG_RADIO_OFF, &local->flags); | 6820 | clear_bit (FLAG_RADIO_OFF, &local->flags); |
| 6822 | for (i = 0; cap_rid.txPowerLevels[i] && (i < 8); i++) | 6821 | for (i = 0; i < 8 && cap_rid.txPowerLevels[i]; i++) |
| 6823 | if (v == cap_rid.txPowerLevels[i]) { | 6822 | if (v == cap_rid.txPowerLevels[i]) { |
| 6824 | readConfigRid(local, 1); | 6823 | readConfigRid(local, 1); |
| 6825 | local->config.txPower = v; | 6824 | local->config.txPower = v; |
diff --git a/drivers/net/wireless/ath/ath9k/eeprom.c b/drivers/net/wireless/ath/ath9k/eeprom.c index a2fda702b620..ce0e86c36a82 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom.c +++ b/drivers/net/wireless/ath/ath9k/eeprom.c | |||
| @@ -460,7 +460,7 @@ static int ath9k_hw_4k_check_eeprom(struct ath_hw *ah) | |||
| 460 | integer = swab32(eep->modalHeader.antCtrlCommon); | 460 | integer = swab32(eep->modalHeader.antCtrlCommon); |
| 461 | eep->modalHeader.antCtrlCommon = integer; | 461 | eep->modalHeader.antCtrlCommon = integer; |
| 462 | 462 | ||
| 463 | for (i = 0; i < AR5416_MAX_CHAINS; i++) { | 463 | for (i = 0; i < AR5416_EEP4K_MAX_CHAINS; i++) { |
| 464 | integer = swab32(eep->modalHeader.antCtrlChain[i]); | 464 | integer = swab32(eep->modalHeader.antCtrlChain[i]); |
| 465 | eep->modalHeader.antCtrlChain[i] = integer; | 465 | eep->modalHeader.antCtrlChain[i] = integer; |
| 466 | } | 466 | } |
| @@ -914,7 +914,7 @@ static void ath9k_hw_set_4k_power_per_rate_table(struct ath_hw *ah, | |||
| 914 | ctlMode, numCtlModes, isHt40CtlMode, | 914 | ctlMode, numCtlModes, isHt40CtlMode, |
| 915 | (pCtlMode[ctlMode] & EXT_ADDITIVE)); | 915 | (pCtlMode[ctlMode] & EXT_ADDITIVE)); |
| 916 | 916 | ||
| 917 | for (i = 0; (i < AR5416_NUM_CTLS) && | 917 | for (i = 0; (i < AR5416_EEP4K_NUM_CTLS) && |
| 918 | pEepData->ctlIndex[i]; i++) { | 918 | pEepData->ctlIndex[i]; i++) { |
| 919 | DPRINTF(ah->ah_sc, ATH_DBG_EEPROM, | 919 | DPRINTF(ah->ah_sc, ATH_DBG_EEPROM, |
| 920 | " LOOP-Ctlidx %d: cfgCtl 0x%2.2x " | 920 | " LOOP-Ctlidx %d: cfgCtl 0x%2.2x " |
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h index fbb3a573463e..2de6471d4be9 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945.h | |||
| @@ -112,7 +112,7 @@ enum iwl3945_antenna { | |||
| 112 | #define IWL_TX_FIFO_NONE 7 | 112 | #define IWL_TX_FIFO_NONE 7 |
| 113 | 113 | ||
| 114 | /* Minimum number of queues. MAX_NUM is defined in hw specific files */ | 114 | /* Minimum number of queues. MAX_NUM is defined in hw specific files */ |
| 115 | #define IWL_MIN_NUM_QUEUES 4 | 115 | #define IWL39_MIN_NUM_QUEUES 4 |
| 116 | 116 | ||
| 117 | #define IEEE80211_DATA_LEN 2304 | 117 | #define IEEE80211_DATA_LEN 2304 |
| 118 | #define IEEE80211_4ADDR_LEN 30 | 118 | #define IEEE80211_4ADDR_LEN 30 |
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 6ab07165ea28..18b135f510e5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c | |||
| @@ -1332,6 +1332,9 @@ int iwl_setup_mac(struct iwl_priv *priv) | |||
| 1332 | 1332 | ||
| 1333 | hw->wiphy->custom_regulatory = true; | 1333 | hw->wiphy->custom_regulatory = true; |
| 1334 | 1334 | ||
| 1335 | /* Firmware does not support this */ | ||
| 1336 | hw->wiphy->disable_beacon_hints = true; | ||
| 1337 | |||
| 1335 | hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; | 1338 | hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; |
| 1336 | /* we create the 802.11 header and a zero-length SSID element */ | 1339 | /* we create the 802.11 header and a zero-length SSID element */ |
| 1337 | hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; | 1340 | hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; |
diff --git a/drivers/net/wireless/iwlwifi/iwl-debugfs.c b/drivers/net/wireless/iwlwifi/iwl-debugfs.c index 11e08c068917..ca00cc8ad4c7 100644 --- a/drivers/net/wireless/iwlwifi/iwl-debugfs.c +++ b/drivers/net/wireless/iwlwifi/iwl-debugfs.c | |||
| @@ -308,18 +308,18 @@ static ssize_t iwl_dbgfs_nvm_read(struct file *file, | |||
| 308 | return -ENODATA; | 308 | return -ENODATA; |
| 309 | } | 309 | } |
| 310 | 310 | ||
| 311 | ptr = priv->eeprom; | ||
| 312 | if (!ptr) { | ||
| 313 | IWL_ERR(priv, "Invalid EEPROM/OTP memory\n"); | ||
| 314 | return -ENOMEM; | ||
| 315 | } | ||
| 316 | |||
| 311 | /* 4 characters for byte 0xYY */ | 317 | /* 4 characters for byte 0xYY */ |
| 312 | buf = kzalloc(buf_size, GFP_KERNEL); | 318 | buf = kzalloc(buf_size, GFP_KERNEL); |
| 313 | if (!buf) { | 319 | if (!buf) { |
| 314 | IWL_ERR(priv, "Can not allocate Buffer\n"); | 320 | IWL_ERR(priv, "Can not allocate Buffer\n"); |
| 315 | return -ENOMEM; | 321 | return -ENOMEM; |
| 316 | } | 322 | } |
| 317 | |||
| 318 | ptr = priv->eeprom; | ||
| 319 | if (!ptr) { | ||
| 320 | IWL_ERR(priv, "Invalid EEPROM/OTP memory\n"); | ||
| 321 | return -ENOMEM; | ||
| 322 | } | ||
| 323 | pos += scnprintf(buf + pos, buf_size - pos, "NVM Type: %s\n", | 323 | pos += scnprintf(buf + pos, buf_size - pos, "NVM Type: %s\n", |
| 324 | (priv->nvm_device_type == NVM_DEVICE_TYPE_OTP) | 324 | (priv->nvm_device_type == NVM_DEVICE_TYPE_OTP) |
| 325 | ? "OTP" : "EEPROM"); | 325 | ? "OTP" : "EEPROM"); |
diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index e2d620f0b6e8..650e20af20fa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h | |||
| @@ -258,8 +258,10 @@ struct iwl_channel_info { | |||
| 258 | #define IWL_TX_FIFO_HCCA_2 6 | 258 | #define IWL_TX_FIFO_HCCA_2 6 |
| 259 | #define IWL_TX_FIFO_NONE 7 | 259 | #define IWL_TX_FIFO_NONE 7 |
| 260 | 260 | ||
| 261 | /* Minimum number of queues. MAX_NUM is defined in hw specific files */ | 261 | /* Minimum number of queues. MAX_NUM is defined in hw specific files. |
| 262 | #define IWL_MIN_NUM_QUEUES 4 | 262 | * Set the minimum to accommodate the 4 standard TX queues, 1 command |
| 263 | * queue, 2 (unused) HCCA queues, and 4 HT queues (one for each AC) */ | ||
| 264 | #define IWL_MIN_NUM_QUEUES 10 | ||
| 263 | 265 | ||
| 264 | /* Power management (not Tx power) structures */ | 266 | /* Power management (not Tx power) structures */ |
| 265 | 267 | ||
diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 2addf735b193..ffd5c61a7553 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c | |||
| @@ -566,6 +566,8 @@ int iwl_remove_default_wep_key(struct iwl_priv *priv, | |||
| 566 | unsigned long flags; | 566 | unsigned long flags; |
| 567 | 567 | ||
| 568 | spin_lock_irqsave(&priv->sta_lock, flags); | 568 | spin_lock_irqsave(&priv->sta_lock, flags); |
| 569 | IWL_DEBUG_WEP(priv, "Removing default WEP key: idx=%d\n", | ||
| 570 | keyconf->keyidx); | ||
| 569 | 571 | ||
| 570 | if (!test_and_clear_bit(keyconf->keyidx, &priv->ucode_key_table)) | 572 | if (!test_and_clear_bit(keyconf->keyidx, &priv->ucode_key_table)) |
| 571 | IWL_ERR(priv, "index %d not used in uCode key table.\n", | 573 | IWL_ERR(priv, "index %d not used in uCode key table.\n", |
| @@ -573,6 +575,11 @@ int iwl_remove_default_wep_key(struct iwl_priv *priv, | |||
| 573 | 575 | ||
| 574 | priv->default_wep_key--; | 576 | priv->default_wep_key--; |
| 575 | memset(&priv->wep_keys[keyconf->keyidx], 0, sizeof(priv->wep_keys[0])); | 577 | memset(&priv->wep_keys[keyconf->keyidx], 0, sizeof(priv->wep_keys[0])); |
| 578 | if (iwl_is_rfkill(priv)) { | ||
| 579 | IWL_DEBUG_WEP(priv, "Not sending REPLY_WEPKEY command due to RFKILL.\n"); | ||
| 580 | spin_unlock_irqrestore(&priv->sta_lock, flags); | ||
| 581 | return 0; | ||
| 582 | } | ||
| 576 | ret = iwl_send_static_wepkey_cmd(priv, 1); | 583 | ret = iwl_send_static_wepkey_cmd(priv, 1); |
| 577 | IWL_DEBUG_WEP(priv, "Remove default WEP key: idx=%d ret=%d\n", | 584 | IWL_DEBUG_WEP(priv, "Remove default WEP key: idx=%d ret=%d\n", |
| 578 | keyconf->keyidx, ret); | 585 | keyconf->keyidx, ret); |
| @@ -853,6 +860,11 @@ int iwl_remove_dynamic_key(struct iwl_priv *priv, | |||
| 853 | priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; | 860 | priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; |
| 854 | priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; | 861 | priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; |
| 855 | 862 | ||
| 863 | if (iwl_is_rfkill(priv)) { | ||
| 864 | IWL_DEBUG_WEP(priv, "Not sending REPLY_ADD_STA command because RFKILL enabled. \n"); | ||
| 865 | spin_unlock_irqrestore(&priv->sta_lock, flags); | ||
| 866 | return 0; | ||
| 867 | } | ||
| 856 | ret = iwl_send_add_sta(priv, &priv->stations[sta_id].sta, CMD_ASYNC); | 868 | ret = iwl_send_add_sta(priv, &priv->stations[sta_id].sta, CMD_ASYNC); |
| 857 | spin_unlock_irqrestore(&priv->sta_lock, flags); | 869 | spin_unlock_irqrestore(&priv->sta_lock, flags); |
| 858 | return ret; | 870 | return ret; |
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 9bbeec9427f0..2e89040e63be 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c | |||
| @@ -720,8 +720,6 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) | |||
| 720 | goto drop_unlock; | 720 | goto drop_unlock; |
| 721 | } | 721 | } |
| 722 | 722 | ||
| 723 | spin_unlock_irqrestore(&priv->lock, flags); | ||
| 724 | |||
| 725 | hdr_len = ieee80211_hdrlen(fc); | 723 | hdr_len = ieee80211_hdrlen(fc); |
| 726 | 724 | ||
| 727 | /* Find (or create) index into station table for destination station */ | 725 | /* Find (or create) index into station table for destination station */ |
| @@ -729,7 +727,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) | |||
| 729 | if (sta_id == IWL_INVALID_STATION) { | 727 | if (sta_id == IWL_INVALID_STATION) { |
| 730 | IWL_DEBUG_DROP(priv, "Dropping - INVALID STATION: %pM\n", | 728 | IWL_DEBUG_DROP(priv, "Dropping - INVALID STATION: %pM\n", |
| 731 | hdr->addr1); | 729 | hdr->addr1); |
| 732 | goto drop; | 730 | goto drop_unlock; |
| 733 | } | 731 | } |
| 734 | 732 | ||
| 735 | IWL_DEBUG_TX(priv, "station Id %d\n", sta_id); | 733 | IWL_DEBUG_TX(priv, "station Id %d\n", sta_id); |
| @@ -750,14 +748,17 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) | |||
| 750 | txq_id = priv->stations[sta_id].tid[tid].agg.txq_id; | 748 | txq_id = priv->stations[sta_id].tid[tid].agg.txq_id; |
| 751 | swq_id = iwl_virtual_agg_queue_num(swq_id, txq_id); | 749 | swq_id = iwl_virtual_agg_queue_num(swq_id, txq_id); |
| 752 | } | 750 | } |
| 753 | priv->stations[sta_id].tid[tid].tfds_in_queue++; | ||
| 754 | } | 751 | } |
| 755 | 752 | ||
| 756 | txq = &priv->txq[txq_id]; | 753 | txq = &priv->txq[txq_id]; |
| 757 | q = &txq->q; | 754 | q = &txq->q; |
| 758 | txq->swq_id = swq_id; | 755 | txq->swq_id = swq_id; |
| 759 | 756 | ||
| 760 | spin_lock_irqsave(&priv->lock, flags); | 757 | if (unlikely(iwl_queue_space(q) < q->high_mark)) |
| 758 | goto drop_unlock; | ||
| 759 | |||
| 760 | if (ieee80211_is_data_qos(fc)) | ||
| 761 | priv->stations[sta_id].tid[tid].tfds_in_queue++; | ||
| 761 | 762 | ||
| 762 | /* Set up driver data for this TFD */ | 763 | /* Set up driver data for this TFD */ |
| 763 | memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl_tx_info)); | 764 | memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl_tx_info)); |
| @@ -902,7 +903,6 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) | |||
| 902 | 903 | ||
| 903 | drop_unlock: | 904 | drop_unlock: |
| 904 | spin_unlock_irqrestore(&priv->lock, flags); | 905 | spin_unlock_irqrestore(&priv->lock, flags); |
| 905 | drop: | ||
| 906 | return -1; | 906 | return -1; |
| 907 | } | 907 | } |
| 908 | EXPORT_SYMBOL(iwl_tx_skb); | 908 | EXPORT_SYMBOL(iwl_tx_skb); |
| @@ -1171,6 +1171,8 @@ int iwl_tx_agg_start(struct iwl_priv *priv, const u8 *ra, u16 tid, u16 *ssn) | |||
| 1171 | IWL_ERR(priv, "Start AGG on invalid station\n"); | 1171 | IWL_ERR(priv, "Start AGG on invalid station\n"); |
| 1172 | return -ENXIO; | 1172 | return -ENXIO; |
| 1173 | } | 1173 | } |
| 1174 | if (unlikely(tid >= MAX_TID_COUNT)) | ||
| 1175 | return -EINVAL; | ||
| 1174 | 1176 | ||
| 1175 | if (priv->stations[sta_id].tid[tid].agg.state != IWL_AGG_OFF) { | 1177 | if (priv->stations[sta_id].tid[tid].agg.state != IWL_AGG_OFF) { |
| 1176 | IWL_ERR(priv, "Start AGG when state is not IWL_AGG_OFF !\n"); | 1178 | IWL_ERR(priv, "Start AGG when state is not IWL_AGG_OFF !\n"); |
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 956798f2c80c..523843369ca2 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c | |||
| @@ -3968,6 +3968,9 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) | |||
| 3968 | 3968 | ||
| 3969 | hw->wiphy->custom_regulatory = true; | 3969 | hw->wiphy->custom_regulatory = true; |
| 3970 | 3970 | ||
| 3971 | /* Firmware does not support this */ | ||
| 3972 | hw->wiphy->disable_beacon_hints = true; | ||
| 3973 | |||
| 3971 | hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; | 3974 | hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; |
| 3972 | /* we create the 802.11 header and a zero-length SSID element */ | 3975 | /* we create the 802.11 header and a zero-length SSID element */ |
| 3973 | hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; | 3976 | hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; |
| @@ -4018,10 +4021,10 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e | |||
| 4018 | SET_IEEE80211_DEV(hw, &pdev->dev); | 4021 | SET_IEEE80211_DEV(hw, &pdev->dev); |
| 4019 | 4022 | ||
| 4020 | if ((iwl3945_mod_params.num_of_queues > IWL39_MAX_NUM_QUEUES) || | 4023 | if ((iwl3945_mod_params.num_of_queues > IWL39_MAX_NUM_QUEUES) || |
| 4021 | (iwl3945_mod_params.num_of_queues < IWL_MIN_NUM_QUEUES)) { | 4024 | (iwl3945_mod_params.num_of_queues < IWL39_MIN_NUM_QUEUES)) { |
| 4022 | IWL_ERR(priv, | 4025 | IWL_ERR(priv, |
| 4023 | "invalid queues_num, should be between %d and %d\n", | 4026 | "invalid queues_num, should be between %d and %d\n", |
| 4024 | IWL_MIN_NUM_QUEUES, IWL39_MAX_NUM_QUEUES); | 4027 | IWL39_MIN_NUM_QUEUES, IWL39_MAX_NUM_QUEUES); |
| 4025 | err = -EINVAL; | 4028 | err = -EINVAL; |
| 4026 | goto out_ieee80211_free_hw; | 4029 | goto out_ieee80211_free_hw; |
| 4027 | } | 4030 | } |
diff --git a/drivers/net/wireless/iwmc3200wifi/commands.c b/drivers/net/wireless/iwmc3200wifi/commands.c index 834a7f544e5d..e2334d123599 100644 --- a/drivers/net/wireless/iwmc3200wifi/commands.c +++ b/drivers/net/wireless/iwmc3200wifi/commands.c | |||
| @@ -220,6 +220,7 @@ int iwm_store_rxiq_calib_result(struct iwm_priv *iwm) | |||
| 220 | eeprom_rxiq = iwm_eeprom_access(iwm, IWM_EEPROM_CALIB_RXIQ); | 220 | eeprom_rxiq = iwm_eeprom_access(iwm, IWM_EEPROM_CALIB_RXIQ); |
| 221 | if (IS_ERR(eeprom_rxiq)) { | 221 | if (IS_ERR(eeprom_rxiq)) { |
| 222 | IWM_ERR(iwm, "Couldn't access EEPROM RX IQ entry\n"); | 222 | IWM_ERR(iwm, "Couldn't access EEPROM RX IQ entry\n"); |
| 223 | kfree(rxiq); | ||
| 223 | return PTR_ERR(eeprom_rxiq); | 224 | return PTR_ERR(eeprom_rxiq); |
| 224 | } | 225 | } |
| 225 | 226 | ||
diff --git a/drivers/net/wireless/iwmc3200wifi/netdev.c b/drivers/net/wireless/iwmc3200wifi/netdev.c index aea5ccf24ccf..bf294e41753b 100644 --- a/drivers/net/wireless/iwmc3200wifi/netdev.c +++ b/drivers/net/wireless/iwmc3200wifi/netdev.c | |||
| @@ -106,10 +106,8 @@ void *iwm_if_alloc(int sizeof_bus, struct device *dev, | |||
| 106 | int ret = 0; | 106 | int ret = 0; |
| 107 | 107 | ||
| 108 | wdev = iwm_wdev_alloc(sizeof_bus, dev); | 108 | wdev = iwm_wdev_alloc(sizeof_bus, dev); |
| 109 | if (!wdev) { | 109 | if (IS_ERR(wdev)) |
| 110 | dev_err(dev, "no memory for wireless device instance\n"); | 110 | return wdev; |
| 111 | return ERR_PTR(-ENOMEM); | ||
| 112 | } | ||
| 113 | 111 | ||
| 114 | iwm = wdev_to_iwm(wdev); | 112 | iwm = wdev_to_iwm(wdev); |
| 115 | iwm->bus_ops = if_ops; | 113 | iwm->bus_ops = if_ops; |
diff --git a/drivers/net/wireless/libertas/11d.c b/drivers/net/wireless/libertas/11d.c index 9a5408e7d94a..5c6968101f0d 100644 --- a/drivers/net/wireless/libertas/11d.c +++ b/drivers/net/wireless/libertas/11d.c | |||
| @@ -47,7 +47,7 @@ static u8 lbs_region_2_code(u8 *region) | |||
| 47 | { | 47 | { |
| 48 | u8 i; | 48 | u8 i; |
| 49 | 49 | ||
| 50 | for (i = 0; region[i] && i < COUNTRY_CODE_LEN; i++) | 50 | for (i = 0; i < COUNTRY_CODE_LEN && region[i]; i++) |
| 51 | region[i] = toupper(region[i]); | 51 | region[i] = toupper(region[i]); |
| 52 | 52 | ||
| 53 | for (i = 0; i < ARRAY_SIZE(region_code_mapping); i++) { | 53 | for (i = 0; i < ARRAY_SIZE(region_code_mapping); i++) { |
diff --git a/drivers/net/wireless/libertas/assoc.c b/drivers/net/wireless/libertas/assoc.c index b9b374119033..d6997371c27e 100644 --- a/drivers/net/wireless/libertas/assoc.c +++ b/drivers/net/wireless/libertas/assoc.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | /* Copyright (C) 2006, Red Hat, Inc. */ | 1 | /* Copyright (C) 2006, Red Hat, Inc. */ |
| 2 | 2 | ||
| 3 | #include <linux/types.h> | 3 | #include <linux/types.h> |
| 4 | #include <linux/kernel.h> | ||
| 4 | #include <linux/etherdevice.h> | 5 | #include <linux/etherdevice.h> |
| 5 | #include <linux/ieee80211.h> | 6 | #include <linux/ieee80211.h> |
| 6 | #include <linux/if_arp.h> | 7 | #include <linux/if_arp.h> |
| @@ -43,21 +44,21 @@ static int get_common_rates(struct lbs_private *priv, | |||
| 43 | u16 *rates_size) | 44 | u16 *rates_size) |
| 44 | { | 45 | { |
| 45 | u8 *card_rates = lbs_bg_rates; | 46 | u8 *card_rates = lbs_bg_rates; |
| 46 | size_t num_card_rates = sizeof(lbs_bg_rates); | ||
| 47 | int ret = 0, i, j; | 47 | int ret = 0, i, j; |
| 48 | u8 tmp[30]; | 48 | u8 tmp[(ARRAY_SIZE(lbs_bg_rates) - 1) * (*rates_size - 1)]; |
| 49 | size_t tmp_size = 0; | 49 | size_t tmp_size = 0; |
| 50 | 50 | ||
| 51 | /* For each rate in card_rates that exists in rate1, copy to tmp */ | 51 | /* For each rate in card_rates that exists in rate1, copy to tmp */ |
| 52 | for (i = 0; card_rates[i] && (i < num_card_rates); i++) { | 52 | for (i = 0; i < ARRAY_SIZE(lbs_bg_rates) && card_rates[i]; i++) { |
| 53 | for (j = 0; rates[j] && (j < *rates_size); j++) { | 53 | for (j = 0; j < *rates_size && rates[j]; j++) { |
| 54 | if (rates[j] == card_rates[i]) | 54 | if (rates[j] == card_rates[i]) |
| 55 | tmp[tmp_size++] = card_rates[i]; | 55 | tmp[tmp_size++] = card_rates[i]; |
| 56 | } | 56 | } |
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | lbs_deb_hex(LBS_DEB_JOIN, "AP rates ", rates, *rates_size); | 59 | lbs_deb_hex(LBS_DEB_JOIN, "AP rates ", rates, *rates_size); |
| 60 | lbs_deb_hex(LBS_DEB_JOIN, "card rates ", card_rates, num_card_rates); | 60 | lbs_deb_hex(LBS_DEB_JOIN, "card rates ", card_rates, |
| 61 | ARRAY_SIZE(lbs_bg_rates)); | ||
| 61 | lbs_deb_hex(LBS_DEB_JOIN, "common rates", tmp, tmp_size); | 62 | lbs_deb_hex(LBS_DEB_JOIN, "common rates", tmp, tmp_size); |
| 62 | lbs_deb_join("TX data rate 0x%02x\n", priv->cur_rate); | 63 | lbs_deb_join("TX data rate 0x%02x\n", priv->cur_rate); |
| 63 | 64 | ||
| @@ -69,10 +70,7 @@ static int get_common_rates(struct lbs_private *priv, | |||
| 69 | lbs_pr_alert("Previously set fixed data rate %#x isn't " | 70 | lbs_pr_alert("Previously set fixed data rate %#x isn't " |
| 70 | "compatible with the network.\n", priv->cur_rate); | 71 | "compatible with the network.\n", priv->cur_rate); |
| 71 | ret = -1; | 72 | ret = -1; |
| 72 | goto done; | ||
| 73 | } | 73 | } |
| 74 | ret = 0; | ||
| 75 | |||
| 76 | done: | 74 | done: |
| 77 | memset(rates, 0, *rates_size); | 75 | memset(rates, 0, *rates_size); |
| 78 | *rates_size = min_t(int, tmp_size, *rates_size); | 76 | *rates_size = min_t(int, tmp_size, *rates_size); |
| @@ -322,7 +320,7 @@ static int lbs_associate(struct lbs_private *priv, | |||
| 322 | rates = (struct mrvl_ie_rates_param_set *) pos; | 320 | rates = (struct mrvl_ie_rates_param_set *) pos; |
| 323 | rates->header.type = cpu_to_le16(TLV_TYPE_RATES); | 321 | rates->header.type = cpu_to_le16(TLV_TYPE_RATES); |
| 324 | memcpy(&rates->rates, &bss->rates, MAX_RATES); | 322 | memcpy(&rates->rates, &bss->rates, MAX_RATES); |
| 325 | tmplen = MAX_RATES; | 323 | tmplen = min_t(u16, ARRAY_SIZE(rates->rates), MAX_RATES); |
| 326 | if (get_common_rates(priv, rates->rates, &tmplen)) { | 324 | if (get_common_rates(priv, rates->rates, &tmplen)) { |
| 327 | ret = -1; | 325 | ret = -1; |
| 328 | goto done; | 326 | goto done; |
| @@ -598,7 +596,7 @@ static int lbs_adhoc_join(struct lbs_private *priv, | |||
| 598 | 596 | ||
| 599 | /* Copy Data rates from the rates recorded in scan response */ | 597 | /* Copy Data rates from the rates recorded in scan response */ |
| 600 | memset(cmd.bss.rates, 0, sizeof(cmd.bss.rates)); | 598 | memset(cmd.bss.rates, 0, sizeof(cmd.bss.rates)); |
| 601 | ratesize = min_t(u16, sizeof(cmd.bss.rates), MAX_RATES); | 599 | ratesize = min_t(u16, ARRAY_SIZE(cmd.bss.rates), MAX_RATES); |
| 602 | memcpy(cmd.bss.rates, bss->rates, ratesize); | 600 | memcpy(cmd.bss.rates, bss->rates, ratesize); |
| 603 | if (get_common_rates(priv, cmd.bss.rates, &ratesize)) { | 601 | if (get_common_rates(priv, cmd.bss.rates, &ratesize)) { |
| 604 | lbs_deb_join("ADHOC_JOIN: get_common_rates returned error.\n"); | 602 | lbs_deb_join("ADHOC_JOIN: get_common_rates returned error.\n"); |
diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c index 601b54249677..6c95af3023cc 100644 --- a/drivers/net/wireless/libertas/scan.c +++ b/drivers/net/wireless/libertas/scan.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | * for sending scan commands to the firmware. | 5 | * for sending scan commands to the firmware. |
| 6 | */ | 6 | */ |
| 7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
| 8 | #include <linux/kernel.h> | ||
| 8 | #include <linux/etherdevice.h> | 9 | #include <linux/etherdevice.h> |
| 9 | #include <linux/if_arp.h> | 10 | #include <linux/if_arp.h> |
| 10 | #include <asm/unaligned.h> | 11 | #include <asm/unaligned.h> |
| @@ -876,7 +877,7 @@ static inline char *lbs_translate_scan(struct lbs_private *priv, | |||
| 876 | iwe.u.bitrate.disabled = 0; | 877 | iwe.u.bitrate.disabled = 0; |
| 877 | iwe.u.bitrate.value = 0; | 878 | iwe.u.bitrate.value = 0; |
| 878 | 879 | ||
| 879 | for (j = 0; bss->rates[j] && (j < sizeof(bss->rates)); j++) { | 880 | for (j = 0; j < ARRAY_SIZE(bss->rates) && bss->rates[j]; j++) { |
| 880 | /* Bit rate given in 500 kb/s units */ | 881 | /* Bit rate given in 500 kb/s units */ |
| 881 | iwe.u.bitrate.value = bss->rates[j] * 500000; | 882 | iwe.u.bitrate.value = bss->rates[j] * 500000; |
| 882 | current_val = iwe_stream_add_value(info, start, current_val, | 883 | current_val = iwe_stream_add_value(info, start, current_val, |
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 40b07b988224..3bd3c779fff3 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c | |||
| @@ -698,7 +698,7 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length) | |||
| 698 | && !mac->pass_ctrl) | 698 | && !mac->pass_ctrl) |
| 699 | return 0; | 699 | return 0; |
| 700 | 700 | ||
| 701 | fc = *(__le16 *)buffer; | 701 | fc = get_unaligned((__le16*)buffer); |
| 702 | need_padding = ieee80211_is_data_qos(fc) ^ ieee80211_has_a4(fc); | 702 | need_padding = ieee80211_is_data_qos(fc) ^ ieee80211_has_a4(fc); |
| 703 | 703 | ||
| 704 | skb = dev_alloc_skb(length + (need_padding ? 2 : 0)); | 704 | skb = dev_alloc_skb(length + (need_padding ? 2 : 0)); |
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 0f0e0b919ef4..a45b0c0d574e 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c | |||
| @@ -70,7 +70,6 @@ | |||
| 70 | #undef CCIO_COLLECT_STATS | 70 | #undef CCIO_COLLECT_STATS |
| 71 | #endif | 71 | #endif |
| 72 | 72 | ||
| 73 | #include <linux/proc_fs.h> | ||
| 74 | #include <asm/runway.h> /* for proc_runway_root */ | 73 | #include <asm/runway.h> /* for proc_runway_root */ |
| 75 | 74 | ||
| 76 | #ifdef DEBUG_CCIO_INIT | 75 | #ifdef DEBUG_CCIO_INIT |
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index c590974e9815..d69bde6a2343 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c | |||
| @@ -614,7 +614,7 @@ dino_fixup_bus(struct pci_bus *bus) | |||
| 614 | dev_name(&bus->self->dev), i, | 614 | dev_name(&bus->self->dev), i, |
| 615 | bus->self->resource[i].start, | 615 | bus->self->resource[i].start, |
| 616 | bus->self->resource[i].end); | 616 | bus->self->resource[i].end); |
| 617 | pci_assign_resource(bus->self, i); | 617 | WARN_ON(pci_assign_resource(bus->self, i)); |
| 618 | DBG("DEBUG %s after assign %d [0x%lx,0x%lx]\n", | 618 | DBG("DEBUG %s after assign %d [0x%lx,0x%lx]\n", |
| 619 | dev_name(&bus->self->dev), i, | 619 | dev_name(&bus->self->dev), i, |
| 620 | bus->self->resource[i].start, | 620 | bus->self->resource[i].start, |
diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 685d94e69d44..8c0b26e9b98a 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c | |||
| @@ -55,7 +55,7 @@ static ssize_t eisa_eeprom_read(struct file * file, | |||
| 55 | ssize_t ret; | 55 | ssize_t ret; |
| 56 | int i; | 56 | int i; |
| 57 | 57 | ||
| 58 | if (*ppos >= HPEE_MAX_LENGTH) | 58 | if (*ppos < 0 || *ppos >= HPEE_MAX_LENGTH) |
| 59 | return 0; | 59 | return 0; |
| 60 | 60 | ||
| 61 | count = *ppos + count < HPEE_MAX_LENGTH ? count : HPEE_MAX_LENGTH - *ppos; | 61 | count = *ppos + count < HPEE_MAX_LENGTH ? count : HPEE_MAX_LENGTH - *ppos; |
diff --git a/drivers/parisc/hppb.c b/drivers/parisc/hppb.c index 13856415b432..815db175d427 100644 --- a/drivers/parisc/hppb.c +++ b/drivers/parisc/hppb.c | |||
| @@ -62,7 +62,8 @@ static int hppb_probe(struct parisc_device *dev) | |||
| 62 | } | 62 | } |
| 63 | card = card->next; | 63 | card = card->next; |
| 64 | } | 64 | } |
| 65 | printk(KERN_INFO "Found GeckoBoa at 0x%x\n", dev->hpa.start); | 65 | printk(KERN_INFO "Found GeckoBoa at 0x%llx\n", |
| 66 | (unsigned long long) dev->hpa.start); | ||
| 66 | 67 | ||
| 67 | card->hpa = dev->hpa.start; | 68 | card->hpa = dev->hpa.start; |
| 68 | card->mmio_region.name = "HP-PB Bus"; | 69 | card->mmio_region.name = "HP-PB Bus"; |
| @@ -73,8 +74,10 @@ static int hppb_probe(struct parisc_device *dev) | |||
| 73 | 74 | ||
| 74 | status = ccio_request_resource(dev, &card->mmio_region); | 75 | status = ccio_request_resource(dev, &card->mmio_region); |
| 75 | if(status < 0) { | 76 | if(status < 0) { |
| 76 | printk(KERN_ERR "%s: failed to claim HP-PB bus space (%08x, %08x)\n", | 77 | printk(KERN_ERR "%s: failed to claim HP-PB " |
| 77 | __FILE__, card->mmio_region.start, card->mmio_region.end); | 78 | "bus space (0x%08llx, 0x%08llx)\n", |
| 79 | __FILE__, (unsigned long long) card->mmio_region.start, | ||
| 80 | (unsigned long long) card->mmio_region.end); | ||
| 78 | } | 81 | } |
| 79 | 82 | ||
| 80 | return 0; | 83 | return 0; |
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index ede614616f8e..3aeb3279c92a 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c | |||
| @@ -992,7 +992,7 @@ lba_pat_resources(struct parisc_device *pa_dev, struct lba_device *lba_dev) | |||
| 992 | return; | 992 | return; |
| 993 | 993 | ||
| 994 | io_pdc_cell = kzalloc(sizeof(pdc_pat_cell_mod_maddr_block_t), GFP_KERNEL); | 994 | io_pdc_cell = kzalloc(sizeof(pdc_pat_cell_mod_maddr_block_t), GFP_KERNEL); |
| 995 | if (!pa_pdc_cell) { | 995 | if (!io_pdc_cell) { |
| 996 | kfree(pa_pdc_cell); | 996 | kfree(pa_pdc_cell); |
| 997 | return; | 997 | return; |
| 998 | } | 998 | } |
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index f9f9a5f1bbd0..13a64bc081b6 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c | |||
| @@ -370,7 +370,7 @@ pdcspath_layer_read(struct pdcspath_entry *entry, char *buf) | |||
| 370 | if (!i) /* entry is not ready */ | 370 | if (!i) /* entry is not ready */ |
| 371 | return -ENODATA; | 371 | return -ENODATA; |
| 372 | 372 | ||
| 373 | for (i = 0; devpath->layers[i] && (likely(i < 6)); i++) | 373 | for (i = 0; i < 6 && devpath->layers[i]; i++) |
| 374 | out += sprintf(out, "%u ", devpath->layers[i]); | 374 | out += sprintf(out, "%u ", devpath->layers[i]); |
| 375 | 375 | ||
| 376 | out += sprintf(out, "\n"); | 376 | out += sprintf(out, "\n"); |
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index b711fb7181e2..1898c7b47907 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c | |||
| @@ -100,16 +100,16 @@ int pci_claim_resource(struct pci_dev *dev, int resource) | |||
| 100 | { | 100 | { |
| 101 | struct resource *res = &dev->resource[resource]; | 101 | struct resource *res = &dev->resource[resource]; |
| 102 | struct resource *root; | 102 | struct resource *root; |
| 103 | char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; | ||
| 104 | int err; | 103 | int err; |
| 105 | 104 | ||
| 106 | root = pci_find_parent_resource(dev, res); | 105 | root = pci_find_parent_resource(dev, res); |
| 107 | 106 | ||
| 108 | err = -EINVAL; | 107 | err = -EINVAL; |
| 109 | if (root != NULL) | 108 | if (root != NULL) |
| 110 | err = insert_resource(root, res); | 109 | err = request_resource(root, res); |
| 111 | 110 | ||
| 112 | if (err) { | 111 | if (err) { |
| 112 | const char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; | ||
| 113 | dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", | 113 | dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", |
| 114 | resource, | 114 | resource, |
| 115 | root ? "address space collision on" : | 115 | root ? "address space collision on" : |
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 46dad12f952f..77c6097ced80 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig | |||
| @@ -277,31 +277,6 @@ config THINKPAD_ACPI_UNSAFE_LEDS | |||
| 277 | Say N here, unless you are building a kernel for your own | 277 | Say N here, unless you are building a kernel for your own |
| 278 | use, and need to control the important firmware LEDs. | 278 | use, and need to control the important firmware LEDs. |
| 279 | 279 | ||
| 280 | config THINKPAD_ACPI_DOCK | ||
| 281 | bool "Legacy Docking Station Support" | ||
| 282 | depends on THINKPAD_ACPI | ||
| 283 | depends on ACPI_DOCK=n | ||
| 284 | default n | ||
| 285 | ---help--- | ||
| 286 | Allows the thinkpad_acpi driver to handle docking station events. | ||
| 287 | This support was made obsolete by the generic ACPI docking station | ||
| 288 | support (CONFIG_ACPI_DOCK). It will allow locking and removing the | ||
| 289 | laptop from the docking station, but will not properly connect PCI | ||
| 290 | devices. | ||
| 291 | |||
| 292 | If you are not sure, say N here. | ||
| 293 | |||
| 294 | config THINKPAD_ACPI_BAY | ||
| 295 | bool "Legacy Removable Bay Support" | ||
| 296 | depends on THINKPAD_ACPI | ||
| 297 | default y | ||
| 298 | ---help--- | ||
| 299 | Allows the thinkpad_acpi driver to handle removable bays. It will | ||
| 300 | electrically disable the device in the bay, and also generate | ||
| 301 | notifications when the bay lever is ejected or inserted. | ||
| 302 | |||
| 303 | If you are not sure, say Y here. | ||
| 304 | |||
| 305 | config THINKPAD_ACPI_VIDEO | 280 | config THINKPAD_ACPI_VIDEO |
| 306 | bool "Video output control support" | 281 | bool "Video output control support" |
| 307 | depends on THINKPAD_ACPI | 282 | depends on THINKPAD_ACPI |
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index ec560f16d720..222ffb892f22 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c | |||
| @@ -143,6 +143,7 @@ struct eeepc_hotk { | |||
| 143 | struct rfkill *bluetooth_rfkill; | 143 | struct rfkill *bluetooth_rfkill; |
| 144 | struct rfkill *wwan3g_rfkill; | 144 | struct rfkill *wwan3g_rfkill; |
| 145 | struct hotplug_slot *hotplug_slot; | 145 | struct hotplug_slot *hotplug_slot; |
| 146 | struct work_struct hotplug_work; | ||
| 146 | }; | 147 | }; |
| 147 | 148 | ||
| 148 | /* The actual device the driver binds to */ | 149 | /* The actual device the driver binds to */ |
| @@ -660,7 +661,7 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot, | |||
| 660 | return 0; | 661 | return 0; |
| 661 | } | 662 | } |
| 662 | 663 | ||
| 663 | static void eeepc_rfkill_hotplug(void) | 664 | static void eeepc_hotplug_work(struct work_struct *work) |
| 664 | { | 665 | { |
| 665 | struct pci_dev *dev; | 666 | struct pci_dev *dev; |
| 666 | struct pci_bus *bus = pci_find_bus(0, 1); | 667 | struct pci_bus *bus = pci_find_bus(0, 1); |
| @@ -701,7 +702,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) | |||
| 701 | if (event != ACPI_NOTIFY_BUS_CHECK) | 702 | if (event != ACPI_NOTIFY_BUS_CHECK) |
| 702 | return; | 703 | return; |
| 703 | 704 | ||
| 704 | eeepc_rfkill_hotplug(); | 705 | schedule_work(&ehotk->hotplug_work); |
| 705 | } | 706 | } |
| 706 | 707 | ||
| 707 | static void eeepc_hotk_notify(struct acpi_device *device, u32 event) | 708 | static void eeepc_hotk_notify(struct acpi_device *device, u32 event) |
| @@ -892,7 +893,7 @@ static int eeepc_hotk_resume(struct acpi_device *device) | |||
| 892 | 893 | ||
| 893 | rfkill_set_sw_state(ehotk->wlan_rfkill, wlan != 1); | 894 | rfkill_set_sw_state(ehotk->wlan_rfkill, wlan != 1); |
| 894 | 895 | ||
| 895 | eeepc_rfkill_hotplug(); | 896 | schedule_work(&ehotk->hotplug_work); |
| 896 | } | 897 | } |
| 897 | 898 | ||
| 898 | if (ehotk->bluetooth_rfkill) | 899 | if (ehotk->bluetooth_rfkill) |
| @@ -1093,6 +1094,8 @@ static int eeepc_rfkill_init(struct device *dev) | |||
| 1093 | { | 1094 | { |
| 1094 | int result = 0; | 1095 | int result = 0; |
| 1095 | 1096 | ||
| 1097 | INIT_WORK(&ehotk->hotplug_work, eeepc_hotplug_work); | ||
| 1098 | |||
| 1096 | eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); | 1099 | eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); |
| 1097 | eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); | 1100 | eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); |
| 1098 | 1101 | ||
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index ca508564a181..a2ad53e15874 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c | |||
| @@ -520,11 +520,13 @@ static int hp_wmi_resume_handler(struct platform_device *device) | |||
| 520 | * the input layer will only actually pass it on if the state | 520 | * the input layer will only actually pass it on if the state |
| 521 | * changed. | 521 | * changed. |
| 522 | */ | 522 | */ |
| 523 | 523 | if (hp_wmi_input_dev) { | |
| 524 | input_report_switch(hp_wmi_input_dev, SW_DOCK, hp_wmi_dock_state()); | 524 | input_report_switch(hp_wmi_input_dev, SW_DOCK, |
| 525 | input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, | 525 | hp_wmi_dock_state()); |
| 526 | hp_wmi_tablet_state()); | 526 | input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, |
| 527 | input_sync(hp_wmi_input_dev); | 527 | hp_wmi_tablet_state()); |
| 528 | input_sync(hp_wmi_input_dev); | ||
| 529 | } | ||
| 528 | 530 | ||
| 529 | return 0; | 531 | return 0; |
| 530 | } | 532 | } |
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a463fd72c495..e85600852502 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c | |||
| @@ -239,12 +239,6 @@ struct ibm_init_struct { | |||
| 239 | }; | 239 | }; |
| 240 | 240 | ||
| 241 | static struct { | 241 | static struct { |
| 242 | #ifdef CONFIG_THINKPAD_ACPI_BAY | ||
| 243 | u32 bay_status:1; | ||
| 244 | u32 bay_eject:1; | ||
| 245 | u32 bay_status2:1; | ||
| 246 | u32 bay_eject2:1; | ||
| 247 | #endif | ||
| 248 | u32 bluetooth:1; | 242 | u32 bluetooth:1; |
| 249 | u32 hotkey:1; | 243 | u32 hotkey:1; |
| 250 | u32 hotkey_mask:1; | 244 | u32 hotkey_mask:1; |
| @@ -589,18 +583,6 @@ static int acpi_ec_write(int i, u8 v) | |||
| 589 | return 1; | 583 | return 1; |
| 590 | } | 584 | } |
| 591 | 585 | ||
| 592 | #if defined(CONFIG_THINKPAD_ACPI_DOCK) || defined(CONFIG_THINKPAD_ACPI_BAY) | ||
| 593 | static int _sta(acpi_handle handle) | ||
| 594 | { | ||
| 595 | int status; | ||
| 596 | |||
| 597 | if (!handle || !acpi_evalf(handle, &status, "_STA", "d")) | ||
| 598 | status = 0; | ||
| 599 | |||
| 600 | return status; | ||
| 601 | } | ||
| 602 | #endif | ||
| 603 | |||
| 604 | static int issue_thinkpad_cmos_command(int cmos_cmd) | 586 | static int issue_thinkpad_cmos_command(int cmos_cmd) |
| 605 | { | 587 | { |
| 606 | if (!cmos_handle) | 588 | if (!cmos_handle) |
| @@ -784,6 +766,8 @@ static int dispatch_procfs_write(struct file *file, | |||
| 784 | 766 | ||
| 785 | if (!ibm || !ibm->write) | 767 | if (!ibm || !ibm->write) |
| 786 | return -EINVAL; | 768 | return -EINVAL; |
| 769 | if (count > PAGE_SIZE - 2) | ||
| 770 | return -EINVAL; | ||
| 787 | 771 | ||
| 788 | kernbuf = kmalloc(count + 2, GFP_KERNEL); | 772 | kernbuf = kmalloc(count + 2, GFP_KERNEL); |
| 789 | if (!kernbuf) | 773 | if (!kernbuf) |
| @@ -4442,293 +4426,6 @@ static struct ibm_struct light_driver_data = { | |||
| 4442 | }; | 4426 | }; |
| 4443 | 4427 | ||
| 4444 | /************************************************************************* | 4428 | /************************************************************************* |
| 4445 | * Dock subdriver | ||
| 4446 | */ | ||
| 4447 | |||
| 4448 | #ifdef CONFIG_THINKPAD_ACPI_DOCK | ||
| 4449 | |||
| 4450 | static void dock_notify(struct ibm_struct *ibm, u32 event); | ||
| 4451 | static int dock_read(char *p); | ||
| 4452 | static int dock_write(char *buf); | ||
| 4453 | |||
| 4454 | TPACPI_HANDLE(dock, root, "\\_SB.GDCK", /* X30, X31, X40 */ | ||
| 4455 | "\\_SB.PCI0.DOCK", /* 600e/x,770e,770x,A2xm/p,T20-22,X20-21 */ | ||
| 4456 | "\\_SB.PCI0.PCI1.DOCK", /* all others */ | ||
| 4457 | "\\_SB.PCI.ISA.SLCE", /* 570 */ | ||
| 4458 | ); /* A21e,G4x,R30,R31,R32,R40,R40e,R50e */ | ||
| 4459 | |||
| 4460 | /* don't list other alternatives as we install a notify handler on the 570 */ | ||
| 4461 | TPACPI_HANDLE(pci, root, "\\_SB.PCI"); /* 570 */ | ||
| 4462 | |||
| 4463 | static const struct acpi_device_id ibm_pci_device_ids[] = { | ||
| 4464 | {PCI_ROOT_HID_STRING, 0}, | ||
| 4465 | {"", 0}, | ||
| 4466 | }; | ||
| 4467 | |||
| 4468 | static struct tp_acpi_drv_struct ibm_dock_acpidriver[2] = { | ||
| 4469 | { | ||
| 4470 | .notify = dock_notify, | ||
| 4471 | .handle = &dock_handle, | ||
| 4472 | .type = ACPI_SYSTEM_NOTIFY, | ||
| 4473 | }, | ||
| 4474 | { | ||
| 4475 | /* THIS ONE MUST NEVER BE USED FOR DRIVER AUTOLOADING. | ||
| 4476 | * We just use it to get notifications of dock hotplug | ||
| 4477 | * in very old thinkpads */ | ||
| 4478 | .hid = ibm_pci_device_ids, | ||
| 4479 | .notify = dock_notify, | ||
| 4480 | .handle = &pci_handle, | ||
| 4481 | .type = ACPI_SYSTEM_NOTIFY, | ||
| 4482 | }, | ||
| 4483 | }; | ||
| 4484 | |||
| 4485 | static struct ibm_struct dock_driver_data[2] = { | ||
| 4486 | { | ||
| 4487 | .name = "dock", | ||
| 4488 | .read = dock_read, | ||
| 4489 | .write = dock_write, | ||
| 4490 | .acpi = &ibm_dock_acpidriver[0], | ||
| 4491 | }, | ||
| 4492 | { | ||
| 4493 | .name = "dock", | ||
| 4494 | .acpi = &ibm_dock_acpidriver[1], | ||
| 4495 | }, | ||
| 4496 | }; | ||
| 4497 | |||
| 4498 | #define dock_docked() (_sta(dock_handle) & 1) | ||
| 4499 | |||
| 4500 | static int __init dock_init(struct ibm_init_struct *iibm) | ||
| 4501 | { | ||
| 4502 | vdbg_printk(TPACPI_DBG_INIT, "initializing dock subdriver\n"); | ||
| 4503 | |||
| 4504 | TPACPI_ACPIHANDLE_INIT(dock); | ||
| 4505 | |||
| 4506 | vdbg_printk(TPACPI_DBG_INIT, "dock is %s\n", | ||
| 4507 | str_supported(dock_handle != NULL)); | ||
| 4508 | |||
| 4509 | return (dock_handle)? 0 : 1; | ||
| 4510 | } | ||
| 4511 | |||
| 4512 | static int __init dock_init2(struct ibm_init_struct *iibm) | ||
| 4513 | { | ||
| 4514 | int dock2_needed; | ||
| 4515 | |||
| 4516 | vdbg_printk(TPACPI_DBG_INIT, "initializing dock subdriver part 2\n"); | ||
| 4517 | |||
| 4518 | if (dock_driver_data[0].flags.acpi_driver_registered && | ||
| 4519 | dock_driver_data[0].flags.acpi_notify_installed) { | ||
| 4520 | TPACPI_ACPIHANDLE_INIT(pci); | ||
| 4521 | dock2_needed = (pci_handle != NULL); | ||
| 4522 | vdbg_printk(TPACPI_DBG_INIT, | ||
| 4523 | "dock PCI handler for the TP 570 is %s\n", | ||
| 4524 | str_supported(dock2_needed)); | ||
| 4525 | } else { | ||
| 4526 | vdbg_printk(TPACPI_DBG_INIT, | ||
| 4527 | "dock subdriver part 2 not required\n"); | ||
| 4528 | dock2_needed = 0; | ||
| 4529 | } | ||
| 4530 | |||
| 4531 | return (dock2_needed)? 0 : 1; | ||
| 4532 | } | ||
| 4533 | |||
| 4534 | static void dock_notify(struct ibm_struct *ibm, u32 event) | ||
| 4535 | { | ||
| 4536 | int docked = dock_docked(); | ||
| 4537 | int pci = ibm->acpi->hid && ibm->acpi->device && | ||
| 4538 | acpi_match_device_ids(ibm->acpi->device, ibm_pci_device_ids); | ||
| 4539 | int data; | ||
| 4540 | |||
| 4541 | if (event == 1 && !pci) /* 570 */ | ||
| 4542 | data = 1; /* button */ | ||
| 4543 | else if (event == 1 && pci) /* 570 */ | ||
| 4544 | data = 3; /* dock */ | ||
| 4545 | else if (event == 3 && docked) | ||
| 4546 | data = 1; /* button */ | ||
| 4547 | else if (event == 3 && !docked) | ||
| 4548 | data = 2; /* undock */ | ||
| 4549 | else if (event == 0 && docked) | ||
| 4550 | data = 3; /* dock */ | ||
| 4551 | else { | ||
| 4552 | printk(TPACPI_ERR "unknown dock event %d, status %d\n", | ||
| 4553 | event, _sta(dock_handle)); | ||
| 4554 | data = 0; /* unknown */ | ||
| 4555 | } | ||
| 4556 | acpi_bus_generate_proc_event(ibm->acpi->device, event, data); | ||
| 4557 | acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class, | ||
| 4558 | dev_name(&ibm->acpi->device->dev), | ||
| 4559 | event, data); | ||
| 4560 | } | ||
| 4561 | |||
| 4562 | static int dock_read(char *p) | ||
| 4563 | { | ||
| 4564 | int len = 0; | ||
| 4565 | int docked = dock_docked(); | ||
| 4566 | |||
| 4567 | if (!dock_handle) | ||
| 4568 | len += sprintf(p + len, "status:\t\tnot supported\n"); | ||
| 4569 | else if (!docked) | ||
| 4570 | len += sprintf(p + len, "status:\t\tundocked\n"); | ||
| 4571 | else { | ||
| 4572 | len += sprintf(p + len, "status:\t\tdocked\n"); | ||
| 4573 | len += sprintf(p + len, "commands:\tdock, undock\n"); | ||
| 4574 | } | ||
| 4575 | |||
| 4576 | return len; | ||
| 4577 | } | ||
| 4578 | |||
| 4579 | static int dock_write(char *buf) | ||
| 4580 | { | ||
| 4581 | char *cmd; | ||
| 4582 | |||
| 4583 | if (!dock_docked()) | ||
| 4584 | return -ENODEV; | ||
| 4585 | |||
| 4586 | while ((cmd = next_cmd(&buf))) { | ||
| 4587 | if (strlencmp(cmd, "undock") == 0) { | ||
| 4588 | if (!acpi_evalf(dock_handle, NULL, "_DCK", "vd", 0) || | ||
| 4589 | !acpi_evalf(dock_handle, NULL, "_EJ0", "vd", 1)) | ||
| 4590 | return -EIO; | ||
| 4591 | } else if (strlencmp(cmd, "dock") == 0) { | ||
| 4592 | if (!acpi_evalf(dock_handle, NULL, "_DCK", "vd", 1)) | ||
| 4593 | return -EIO; | ||
| 4594 | } else | ||
| 4595 | return -EINVAL; | ||
| 4596 | } | ||
| 4597 | |||
| 4598 | return 0; | ||
| 4599 | } | ||
| 4600 | |||
| 4601 | #endif /* CONFIG_THINKPAD_ACPI_DOCK */ | ||
| 4602 | |||
| 4603 | /************************************************************************* | ||
| 4604 | * Bay subdriver | ||
| 4605 | */ | ||
| 4606 | |||
| 4607 | #ifdef CONFIG_THINKPAD_ACPI_BAY | ||
| 4608 | |||
| 4609 | TPACPI_HANDLE(bay, root, "\\_SB.PCI.IDE.SECN.MAST", /* 570 */ | ||
| 4610 | "\\_SB.PCI0.IDE0.IDES.IDSM", /* 600e/x, 770e, 770x */ | ||
| 4611 | "\\_SB.PCI0.SATA.SCND.MSTR", /* T60, X60, Z60 */ | ||
| 4612 | "\\_SB.PCI0.IDE0.SCND.MSTR", /* all others */ | ||
| 4613 | ); /* A21e, R30, R31 */ | ||
| 4614 | TPACPI_HANDLE(bay_ej, bay, "_EJ3", /* 600e/x, A2xm/p, A3x */ | ||
| 4615 | "_EJ0", /* all others */ | ||
| 4616 | ); /* 570,A21e,G4x,R30,R31,R32,R40e,R50e */ | ||
| 4617 | TPACPI_HANDLE(bay2, root, "\\_SB.PCI0.IDE0.PRIM.SLAV", /* A3x, R32 */ | ||
| 4618 | "\\_SB.PCI0.IDE0.IDEP.IDPS", /* 600e/x, 770e, 770x */ | ||
| 4619 | ); /* all others */ | ||
| 4620 | TPACPI_HANDLE(bay2_ej, bay2, "_EJ3", /* 600e/x, 770e, A3x */ | ||
| 4621 | "_EJ0", /* 770x */ | ||
| 4622 | ); /* all others */ | ||
| 4623 | |||
| 4624 | static int __init bay_init(struct ibm_init_struct *iibm) | ||
| 4625 | { | ||
| 4626 | vdbg_printk(TPACPI_DBG_INIT, "initializing bay subdriver\n"); | ||
| 4627 | |||
| 4628 | TPACPI_ACPIHANDLE_INIT(bay); | ||
| 4629 | if (bay_handle) | ||
| 4630 | TPACPI_ACPIHANDLE_INIT(bay_ej); | ||
| 4631 | TPACPI_ACPIHANDLE_INIT(bay2); | ||
| 4632 | if (bay2_handle) | ||
| 4633 | TPACPI_ACPIHANDLE_INIT(bay2_ej); | ||
| 4634 | |||
| 4635 | tp_features.bay_status = bay_handle && | ||
| 4636 | acpi_evalf(bay_handle, NULL, "_STA", "qv"); | ||
| 4637 | tp_features.bay_status2 = bay2_handle && | ||
| 4638 | acpi_evalf(bay2_handle, NULL, "_STA", "qv"); | ||
| 4639 | |||
| 4640 | tp_features.bay_eject = bay_handle && bay_ej_handle && | ||
| 4641 | (strlencmp(bay_ej_path, "_EJ0") == 0 || experimental); | ||
| 4642 | tp_features.bay_eject2 = bay2_handle && bay2_ej_handle && | ||
| 4643 | (strlencmp(bay2_ej_path, "_EJ0") == 0 || experimental); | ||
| 4644 | |||
| 4645 | vdbg_printk(TPACPI_DBG_INIT, | ||
| 4646 | "bay 1: status %s, eject %s; bay 2: status %s, eject %s\n", | ||
| 4647 | str_supported(tp_features.bay_status), | ||
| 4648 | str_supported(tp_features.bay_eject), | ||
| 4649 | str_supported(tp_features.bay_status2), | ||
| 4650 | str_supported(tp_features.bay_eject2)); | ||
| 4651 | |||
| 4652 | return (tp_features.bay_status || tp_features.bay_eject || | ||
| 4653 | tp_features.bay_status2 || tp_features.bay_eject2)? 0 : 1; | ||
| 4654 | } | ||
| 4655 | |||
| 4656 | static void bay_notify(struct ibm_struct *ibm, u32 event) | ||
| 4657 | { | ||
| 4658 | acpi_bus_generate_proc_event(ibm->acpi->device, event, 0); | ||
| 4659 | acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class, | ||
| 4660 | dev_name(&ibm->acpi->device->dev), | ||
| 4661 | event, 0); | ||
| 4662 | } | ||
| 4663 | |||
| 4664 | #define bay_occupied(b) (_sta(b##_handle) & 1) | ||
| 4665 | |||
| 4666 | static int bay_read(char *p) | ||
| 4667 | { | ||
| 4668 | int len = 0; | ||
| 4669 | int occupied = bay_occupied(bay); | ||
| 4670 | int occupied2 = bay_occupied(bay2); | ||
| 4671 | int eject, eject2; | ||
| 4672 | |||
| 4673 | len += sprintf(p + len, "status:\t\t%s\n", | ||
| 4674 | tp_features.bay_status ? | ||
| 4675 | (occupied ? "occupied" : "unoccupied") : | ||
| 4676 | "not supported"); | ||
| 4677 | if (tp_features.bay_status2) | ||
| 4678 | len += sprintf(p + len, "status2:\t%s\n", occupied2 ? | ||
| 4679 | "occupied" : "unoccupied"); | ||
| 4680 | |||
| 4681 | eject = tp_features.bay_eject && occupied; | ||
| 4682 | eject2 = tp_features.bay_eject2 && occupied2; | ||
| 4683 | |||
| 4684 | if (eject && eject2) | ||
| 4685 | len += sprintf(p + len, "commands:\teject, eject2\n"); | ||
| 4686 | else if (eject) | ||
| 4687 | len += sprintf(p + len, "commands:\teject\n"); | ||
| 4688 | else if (eject2) | ||
| 4689 | len += sprintf(p + len, "commands:\teject2\n"); | ||
| 4690 | |||
| 4691 | return len; | ||
| 4692 | } | ||
| 4693 | |||
| 4694 | static int bay_write(char *buf) | ||
| 4695 | { | ||
| 4696 | char *cmd; | ||
| 4697 | |||
| 4698 | if (!tp_features.bay_eject && !tp_features.bay_eject2) | ||
| 4699 | return -ENODEV; | ||
| 4700 | |||
| 4701 | while ((cmd = next_cmd(&buf))) { | ||
| 4702 | if (tp_features.bay_eject && strlencmp(cmd, "eject") == 0) { | ||
| 4703 | if (!acpi_evalf(bay_ej_handle, NULL, NULL, "vd", 1)) | ||
| 4704 | return -EIO; | ||
| 4705 | } else if (tp_features.bay_eject2 && | ||
| 4706 | strlencmp(cmd, "eject2") == 0) { | ||
| 4707 | if (!acpi_evalf(bay2_ej_handle, NULL, NULL, "vd", 1)) | ||
| 4708 | return -EIO; | ||
| 4709 | } else | ||
| 4710 | return -EINVAL; | ||
| 4711 | } | ||
| 4712 | |||
| 4713 | return 0; | ||
| 4714 | } | ||
| 4715 | |||
| 4716 | static struct tp_acpi_drv_struct ibm_bay_acpidriver = { | ||
| 4717 | .notify = bay_notify, | ||
| 4718 | .handle = &bay_handle, | ||
| 4719 | .type = ACPI_SYSTEM_NOTIFY, | ||
| 4720 | }; | ||
| 4721 | |||
| 4722 | static struct ibm_struct bay_driver_data = { | ||
| 4723 | .name = "bay", | ||
| 4724 | .read = bay_read, | ||
| 4725 | .write = bay_write, | ||
| 4726 | .acpi = &ibm_bay_acpidriver, | ||
| 4727 | }; | ||
| 4728 | |||
| 4729 | #endif /* CONFIG_THINKPAD_ACPI_BAY */ | ||
| 4730 | |||
| 4731 | /************************************************************************* | ||
| 4732 | * CMOS subdriver | 4429 | * CMOS subdriver |
| 4733 | */ | 4430 | */ |
| 4734 | 4431 | ||
| @@ -5945,14 +5642,48 @@ static struct backlight_ops ibm_backlight_data = { | |||
| 5945 | 5642 | ||
| 5946 | /* --------------------------------------------------------------------- */ | 5643 | /* --------------------------------------------------------------------- */ |
| 5947 | 5644 | ||
| 5645 | /* | ||
| 5646 | * These are only useful for models that have only one possibility | ||
| 5647 | * of GPU. If the BIOS model handles both ATI and Intel, don't use | ||
| 5648 | * these quirks. | ||
| 5649 | */ | ||
| 5650 | #define TPACPI_BRGHT_Q_NOEC 0x0001 /* Must NOT use EC HBRV */ | ||
| 5651 | #define TPACPI_BRGHT_Q_EC 0x0002 /* Should or must use EC HBRV */ | ||
| 5652 | #define TPACPI_BRGHT_Q_ASK 0x8000 /* Ask for user report */ | ||
| 5653 | |||
| 5654 | static const struct tpacpi_quirk brightness_quirk_table[] __initconst = { /* passed to tpacpi_check_quirks() by brightness_init() */ | ||
| 5655 | /* Models with ATI GPUs known to require ECNVRAM mode */ | ||
| 5656 | TPACPI_Q_IBM('1', 'Y', TPACPI_BRGHT_Q_EC), /* T43/p ATI */ | ||
| 5657 | |||
| 5658 | /* Models with ATI GPUs (waiting confirmation, hence TPACPI_BRGHT_Q_ASK) */ | ||
| 5659 | TPACPI_Q_IBM('1', 'R', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), | ||
| 5660 | TPACPI_Q_IBM('1', 'Q', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), | ||
| 5661 | TPACPI_Q_IBM('7', '6', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), | ||
| 5662 | TPACPI_Q_IBM('7', '8', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), | ||
| 5663 | |||
| 5664 | /* Models with Intel Extreme Graphics 2 (waiting confirmation, hence TPACPI_BRGHT_Q_ASK) */ | ||
| 5665 | TPACPI_Q_IBM('1', 'V', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), | ||
| 5666 | TPACPI_Q_IBM('1', 'W', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), | ||
| 5667 | TPACPI_Q_IBM('1', 'U', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), | ||
| 5668 | |||
| 5669 | /* Models with Intel GMA900 */ | ||
| 5670 | TPACPI_Q_IBM('7', '0', TPACPI_BRGHT_Q_NOEC), /* T43, R52 */ | ||
| 5671 | TPACPI_Q_IBM('7', '4', TPACPI_BRGHT_Q_NOEC), /* X41 */ | ||
| 5672 | TPACPI_Q_IBM('7', '5', TPACPI_BRGHT_Q_NOEC), /* X41 Tablet */ | ||
| 5673 | }; | ||
| 5674 | |||
| 5948 | static int __init brightness_init(struct ibm_init_struct *iibm) | 5675 | static int __init brightness_init(struct ibm_init_struct *iibm) |
| 5949 | { | 5676 | { |
| 5950 | int b; | 5677 | int b; |
| 5678 | unsigned long quirks; | ||
| 5951 | 5679 | ||
| 5952 | vdbg_printk(TPACPI_DBG_INIT, "initializing brightness subdriver\n"); | 5680 | vdbg_printk(TPACPI_DBG_INIT, "initializing brightness subdriver\n"); |
| 5953 | 5681 | ||
| 5954 | mutex_init(&brightness_mutex); | 5682 | mutex_init(&brightness_mutex); |
| 5955 | 5683 | ||
| 5684 | quirks = tpacpi_check_quirks(brightness_quirk_table, | ||
| 5685 | ARRAY_SIZE(brightness_quirk_table)); | ||
| 5686 | |||
| 5956 | /* | 5687 | /* |
| 5957 | * We always attempt to detect acpi support, so as to switch | 5688 | * We always attempt to detect acpi support, so as to switch |
| 5958 | * Lenovo Vista BIOS to ACPI brightness mode even if we are not | 5689 | * Lenovo Vista BIOS to ACPI brightness mode even if we are not |
| @@ -6009,23 +5740,13 @@ static int __init brightness_init(struct ibm_init_struct *iibm) | |||
| 6009 | /* TPACPI_BRGHT_MODE_AUTO not implemented yet, just use default */ | 5740 | /* TPACPI_BRGHT_MODE_AUTO not implemented yet, just use default */ |
| 6010 | if (brightness_mode == TPACPI_BRGHT_MODE_AUTO || | 5741 | if (brightness_mode == TPACPI_BRGHT_MODE_AUTO || |
| 6011 | brightness_mode == TPACPI_BRGHT_MODE_MAX) { | 5742 | brightness_mode == TPACPI_BRGHT_MODE_MAX) { |
| 6012 | if (thinkpad_id.vendor == PCI_VENDOR_ID_IBM) { | 5743 | if (quirks & TPACPI_BRGHT_Q_EC) |
| 6013 | /* | 5744 | brightness_mode = TPACPI_BRGHT_MODE_ECNVRAM; |
| 6014 | * IBM models that define HBRV probably have | 5745 | else |
| 6015 | * EC-based backlight level control | ||
| 6016 | */ | ||
| 6017 | if (acpi_evalf(ec_handle, NULL, "HBRV", "qd")) | ||
| 6018 | /* T40-T43, R50-R52, R50e, R51e, X31-X41 */ | ||
| 6019 | brightness_mode = TPACPI_BRGHT_MODE_ECNVRAM; | ||
| 6020 | else | ||
| 6021 | /* all other IBM ThinkPads */ | ||
| 6022 | brightness_mode = TPACPI_BRGHT_MODE_UCMS_STEP; | ||
| 6023 | } else | ||
| 6024 | /* All Lenovo ThinkPads */ | ||
| 6025 | brightness_mode = TPACPI_BRGHT_MODE_UCMS_STEP; | 5746 | brightness_mode = TPACPI_BRGHT_MODE_UCMS_STEP; |
| 6026 | 5747 | ||
| 6027 | dbg_printk(TPACPI_DBG_BRGHT, | 5748 | dbg_printk(TPACPI_DBG_BRGHT, |
| 6028 | "selected brightness_mode=%d\n", | 5749 | "driver auto-selected brightness_mode=%d\n", |
| 6029 | brightness_mode); | 5750 | brightness_mode); |
| 6030 | } | 5751 | } |
| 6031 | 5752 | ||
| @@ -6052,6 +5773,15 @@ static int __init brightness_init(struct ibm_init_struct *iibm) | |||
| 6052 | vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, | 5773 | vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, |
| 6053 | "brightness is supported\n"); | 5774 | "brightness is supported\n"); |
| 6054 | 5775 | ||
| 5776 | if (quirks & TPACPI_BRGHT_Q_ASK) { | ||
| 5777 | printk(TPACPI_NOTICE | ||
| 5778 | "brightness: will use unverified default: " | ||
| 5779 | "brightness_mode=%d\n", brightness_mode); | ||
| 5780 | printk(TPACPI_NOTICE | ||
| 5781 | "brightness: please report to %s whether it works well " | ||
| 5782 | "or not on your ThinkPad\n", TPACPI_MAIL); | ||
| 5783 | } | ||
| 5784 | |||
| 6055 | ibm_backlight_device->props.max_brightness = | 5785 | ibm_backlight_device->props.max_brightness = |
| 6056 | (tp_features.bright_16levels)? 15 : 7; | 5786 | (tp_features.bright_16levels)? 15 : 7; |
| 6057 | ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK; | 5787 | ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK; |
| @@ -7854,22 +7584,6 @@ static struct ibm_init_struct ibms_init[] __initdata = { | |||
| 7854 | .init = light_init, | 7584 | .init = light_init, |
| 7855 | .data = &light_driver_data, | 7585 | .data = &light_driver_data, |
| 7856 | }, | 7586 | }, |
| 7857 | #ifdef CONFIG_THINKPAD_ACPI_DOCK | ||
| 7858 | { | ||
| 7859 | .init = dock_init, | ||
| 7860 | .data = &dock_driver_data[0], | ||
| 7861 | }, | ||
| 7862 | { | ||
| 7863 | .init = dock_init2, | ||
| 7864 | .data = &dock_driver_data[1], | ||
| 7865 | }, | ||
| 7866 | #endif | ||
| 7867 | #ifdef CONFIG_THINKPAD_ACPI_BAY | ||
| 7868 | { | ||
| 7869 | .init = bay_init, | ||
| 7870 | .data = &bay_driver_data, | ||
| 7871 | }, | ||
| 7872 | #endif | ||
| 7873 | { | 7587 | { |
| 7874 | .init = cmos_init, | 7588 | .init = cmos_init, |
| 7875 | .data = &cmos_driver_data, | 7589 | .data = &cmos_driver_data, |
| @@ -7968,12 +7682,6 @@ TPACPI_PARAM(hotkey); | |||
| 7968 | TPACPI_PARAM(bluetooth); | 7682 | TPACPI_PARAM(bluetooth); |
| 7969 | TPACPI_PARAM(video); | 7683 | TPACPI_PARAM(video); |
| 7970 | TPACPI_PARAM(light); | 7684 | TPACPI_PARAM(light); |
| 7971 | #ifdef CONFIG_THINKPAD_ACPI_DOCK | ||
| 7972 | TPACPI_PARAM(dock); | ||
| 7973 | #endif | ||
| 7974 | #ifdef CONFIG_THINKPAD_ACPI_BAY | ||
| 7975 | TPACPI_PARAM(bay); | ||
| 7976 | #endif /* CONFIG_THINKPAD_ACPI_BAY */ | ||
| 7977 | TPACPI_PARAM(cmos); | 7685 | TPACPI_PARAM(cmos); |
| 7978 | TPACPI_PARAM(led); | 7686 | TPACPI_PARAM(led); |
| 7979 | TPACPI_PARAM(beep); | 7687 | TPACPI_PARAM(beep); |
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 7eda34838bfe..bdbc4f73fcdc 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig | |||
| @@ -43,6 +43,13 @@ config BATTERY_DS2760 | |||
| 43 | help | 43 | help |
| 44 | Say Y here to enable support for batteries with ds2760 chip. | 44 | Say Y here to enable support for batteries with ds2760 chip. |
| 45 | 45 | ||
| 46 | config BATTERY_DS2782 | ||
| 47 | tristate "DS2782 standalone gas-gauge" | ||
| 48 | depends on I2C | ||
| 49 | help | ||
| 50 | Say Y here to enable support for the DS2782 standalone battery | ||
| 51 | gas-gauge. | ||
| 52 | |||
| 46 | config BATTERY_PMU | 53 | config BATTERY_PMU |
| 47 | tristate "Apple PMU battery" | 54 | tristate "Apple PMU battery" |
| 48 | depends on PPC32 && ADB_PMU | 55 | depends on PPC32 && ADB_PMU |
diff --git a/drivers/power/Makefile b/drivers/power/Makefile index daf3179689aa..380d17c9ae29 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile | |||
| @@ -19,6 +19,7 @@ obj-$(CONFIG_APM_POWER) += apm_power.o | |||
| 19 | obj-$(CONFIG_WM8350_POWER) += wm8350_power.o | 19 | obj-$(CONFIG_WM8350_POWER) += wm8350_power.o |
| 20 | 20 | ||
| 21 | obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o | 21 | obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o |
| 22 | obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o | ||
| 22 | obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o | 23 | obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o |
| 23 | obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o | 24 | obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o |
| 24 | obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o | 25 | obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o |
diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c new file mode 100644 index 000000000000..da14f374cb60 --- /dev/null +++ b/drivers/power/ds2782_battery.c | |||
| @@ -0,0 +1,330 @@ | |||
| 1 | /* | ||
| 2 | * I2C client/driver for the Maxim/Dallas DS2782 Stand-Alone Fuel Gauge IC | ||
| 3 | * | ||
| 4 | * Copyright (C) 2009 Bluewater Systems Ltd | ||
| 5 | * | ||
| 6 | * Author: Ryan Mallon <ryan@bluewatersys.com> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License version 2 as | ||
| 10 | * published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/kernel.h> | ||
| 15 | #include <linux/module.h> | ||
| 16 | #include <linux/types.h> | ||
| 17 | #include <linux/errno.h> | ||
| 18 | #include <linux/swab.h> | ||
| 19 | #include <linux/i2c.h> | ||
| 20 | #include <linux/idr.h> | ||
| 21 | #include <linux/power_supply.h> | ||
| 22 | |||
| 23 | #define DS2782_REG_RARC 0x06 /* Remaining active relative capacity */ | ||
| 24 | |||
| 25 | #define DS2782_REG_VOLT_MSB 0x0c | ||
| 26 | #define DS2782_REG_TEMP_MSB 0x0a | ||
| 27 | #define DS2782_REG_CURRENT_MSB 0x0e | ||
| 28 | |||
| 29 | /* EEPROM Block */ | ||
| 30 | #define DS2782_REG_RSNSP 0x69 /* Sense resistor value */ | ||
| 31 | |||
| 32 | /* Current unit measurement in uA for a 1 milli-ohm sense resistor */ | ||
| 33 | #define DS2782_CURRENT_UNITS 1563 | ||
| 34 | |||
| 35 | #define to_ds2782_info(x) container_of(x, struct ds2782_info, battery) | ||
| 36 | |||
| 37 | struct ds2782_info { /* per-device state: allocated in probe, freed in remove */ | ||
| 38 | struct i2c_client *client; /* I2C client used for every register read */ | ||
| 39 | struct power_supply battery; /* embedded so to_ds2782_info() can recover this struct */ | ||
| 40 | int id; /* instance number handed to idr_remove() on removal */ | ||
| 41 | }; | ||
| 42 | |||
| 43 | static DEFINE_IDR(battery_id); | ||
| 44 | static DEFINE_MUTEX(battery_lock); | ||
| 45 | |||
| 46 | static inline int ds2782_read_reg(struct ds2782_info *info, int reg, u8 *val) | ||
| 47 | { | ||
| 48 | int ret; | ||
| 49 | |||
| 50 | ret = i2c_smbus_read_byte_data(info->client, reg); | ||
| 51 | if (ret < 0) { | ||
| 52 | dev_err(&info->client->dev, "register read failed\n"); | ||
| 53 | return ret; | ||
| 54 | } | ||
| 55 | |||
| 56 | *val = ret; | ||
| 57 | return 0; | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline int ds2782_read_reg16(struct ds2782_info *info, int reg_msb, | ||
| 61 | s16 *val) | ||
| 62 | { | ||
| 63 | int ret; | ||
| 64 | |||
| 65 | ret = swab16(i2c_smbus_read_word_data(info->client, reg_msb)); | ||
| 66 | if (ret < 0) { | ||
| 67 | dev_err(&info->client->dev, "register read failed\n"); | ||
| 68 | return ret; | ||
| 69 | } | ||
| 70 | |||
| 71 | *val = ret; | ||
| 72 | return 0; | ||
| 73 | } | ||
| 74 | |||
| 75 | static int ds2782_get_temp(struct ds2782_info *info, int *temp) | ||
| 76 | { | ||
| 77 | s16 raw; | ||
| 78 | int err; | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Temperature is measured in units of 0.125 degrees celcius, the | ||
| 82 | * power_supply class measures temperature in tenths of degrees | ||
| 83 | * celsius. The temperature value is stored as a 10 bit number, plus | ||
| 84 | * sign in the upper bits of a 16 bit register. | ||
| 85 | */ | ||
| 86 | err = ds2782_read_reg16(info, DS2782_REG_TEMP_MSB, &raw); | ||
| 87 | if (err) | ||
| 88 | return err; | ||
| 89 | *temp = ((raw / 32) * 125) / 100; | ||
| 90 | return 0; | ||
| 91 | } | ||
| 92 | |||
| 93 | static int ds2782_get_current(struct ds2782_info *info, int *current_uA) | ||
| 94 | { | ||
| 95 | int sense_res; | ||
| 96 | int err; | ||
| 97 | u8 sense_res_raw; | ||
| 98 | s16 raw; | ||
| 99 | |||
| 100 | /* | ||
| 101 | * The units of measurement for current are dependent on the value of | ||
| 102 | * the sense resistor. | ||
| 103 | */ | ||
| 104 | err = ds2782_read_reg(info, DS2782_REG_RSNSP, &sense_res_raw); | ||
| 105 | if (err) | ||
| 106 | return err; | ||
| 107 | if (sense_res_raw == 0) { | ||
| 108 | dev_err(&info->client->dev, "sense resistor value is 0\n"); | ||
| 109 | return -ENXIO; | ||
| 110 | } | ||
| 111 | sense_res = 1000 / sense_res_raw; | ||
| 112 | |||
| 113 | dev_dbg(&info->client->dev, "sense resistor = %d milli-ohms\n", | ||
| 114 | sense_res); | ||
| 115 | err = ds2782_read_reg16(info, DS2782_REG_CURRENT_MSB, &raw); | ||
| 116 | if (err) | ||
| 117 | return err; | ||
| 118 | *current_uA = raw * (DS2782_CURRENT_UNITS / sense_res); | ||
| 119 | return 0; | ||
| 120 | } | ||
| 121 | |||
| 122 | static int ds2782_get_voltage(struct ds2782_info *info, int *voltage_uA) | ||
| 123 | { | ||
| 124 | s16 raw; | ||
| 125 | int err; | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Voltage is measured in units of 4.88mV. The voltage is stored as | ||
| 129 | * a 10-bit number plus sign, in the upper bits of a 16-bit register | ||
| 130 | */ | ||
| 131 | err = ds2782_read_reg16(info, DS2782_REG_VOLT_MSB, &raw); | ||
| 132 | if (err) | ||
| 133 | return err; | ||
| 134 | *voltage_uA = (raw / 32) * 4800; | ||
| 135 | return 0; | ||
| 136 | } | ||
| 137 | |||
| 138 | static int ds2782_get_capacity(struct ds2782_info *info, int *capacity) | ||
| 139 | { | ||
| 140 | int err; | ||
| 141 | u8 raw; | ||
| 142 | |||
| 143 | err = ds2782_read_reg(info, DS2782_REG_RARC, &raw); | ||
| 144 | if (err) | ||
| 145 | return err; | ||
| 146 | *capacity = raw; | ||
| 147 | return raw; | ||
| 148 | } | ||
| 149 | |||
| 150 | static int ds2782_get_status(struct ds2782_info *info, int *status) | ||
| 151 | { | ||
| 152 | int err; | ||
| 153 | int current_uA; | ||
| 154 | int capacity; | ||
| 155 | |||
| 156 | err = ds2782_get_current(info, ¤t_uA); | ||
| 157 | if (err) | ||
| 158 | return err; | ||
| 159 | |||
| 160 | err = ds2782_get_capacity(info, &capacity); | ||
| 161 | if (err) | ||
| 162 | return err; | ||
| 163 | |||
| 164 | if (capacity == 100) | ||
| 165 | *status = POWER_SUPPLY_STATUS_FULL; | ||
| 166 | else if (current_uA == 0) | ||
| 167 | *status = POWER_SUPPLY_STATUS_NOT_CHARGING; | ||
| 168 | else if (current_uA < 0) | ||
| 169 | *status = POWER_SUPPLY_STATUS_DISCHARGING; | ||
| 170 | else | ||
| 171 | *status = POWER_SUPPLY_STATUS_CHARGING; | ||
| 172 | |||
| 173 | return 0; | ||
| 174 | } | ||
| 175 | |||
| 176 | static int ds2782_battery_get_property(struct power_supply *psy, | ||
| 177 | enum power_supply_property prop, | ||
| 178 | union power_supply_propval *val) | ||
| 179 | { | ||
| 180 | struct ds2782_info *info = to_ds2782_info(psy); | ||
| 181 | int ret; | ||
| 182 | |||
| 183 | switch (prop) { | ||
| 184 | case POWER_SUPPLY_PROP_STATUS: | ||
| 185 | ret = ds2782_get_status(info, &val->intval); | ||
| 186 | break; | ||
| 187 | |||
| 188 | case POWER_SUPPLY_PROP_CAPACITY: | ||
| 189 | ret = ds2782_get_capacity(info, &val->intval); | ||
| 190 | break; | ||
| 191 | |||
| 192 | case POWER_SUPPLY_PROP_VOLTAGE_NOW: | ||
| 193 | ret = ds2782_get_voltage(info, &val->intval); | ||
| 194 | break; | ||
| 195 | |||
| 196 | case POWER_SUPPLY_PROP_CURRENT_NOW: | ||
| 197 | ret = ds2782_get_current(info, &val->intval); | ||
| 198 | break; | ||
| 199 | |||
| 200 | case POWER_SUPPLY_PROP_TEMP: | ||
| 201 | ret = ds2782_get_temp(info, &val->intval); | ||
| 202 | break; | ||
| 203 | |||
| 204 | default: | ||
| 205 | ret = -EINVAL; | ||
| 206 | } | ||
| 207 | |||
| 208 | return ret; | ||
| 209 | } | ||
| 210 | |||
| 211 | static enum power_supply_property ds2782_battery_props[] = { /* properties served by ds2782_battery_get_property() */ | ||
| 212 | POWER_SUPPLY_PROP_STATUS, /* ds2782_get_status() */ | ||
| 213 | POWER_SUPPLY_PROP_CAPACITY, /* ds2782_get_capacity() */ | ||
| 214 | POWER_SUPPLY_PROP_VOLTAGE_NOW, /* ds2782_get_voltage() */ | ||
| 215 | POWER_SUPPLY_PROP_CURRENT_NOW, /* ds2782_get_current() */ | ||
| 216 | POWER_SUPPLY_PROP_TEMP, /* ds2782_get_temp() */ | ||
| 217 | }; | ||
| 218 | |||
| 219 | static void ds2782_power_supply_init(struct power_supply *battery) | ||
| 220 | { | ||
| 221 | battery->type = POWER_SUPPLY_TYPE_BATTERY; | ||
| 222 | battery->properties = ds2782_battery_props; | ||
| 223 | battery->num_properties = ARRAY_SIZE(ds2782_battery_props); | ||
| 224 | battery->get_property = ds2782_battery_get_property; | ||
| 225 | battery->external_power_changed = NULL; | ||
| 226 | } | ||
| 227 | |||
| 228 | static int ds2782_battery_remove(struct i2c_client *client) | ||
| 229 | { | ||
| 230 | struct ds2782_info *info = i2c_get_clientdata(client); | ||
| 231 | |||
| 232 | power_supply_unregister(&info->battery); | ||
| 233 | kfree(info->battery.name); | ||
| 234 | |||
| 235 | mutex_lock(&battery_lock); | ||
| 236 | idr_remove(&battery_id, info->id); | ||
| 237 | mutex_unlock(&battery_lock); | ||
| 238 | |||
| 239 | i2c_set_clientdata(client, info); | ||
| 240 | |||
| 241 | kfree(info); | ||
| 242 | return 0; | ||
| 243 | } | ||
| 244 | |||
| 245 | static int ds2782_battery_probe(struct i2c_client *client, | ||
| 246 | const struct i2c_device_id *id) | ||
| 247 | { | ||
| 248 | struct ds2782_info *info; | ||
| 249 | int ret; | ||
| 250 | int num; | ||
| 251 | |||
| 252 | /* Get an ID for this battery */ | ||
| 253 | ret = idr_pre_get(&battery_id, GFP_KERNEL); | ||
| 254 | if (ret == 0) { | ||
| 255 | ret = -ENOMEM; | ||
| 256 | goto fail_id; | ||
| 257 | } | ||
| 258 | |||
| 259 | mutex_lock(&battery_lock); | ||
| 260 | ret = idr_get_new(&battery_id, client, &num); | ||
| 261 | mutex_unlock(&battery_lock); | ||
| 262 | if (ret < 0) | ||
| 263 | goto fail_id; | ||
| 264 | |||
| 265 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
| 266 | if (!info) { | ||
| 267 | ret = -ENOMEM; | ||
| 268 | goto fail_info; | ||
| 269 | } | ||
| 270 | |||
| 271 | info->battery.name = kasprintf(GFP_KERNEL, "ds2782-%d", num); | ||
| 272 | if (!info->battery.name) { | ||
| 273 | ret = -ENOMEM; | ||
| 274 | goto fail_name; | ||
| 275 | } | ||
| 276 | |||
| 277 | i2c_set_clientdata(client, info); | ||
| 278 | info->client = client; | ||
| 279 | ds2782_power_supply_init(&info->battery); | ||
| 280 | |||
| 281 | ret = power_supply_register(&client->dev, &info->battery); | ||
| 282 | if (ret) { | ||
| 283 | dev_err(&client->dev, "failed to register battery\n"); | ||
| 284 | goto fail_register; | ||
| 285 | } | ||
| 286 | |||
| 287 | return 0; | ||
| 288 | |||
| 289 | fail_register: | ||
| 290 | kfree(info->battery.name); | ||
| 291 | fail_name: | ||
| 292 | i2c_set_clientdata(client, info); | ||
| 293 | kfree(info); | ||
| 294 | fail_info: | ||
| 295 | mutex_lock(&battery_lock); | ||
| 296 | idr_remove(&battery_id, num); | ||
| 297 | mutex_unlock(&battery_lock); | ||
| 298 | fail_id: | ||
| 299 | return ret; | ||
| 300 | } | ||
| 301 | |||
| 302 | static const struct i2c_device_id ds2782_id[] = { | ||
| 303 | {"ds2782", 0}, | ||
| 304 | {}, | ||
| 305 | }; | ||
| 306 | |||
| 307 | static struct i2c_driver ds2782_battery_driver = { /* registered in ds2782_init(), removed in ds2782_exit() */ | ||
| 308 | .driver = { | ||
| 309 | .name = "ds2782-battery", | ||
| 310 | }, | ||
| 311 | .probe = ds2782_battery_probe, /* allocates state and registers the power supply */ | ||
| 312 | .remove = ds2782_battery_remove, /* unregisters and frees what probe set up */ | ||
| 313 | .id_table = ds2782_id, /* matches I2C devices named "ds2782" */ | ||
| 314 | }; | ||
| 315 | |||
| 316 | static int __init ds2782_init(void) | ||
| 317 | { | ||
| 318 | return i2c_add_driver(&ds2782_battery_driver); /* module load: register the I2C driver and pass its result through */ | ||
| 319 | } | ||
| 320 | module_init(ds2782_init); | ||
| 321 | |||
| 322 | static void __exit ds2782_exit(void) | ||
| 323 | { | ||
| 324 | i2c_del_driver(&ds2782_battery_driver); /* module unload: unregister the I2C driver added in ds2782_init() */ | ||
| 325 | } | ||
| 326 | module_exit(ds2782_exit); | ||
| 327 | |||
| 328 | MODULE_AUTHOR("Ryan Mallon <ryan@bluewatersys.com>"); | ||
| 329 | MODULE_DESCRIPTION("Maxim/Dallas DS2782 Stand-Alone Fuel Gauage IC driver"); | ||
| 330 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 5fbca2681baa..58e419299cd6 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | #include <linux/kernel.h> | ||
| 11 | #include <linux/module.h> | 12 | #include <linux/module.h> |
| 12 | #include <linux/err.h> | 13 | #include <linux/err.h> |
| 13 | #include <linux/platform_device.h> | 14 | #include <linux/platform_device.h> |
| @@ -35,6 +36,7 @@ | |||
| 35 | #define BAT_STAT_AC 0x10 | 36 | #define BAT_STAT_AC 0x10 |
| 36 | #define BAT_STAT_CHARGING 0x20 | 37 | #define BAT_STAT_CHARGING 0x20 |
| 37 | #define BAT_STAT_DISCHARGING 0x40 | 38 | #define BAT_STAT_DISCHARGING 0x40 |
| 39 | #define BAT_STAT_TRICKLE 0x80 | ||
| 38 | 40 | ||
| 39 | #define BAT_ERR_INFOFAIL 0x02 | 41 | #define BAT_ERR_INFOFAIL 0x02 |
| 40 | #define BAT_ERR_OVERVOLTAGE 0x04 | 42 | #define BAT_ERR_OVERVOLTAGE 0x04 |
| @@ -89,7 +91,7 @@ static char bat_serial[17]; /* Ick */ | |||
| 89 | static int olpc_bat_get_status(union power_supply_propval *val, uint8_t ec_byte) | 91 | static int olpc_bat_get_status(union power_supply_propval *val, uint8_t ec_byte) |
| 90 | { | 92 | { |
| 91 | if (olpc_platform_info.ecver > 0x44) { | 93 | if (olpc_platform_info.ecver > 0x44) { |
| 92 | if (ec_byte & BAT_STAT_CHARGING) | 94 | if (ec_byte & (BAT_STAT_CHARGING | BAT_STAT_TRICKLE)) |
| 93 | val->intval = POWER_SUPPLY_STATUS_CHARGING; | 95 | val->intval = POWER_SUPPLY_STATUS_CHARGING; |
| 94 | else if (ec_byte & BAT_STAT_DISCHARGING) | 96 | else if (ec_byte & BAT_STAT_DISCHARGING) |
| 95 | val->intval = POWER_SUPPLY_STATUS_DISCHARGING; | 97 | val->intval = POWER_SUPPLY_STATUS_DISCHARGING; |
| @@ -219,7 +221,8 @@ static int olpc_bat_get_property(struct power_supply *psy, | |||
| 219 | It doesn't matter though -- the EC will return the last-known | 221 | It doesn't matter though -- the EC will return the last-known |
| 220 | information, and it's as if we just ran that _little_ bit faster | 222 | information, and it's as if we just ran that _little_ bit faster |
| 221 | and managed to read it out before the battery went away. */ | 223 | and managed to read it out before the battery went away. */ |
| 222 | if (!(ec_byte & BAT_STAT_PRESENT) && psp != POWER_SUPPLY_PROP_PRESENT) | 224 | if (!(ec_byte & (BAT_STAT_PRESENT | BAT_STAT_TRICKLE)) && |
| 225 | psp != POWER_SUPPLY_PROP_PRESENT) | ||
| 223 | return -ENODEV; | 226 | return -ENODEV; |
| 224 | 227 | ||
| 225 | switch (psp) { | 228 | switch (psp) { |
| @@ -229,7 +232,8 @@ static int olpc_bat_get_property(struct power_supply *psy, | |||
| 229 | return ret; | 232 | return ret; |
| 230 | break; | 233 | break; |
| 231 | case POWER_SUPPLY_PROP_PRESENT: | 234 | case POWER_SUPPLY_PROP_PRESENT: |
| 232 | val->intval = !!(ec_byte & BAT_STAT_PRESENT); | 235 | val->intval = !!(ec_byte & (BAT_STAT_PRESENT | |
| 236 | BAT_STAT_TRICKLE)); | ||
| 233 | break; | 237 | break; |
| 234 | 238 | ||
| 235 | case POWER_SUPPLY_PROP_HEALTH: | 239 | case POWER_SUPPLY_PROP_HEALTH: |
| @@ -334,21 +338,21 @@ static ssize_t olpc_bat_eeprom_read(struct kobject *kobj, | |||
| 334 | struct bin_attribute *attr, char *buf, loff_t off, size_t count) | 338 | struct bin_attribute *attr, char *buf, loff_t off, size_t count) |
| 335 | { | 339 | { |
| 336 | uint8_t ec_byte; | 340 | uint8_t ec_byte; |
| 337 | int ret, end; | 341 | int ret; |
| 342 | int i; | ||
| 338 | 343 | ||
| 339 | if (off >= EEPROM_SIZE) | 344 | if (off >= EEPROM_SIZE) |
| 340 | return 0; | 345 | return 0; |
| 341 | if (off + count > EEPROM_SIZE) | 346 | if (off + count > EEPROM_SIZE) |
| 342 | count = EEPROM_SIZE - off; | 347 | count = EEPROM_SIZE - off; |
| 343 | 348 | ||
| 344 | end = EEPROM_START + off + count; | 349 | for (i = 0; i < count; i++) { |
| 345 | for (ec_byte = EEPROM_START + off; ec_byte < end; ec_byte++) { | 350 | ec_byte = EEPROM_START + off + i; |
| 346 | ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, | 351 | ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &buf[i], 1); |
| 347 | &buf[ec_byte - EEPROM_START], 1); | ||
| 348 | if (ret) { | 352 | if (ret) { |
| 349 | printk(KERN_ERR "olpc-battery: EC command " | 353 | pr_err("olpc-battery: " |
| 350 | "EC_BAT_EEPROM @ 0x%x failed -" | 354 | "EC_BAT_EEPROM cmd @ 0x%x failed - %d!\n", |
| 351 | " %d!\n", ec_byte, ret); | 355 | ec_byte, ret); |
| 352 | return -EIO; | 356 | return -EIO; |
| 353 | } | 357 | } |
| 354 | } | 358 | } |
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 8030e25152fb..c75d6f35cb5f 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c | |||
| @@ -553,40 +553,35 @@ static void _zfcp_erp_unit_reopen_all(struct zfcp_port *port, int clear, | |||
| 553 | _zfcp_erp_unit_reopen(unit, clear, id, ref); | 553 | _zfcp_erp_unit_reopen(unit, clear, id, ref); |
| 554 | } | 554 | } |
| 555 | 555 | ||
| 556 | static void zfcp_erp_strategy_followup_actions(struct zfcp_erp_action *act) | 556 | static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) |
| 557 | { | 557 | { |
| 558 | struct zfcp_adapter *adapter = act->adapter; | ||
| 559 | struct zfcp_port *port = act->port; | ||
| 560 | struct zfcp_unit *unit = act->unit; | ||
| 561 | u32 status = act->status; | ||
| 562 | |||
| 563 | /* initiate follow-up actions depending on success of finished action */ | ||
| 564 | switch (act->action) { | 558 | switch (act->action) { |
| 565 | |||
| 566 | case ZFCP_ERP_ACTION_REOPEN_ADAPTER: | 559 | case ZFCP_ERP_ACTION_REOPEN_ADAPTER: |
| 567 | if (status == ZFCP_ERP_SUCCEEDED) | 560 | _zfcp_erp_adapter_reopen(act->adapter, 0, "ersff_1", NULL); |
| 568 | _zfcp_erp_port_reopen_all(adapter, 0, "ersfa_1", NULL); | ||
| 569 | else | ||
| 570 | _zfcp_erp_adapter_reopen(adapter, 0, "ersfa_2", NULL); | ||
| 571 | break; | 561 | break; |
| 572 | |||
| 573 | case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: | 562 | case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: |
| 574 | if (status == ZFCP_ERP_SUCCEEDED) | 563 | _zfcp_erp_port_forced_reopen(act->port, 0, "ersff_2", NULL); |
| 575 | _zfcp_erp_port_reopen(port, 0, "ersfa_3", NULL); | ||
| 576 | else | ||
| 577 | _zfcp_erp_adapter_reopen(adapter, 0, "ersfa_4", NULL); | ||
| 578 | break; | 564 | break; |
| 579 | |||
| 580 | case ZFCP_ERP_ACTION_REOPEN_PORT: | 565 | case ZFCP_ERP_ACTION_REOPEN_PORT: |
| 581 | if (status == ZFCP_ERP_SUCCEEDED) | 566 | _zfcp_erp_port_reopen(act->port, 0, "ersff_3", NULL); |
| 582 | _zfcp_erp_unit_reopen_all(port, 0, "ersfa_5", NULL); | ||
| 583 | else | ||
| 584 | _zfcp_erp_port_forced_reopen(port, 0, "ersfa_6", NULL); | ||
| 585 | break; | 567 | break; |
| 586 | |||
| 587 | case ZFCP_ERP_ACTION_REOPEN_UNIT: | 568 | case ZFCP_ERP_ACTION_REOPEN_UNIT: |
| 588 | if (status != ZFCP_ERP_SUCCEEDED) | 569 | _zfcp_erp_unit_reopen(act->unit, 0, "ersff_4", NULL); |
| 589 | _zfcp_erp_port_reopen(unit->port, 0, "ersfa_7", NULL); | 570 | break; |
| 571 | } | ||
| 572 | } | ||
| 573 | |||
| 574 | static void zfcp_erp_strategy_followup_success(struct zfcp_erp_action *act) | ||
| 575 | { | ||
| 576 | switch (act->action) { | ||
| 577 | case ZFCP_ERP_ACTION_REOPEN_ADAPTER: | ||
| 578 | _zfcp_erp_port_reopen_all(act->adapter, 0, "ersfs_1", NULL); | ||
| 579 | break; | ||
| 580 | case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: | ||
| 581 | _zfcp_erp_port_reopen(act->port, 0, "ersfs_2", NULL); | ||
| 582 | break; | ||
| 583 | case ZFCP_ERP_ACTION_REOPEN_PORT: | ||
| 584 | _zfcp_erp_unit_reopen_all(act->port, 0, "ersfs_3", NULL); | ||
| 590 | break; | 585 | break; |
| 591 | } | 586 | } |
| 592 | } | 587 | } |
| @@ -801,7 +796,7 @@ static int zfcp_erp_port_forced_strategy(struct zfcp_erp_action *erp_action) | |||
| 801 | return ZFCP_ERP_FAILED; | 796 | return ZFCP_ERP_FAILED; |
| 802 | 797 | ||
| 803 | case ZFCP_ERP_STEP_PHYS_PORT_CLOSING: | 798 | case ZFCP_ERP_STEP_PHYS_PORT_CLOSING: |
| 804 | if (status & ZFCP_STATUS_PORT_PHYS_OPEN) | 799 | if (!(status & ZFCP_STATUS_PORT_PHYS_OPEN)) |
| 805 | return ZFCP_ERP_SUCCEEDED; | 800 | return ZFCP_ERP_SUCCEEDED; |
| 806 | } | 801 | } |
| 807 | return ZFCP_ERP_FAILED; | 802 | return ZFCP_ERP_FAILED; |
| @@ -853,11 +848,17 @@ void zfcp_erp_port_strategy_open_lookup(struct work_struct *work) | |||
| 853 | gid_pn_work); | 848 | gid_pn_work); |
| 854 | 849 | ||
| 855 | retval = zfcp_fc_ns_gid_pn(&port->erp_action); | 850 | retval = zfcp_fc_ns_gid_pn(&port->erp_action); |
| 856 | if (retval == -ENOMEM) | 851 | if (!retval) { |
| 857 | zfcp_erp_notify(&port->erp_action, ZFCP_ERP_NOMEM); | 852 | port->erp_action.step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP; |
| 858 | port->erp_action.step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP; | 853 | goto out; |
| 859 | if (retval) | 854 | } |
| 860 | zfcp_erp_notify(&port->erp_action, ZFCP_ERP_FAILED); | 855 | if (retval == -ENOMEM) { |
| 856 | zfcp_erp_notify(&port->erp_action, ZFCP_STATUS_ERP_LOWMEM); | ||
| 857 | goto out; | ||
| 858 | } | ||
| 859 | /* all other error condtions */ | ||
| 860 | zfcp_erp_notify(&port->erp_action, 0); | ||
| 861 | out: | ||
| 861 | zfcp_port_put(port); | 862 | zfcp_port_put(port); |
| 862 | } | 863 | } |
| 863 | 864 | ||
| @@ -1289,7 +1290,10 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action) | |||
| 1289 | retval = zfcp_erp_strategy_statechange(erp_action, retval); | 1290 | retval = zfcp_erp_strategy_statechange(erp_action, retval); |
| 1290 | if (retval == ZFCP_ERP_EXIT) | 1291 | if (retval == ZFCP_ERP_EXIT) |
| 1291 | goto unlock; | 1292 | goto unlock; |
| 1292 | zfcp_erp_strategy_followup_actions(erp_action); | 1293 | if (retval == ZFCP_ERP_SUCCEEDED) |
| 1294 | zfcp_erp_strategy_followup_success(erp_action); | ||
| 1295 | if (retval == ZFCP_ERP_FAILED) | ||
| 1296 | zfcp_erp_strategy_followup_failed(erp_action); | ||
| 1293 | 1297 | ||
| 1294 | unlock: | 1298 | unlock: |
| 1295 | write_unlock(&adapter->erp_lock); | 1299 | write_unlock(&adapter->erp_lock); |
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 2f0705d76b72..47daebfa7e59 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c | |||
| @@ -79,11 +79,9 @@ static int zfcp_wka_port_get(struct zfcp_wka_port *wka_port) | |||
| 79 | 79 | ||
| 80 | mutex_unlock(&wka_port->mutex); | 80 | mutex_unlock(&wka_port->mutex); |
| 81 | 81 | ||
| 82 | wait_event_timeout( | 82 | wait_event(wka_port->completion_wq, |
| 83 | wka_port->completion_wq, | 83 | wka_port->status == ZFCP_WKA_PORT_ONLINE || |
| 84 | wka_port->status == ZFCP_WKA_PORT_ONLINE || | 84 | wka_port->status == ZFCP_WKA_PORT_OFFLINE); |
| 85 | wka_port->status == ZFCP_WKA_PORT_OFFLINE, | ||
| 86 | HZ >> 1); | ||
| 87 | 85 | ||
| 88 | if (wka_port->status == ZFCP_WKA_PORT_ONLINE) { | 86 | if (wka_port->status == ZFCP_WKA_PORT_ONLINE) { |
| 89 | atomic_inc(&wka_port->refcount); | 87 | atomic_inc(&wka_port->refcount); |
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c57658f3d34f..47795fbf081f 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c | |||
| @@ -670,8 +670,11 @@ static int zfcp_fsf_req_sbal_get(struct zfcp_adapter *adapter) | |||
| 670 | zfcp_fsf_sbal_check(adapter), 5 * HZ); | 670 | zfcp_fsf_sbal_check(adapter), 5 * HZ); |
| 671 | if (ret > 0) | 671 | if (ret > 0) |
| 672 | return 0; | 672 | return 0; |
| 673 | if (!ret) | 673 | if (!ret) { |
| 674 | atomic_inc(&adapter->qdio_outb_full); | 674 | atomic_inc(&adapter->qdio_outb_full); |
| 675 | /* assume hanging outbound queue, try queue recovery */ | ||
| 676 | zfcp_erp_adapter_reopen(adapter, 0, "fsrsg_1", NULL); | ||
| 677 | } | ||
| 675 | 678 | ||
| 676 | spin_lock_bh(&adapter->req_q_lock); | 679 | spin_lock_bh(&adapter->req_q_lock); |
| 677 | return -EIO; | 680 | return -EIO; |
| @@ -722,7 +725,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_adapter *adapter, | |||
| 722 | req = zfcp_fsf_alloc_qtcb(pool); | 725 | req = zfcp_fsf_alloc_qtcb(pool); |
| 723 | 726 | ||
| 724 | if (unlikely(!req)) | 727 | if (unlikely(!req)) |
| 725 | return ERR_PTR(-EIO); | 728 | return ERR_PTR(-ENOMEM); |
| 726 | 729 | ||
| 727 | if (adapter->req_no == 0) | 730 | if (adapter->req_no == 0) |
| 728 | adapter->req_no++; | 731 | adapter->req_no++; |
| @@ -1010,6 +1013,23 @@ skip_fsfstatus: | |||
| 1010 | send_ct->handler(send_ct->handler_data); | 1013 | send_ct->handler(send_ct->handler_data); |
| 1011 | } | 1014 | } |
| 1012 | 1015 | ||
| 1016 | static void zfcp_fsf_setup_ct_els_unchained(struct qdio_buffer_element *sbale, | ||
| 1017 | struct scatterlist *sg_req, | ||
| 1018 | struct scatterlist *sg_resp) | ||
| 1019 | { | ||
| 1020 | sbale[0].flags |= SBAL_FLAGS0_TYPE_WRITE_READ; | ||
| 1021 | sbale[2].addr = sg_virt(sg_req); | ||
| 1022 | sbale[2].length = sg_req->length; | ||
| 1023 | sbale[3].addr = sg_virt(sg_resp); | ||
| 1024 | sbale[3].length = sg_resp->length; | ||
| 1025 | sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | static int zfcp_fsf_one_sbal(struct scatterlist *sg) | ||
| 1029 | { | ||
| 1030 | return sg_is_last(sg) && sg->length <= PAGE_SIZE; | ||
| 1031 | } | ||
| 1032 | |||
| 1013 | static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, | 1033 | static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, |
| 1014 | struct scatterlist *sg_req, | 1034 | struct scatterlist *sg_req, |
| 1015 | struct scatterlist *sg_resp, | 1035 | struct scatterlist *sg_resp, |
| @@ -1020,30 +1040,30 @@ static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, | |||
| 1020 | int bytes; | 1040 | int bytes; |
| 1021 | 1041 | ||
| 1022 | if (!(feat & FSF_FEATURE_ELS_CT_CHAINED_SBALS)) { | 1042 | if (!(feat & FSF_FEATURE_ELS_CT_CHAINED_SBALS)) { |
| 1023 | if (sg_req->length > PAGE_SIZE || sg_resp->length > PAGE_SIZE || | 1043 | if (!zfcp_fsf_one_sbal(sg_req) || !zfcp_fsf_one_sbal(sg_resp)) |
| 1024 | !sg_is_last(sg_req) || !sg_is_last(sg_resp)) | ||
| 1025 | return -EOPNOTSUPP; | 1044 | return -EOPNOTSUPP; |
| 1026 | 1045 | ||
| 1027 | sbale[0].flags |= SBAL_FLAGS0_TYPE_WRITE_READ; | 1046 | zfcp_fsf_setup_ct_els_unchained(sbale, sg_req, sg_resp); |
| 1028 | sbale[2].addr = sg_virt(sg_req); | 1047 | return 0; |
| 1029 | sbale[2].length = sg_req->length; | 1048 | } |
| 1030 | sbale[3].addr = sg_virt(sg_resp); | 1049 | |
| 1031 | sbale[3].length = sg_resp->length; | 1050 | /* use single, unchained SBAL if it can hold the request */ |
| 1032 | sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; | 1051 | if (zfcp_fsf_one_sbal(sg_req) && zfcp_fsf_one_sbal(sg_resp)) { |
| 1052 | zfcp_fsf_setup_ct_els_unchained(sbale, sg_req, sg_resp); | ||
| 1033 | return 0; | 1053 | return 0; |
| 1034 | } | 1054 | } |
| 1035 | 1055 | ||
| 1036 | bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, | 1056 | bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, |
| 1037 | sg_req, max_sbals); | 1057 | sg_req, max_sbals); |
| 1038 | if (bytes <= 0) | 1058 | if (bytes <= 0) |
| 1039 | return -ENOMEM; | 1059 | return -EIO; |
| 1040 | req->qtcb->bottom.support.req_buf_length = bytes; | 1060 | req->qtcb->bottom.support.req_buf_length = bytes; |
| 1041 | req->sbale_curr = ZFCP_LAST_SBALE_PER_SBAL; | 1061 | req->sbale_curr = ZFCP_LAST_SBALE_PER_SBAL; |
| 1042 | 1062 | ||
| 1043 | bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, | 1063 | bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, |
| 1044 | sg_resp, max_sbals); | 1064 | sg_resp, max_sbals); |
| 1045 | if (bytes <= 0) | 1065 | if (bytes <= 0) |
| 1046 | return -ENOMEM; | 1066 | return -EIO; |
| 1047 | req->qtcb->bottom.support.resp_buf_length = bytes; | 1067 | req->qtcb->bottom.support.resp_buf_length = bytes; |
| 1048 | 1068 | ||
| 1049 | return 0; | 1069 | return 0; |
| @@ -1607,10 +1627,10 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req) | |||
| 1607 | case FSF_ACCESS_DENIED: | 1627 | case FSF_ACCESS_DENIED: |
| 1608 | wka_port->status = ZFCP_WKA_PORT_OFFLINE; | 1628 | wka_port->status = ZFCP_WKA_PORT_OFFLINE; |
| 1609 | break; | 1629 | break; |
| 1610 | case FSF_PORT_ALREADY_OPEN: | ||
| 1611 | break; | ||
| 1612 | case FSF_GOOD: | 1630 | case FSF_GOOD: |
| 1613 | wka_port->handle = header->port_handle; | 1631 | wka_port->handle = header->port_handle; |
| 1632 | /* fall through */ | ||
| 1633 | case FSF_PORT_ALREADY_OPEN: | ||
| 1614 | wka_port->status = ZFCP_WKA_PORT_ONLINE; | 1634 | wka_port->status = ZFCP_WKA_PORT_ONLINE; |
| 1615 | } | 1635 | } |
| 1616 | out: | 1636 | out: |
| @@ -1731,15 +1751,16 @@ static void zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *req) | |||
| 1731 | zfcp_fsf_access_denied_port(req, port); | 1751 | zfcp_fsf_access_denied_port(req, port); |
| 1732 | break; | 1752 | break; |
| 1733 | case FSF_PORT_BOXED: | 1753 | case FSF_PORT_BOXED: |
| 1734 | zfcp_erp_port_boxed(port, "fscpph2", req); | ||
| 1735 | req->status |= ZFCP_STATUS_FSFREQ_ERROR | | ||
| 1736 | ZFCP_STATUS_FSFREQ_RETRY; | ||
| 1737 | /* can't use generic zfcp_erp_modify_port_status because | 1754 | /* can't use generic zfcp_erp_modify_port_status because |
| 1738 | * ZFCP_STATUS_COMMON_OPEN must not be reset for the port */ | 1755 | * ZFCP_STATUS_COMMON_OPEN must not be reset for the port */ |
| 1739 | atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status); | 1756 | atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status); |
| 1740 | list_for_each_entry(unit, &port->unit_list_head, list) | 1757 | list_for_each_entry(unit, &port->unit_list_head, list) |
| 1741 | atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, | 1758 | atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, |
| 1742 | &unit->status); | 1759 | &unit->status); |
| 1760 | zfcp_erp_port_boxed(port, "fscpph2", req); | ||
| 1761 | req->status |= ZFCP_STATUS_FSFREQ_ERROR | | ||
| 1762 | ZFCP_STATUS_FSFREQ_RETRY; | ||
| 1763 | |||
| 1743 | break; | 1764 | break; |
| 1744 | case FSF_ADAPTER_STATUS_AVAILABLE: | 1765 | case FSF_ADAPTER_STATUS_AVAILABLE: |
| 1745 | switch (header->fsf_status_qual.word[0]) { | 1766 | switch (header->fsf_status_qual.word[0]) { |
| @@ -2541,7 +2562,6 @@ struct zfcp_fsf_req *zfcp_fsf_control_file(struct zfcp_adapter *adapter, | |||
| 2541 | bytes = zfcp_qdio_sbals_from_sg(req, direction, fsf_cfdc->sg, | 2562 | bytes = zfcp_qdio_sbals_from_sg(req, direction, fsf_cfdc->sg, |
| 2542 | FSF_MAX_SBALS_PER_REQ); | 2563 | FSF_MAX_SBALS_PER_REQ); |
| 2543 | if (bytes != ZFCP_CFDC_MAX_SIZE) { | 2564 | if (bytes != ZFCP_CFDC_MAX_SIZE) { |
| 2544 | retval = -ENOMEM; | ||
| 2545 | zfcp_fsf_req_free(req); | 2565 | zfcp_fsf_req_free(req); |
| 2546 | goto out; | 2566 | goto out; |
| 2547 | } | 2567 | } |
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 967ede73f4c5..6925a1784682 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c | |||
| @@ -167,20 +167,21 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |||
| 167 | struct zfcp_unit *unit = scpnt->device->hostdata; | 167 | struct zfcp_unit *unit = scpnt->device->hostdata; |
| 168 | struct zfcp_fsf_req *old_req, *abrt_req; | 168 | struct zfcp_fsf_req *old_req, *abrt_req; |
| 169 | unsigned long flags; | 169 | unsigned long flags; |
| 170 | unsigned long old_req_id = (unsigned long) scpnt->host_scribble; | 170 | unsigned long old_reqid = (unsigned long) scpnt->host_scribble; |
| 171 | int retval = SUCCESS; | 171 | int retval = SUCCESS; |
| 172 | int retry = 3; | 172 | int retry = 3; |
| 173 | char *dbf_tag; | ||
| 173 | 174 | ||
| 174 | /* avoid race condition between late normal completion and abort */ | 175 | /* avoid race condition between late normal completion and abort */ |
| 175 | write_lock_irqsave(&adapter->abort_lock, flags); | 176 | write_lock_irqsave(&adapter->abort_lock, flags); |
| 176 | 177 | ||
| 177 | spin_lock(&adapter->req_list_lock); | 178 | spin_lock(&adapter->req_list_lock); |
| 178 | old_req = zfcp_reqlist_find(adapter, old_req_id); | 179 | old_req = zfcp_reqlist_find(adapter, old_reqid); |
| 179 | spin_unlock(&adapter->req_list_lock); | 180 | spin_unlock(&adapter->req_list_lock); |
| 180 | if (!old_req) { | 181 | if (!old_req) { |
| 181 | write_unlock_irqrestore(&adapter->abort_lock, flags); | 182 | write_unlock_irqrestore(&adapter->abort_lock, flags); |
| 182 | zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, | 183 | zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, |
| 183 | old_req_id); | 184 | old_reqid); |
| 184 | return FAILED; /* completion could be in progress */ | 185 | return FAILED; /* completion could be in progress */ |
| 185 | } | 186 | } |
| 186 | old_req->data = NULL; | 187 | old_req->data = NULL; |
| @@ -189,7 +190,7 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |||
| 189 | write_unlock_irqrestore(&adapter->abort_lock, flags); | 190 | write_unlock_irqrestore(&adapter->abort_lock, flags); |
| 190 | 191 | ||
| 191 | while (retry--) { | 192 | while (retry--) { |
| 192 | abrt_req = zfcp_fsf_abort_fcp_command(old_req_id, unit); | 193 | abrt_req = zfcp_fsf_abort_fcp_command(old_reqid, unit); |
| 193 | if (abrt_req) | 194 | if (abrt_req) |
| 194 | break; | 195 | break; |
| 195 | 196 | ||
| @@ -197,7 +198,7 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |||
| 197 | if (!(atomic_read(&adapter->status) & | 198 | if (!(atomic_read(&adapter->status) & |
| 198 | ZFCP_STATUS_COMMON_RUNNING)) { | 199 | ZFCP_STATUS_COMMON_RUNNING)) { |
| 199 | zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL, | 200 | zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL, |
| 200 | old_req_id); | 201 | old_reqid); |
| 201 | return SUCCESS; | 202 | return SUCCESS; |
| 202 | } | 203 | } |
| 203 | } | 204 | } |
| @@ -208,13 +209,14 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |||
| 208 | abrt_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); | 209 | abrt_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); |
| 209 | 210 | ||
| 210 | if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) | 211 | if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) |
| 211 | zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, abrt_req, 0); | 212 | dbf_tag = "okay"; |
| 212 | else if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) | 213 | else if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) |
| 213 | zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, abrt_req, 0); | 214 | dbf_tag = "lte2"; |
| 214 | else { | 215 | else { |
| 215 | zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, abrt_req, 0); | 216 | dbf_tag = "fail"; |
| 216 | retval = FAILED; | 217 | retval = FAILED; |
| 217 | } | 218 | } |
| 219 | zfcp_scsi_dbf_event_abort(dbf_tag, adapter, scpnt, abrt_req, old_reqid); | ||
| 218 | zfcp_fsf_req_free(abrt_req); | 220 | zfcp_fsf_req_free(abrt_req); |
| 219 | return retval; | 221 | return retval; |
| 220 | } | 222 | } |
| @@ -534,6 +536,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port) | |||
| 534 | struct fc_rport_identifiers ids; | 536 | struct fc_rport_identifiers ids; |
| 535 | struct fc_rport *rport; | 537 | struct fc_rport *rport; |
| 536 | 538 | ||
| 539 | if (port->rport) | ||
| 540 | return; | ||
| 541 | |||
| 537 | ids.node_name = port->wwnn; | 542 | ids.node_name = port->wwnn; |
| 538 | ids.port_name = port->wwpn; | 543 | ids.port_name = port->wwpn; |
| 539 | ids.port_id = port->d_id; | 544 | ids.port_id = port->d_id; |
| @@ -557,8 +562,10 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port) | |||
| 557 | { | 562 | { |
| 558 | struct fc_rport *rport = port->rport; | 563 | struct fc_rport *rport = port->rport; |
| 559 | 564 | ||
| 560 | if (rport) | 565 | if (rport) { |
| 561 | fc_remote_port_delete(rport); | 566 | fc_remote_port_delete(rport); |
| 567 | port->rport = NULL; | ||
| 568 | } | ||
| 562 | } | 569 | } |
| 563 | 570 | ||
| 564 | void zfcp_scsi_schedule_rport_register(struct zfcp_port *port) | 571 | void zfcp_scsi_schedule_rport_register(struct zfcp_port *port) |
diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 3e51e64d1108..0fe5cce818cb 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c | |||
| @@ -494,9 +494,14 @@ static ssize_t zfcp_sysfs_adapter_q_full_show(struct device *dev, | |||
| 494 | struct Scsi_Host *scsi_host = class_to_shost(dev); | 494 | struct Scsi_Host *scsi_host = class_to_shost(dev); |
| 495 | struct zfcp_adapter *adapter = | 495 | struct zfcp_adapter *adapter = |
| 496 | (struct zfcp_adapter *) scsi_host->hostdata[0]; | 496 | (struct zfcp_adapter *) scsi_host->hostdata[0]; |
| 497 | u64 util; | ||
| 498 | |||
| 499 | spin_lock_bh(&adapter->qdio_stat_lock); | ||
| 500 | util = adapter->req_q_util; | ||
| 501 | spin_unlock_bh(&adapter->qdio_stat_lock); | ||
| 497 | 502 | ||
| 498 | return sprintf(buf, "%d %llu\n", atomic_read(&adapter->qdio_outb_full), | 503 | return sprintf(buf, "%d %llu\n", atomic_read(&adapter->qdio_outb_full), |
| 499 | (unsigned long long)adapter->req_q_util); | 504 | (unsigned long long)util); |
| 500 | } | 505 | } |
| 501 | static DEVICE_ATTR(queue_full, S_IRUGO, zfcp_sysfs_adapter_q_full_show, NULL); | 506 | static DEVICE_ATTR(queue_full, S_IRUGO, zfcp_sysfs_adapter_q_full_show, NULL); |
| 502 | 507 | ||
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 2bc22be5f849..145ab9ba55ea 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c | |||
| @@ -415,9 +415,9 @@ static void fc_exch_timeout(struct work_struct *work) | |||
| 415 | e_stat = ep->esb_stat; | 415 | e_stat = ep->esb_stat; |
| 416 | if (e_stat & ESB_ST_COMPLETE) { | 416 | if (e_stat & ESB_ST_COMPLETE) { |
| 417 | ep->esb_stat = e_stat & ~ESB_ST_REC_QUAL; | 417 | ep->esb_stat = e_stat & ~ESB_ST_REC_QUAL; |
| 418 | spin_unlock_bh(&ep->ex_lock); | ||
| 418 | if (e_stat & ESB_ST_REC_QUAL) | 419 | if (e_stat & ESB_ST_REC_QUAL) |
| 419 | fc_exch_rrq(ep); | 420 | fc_exch_rrq(ep); |
| 420 | spin_unlock_bh(&ep->ex_lock); | ||
| 421 | goto done; | 421 | goto done; |
| 422 | } else { | 422 | } else { |
| 423 | resp = ep->resp; | 423 | resp = ep->resp; |
| @@ -1624,14 +1624,14 @@ static void fc_exch_rrq(struct fc_exch *ep) | |||
| 1624 | struct fc_lport *lp; | 1624 | struct fc_lport *lp; |
| 1625 | struct fc_els_rrq *rrq; | 1625 | struct fc_els_rrq *rrq; |
| 1626 | struct fc_frame *fp; | 1626 | struct fc_frame *fp; |
| 1627 | struct fc_seq *rrq_sp; | ||
| 1628 | u32 did; | 1627 | u32 did; |
| 1629 | 1628 | ||
| 1630 | lp = ep->lp; | 1629 | lp = ep->lp; |
| 1631 | 1630 | ||
| 1632 | fp = fc_frame_alloc(lp, sizeof(*rrq)); | 1631 | fp = fc_frame_alloc(lp, sizeof(*rrq)); |
| 1633 | if (!fp) | 1632 | if (!fp) |
| 1634 | return; | 1633 | goto retry; |
| 1634 | |||
| 1635 | rrq = fc_frame_payload_get(fp, sizeof(*rrq)); | 1635 | rrq = fc_frame_payload_get(fp, sizeof(*rrq)); |
| 1636 | memset(rrq, 0, sizeof(*rrq)); | 1636 | memset(rrq, 0, sizeof(*rrq)); |
| 1637 | rrq->rrq_cmd = ELS_RRQ; | 1637 | rrq->rrq_cmd = ELS_RRQ; |
| @@ -1647,13 +1647,20 @@ static void fc_exch_rrq(struct fc_exch *ep) | |||
| 1647 | fc_host_port_id(lp->host), FC_TYPE_ELS, | 1647 | fc_host_port_id(lp->host), FC_TYPE_ELS, |
| 1648 | FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); | 1648 | FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); |
| 1649 | 1649 | ||
| 1650 | rrq_sp = fc_exch_seq_send(lp, fp, fc_exch_rrq_resp, NULL, ep, | 1650 | if (fc_exch_seq_send(lp, fp, fc_exch_rrq_resp, NULL, ep, lp->e_d_tov)) |
| 1651 | lp->e_d_tov); | 1651 | return; |
| 1652 | if (!rrq_sp) { | 1652 | |
| 1653 | ep->esb_stat |= ESB_ST_REC_QUAL; | 1653 | retry: |
| 1654 | fc_exch_timer_set_locked(ep, ep->r_a_tov); | 1654 | spin_lock_bh(&ep->ex_lock); |
| 1655 | if (ep->state & (FC_EX_RST_CLEANUP | FC_EX_DONE)) { | ||
| 1656 | spin_unlock_bh(&ep->ex_lock); | ||
| 1657 | /* drop hold for rec qual */ | ||
| 1658 | fc_exch_release(ep); | ||
| 1655 | return; | 1659 | return; |
| 1656 | } | 1660 | } |
| 1661 | ep->esb_stat |= ESB_ST_REC_QUAL; | ||
| 1662 | fc_exch_timer_set_locked(ep, ep->r_a_tov); | ||
| 1663 | spin_unlock_bh(&ep->ex_lock); | ||
| 1657 | } | 1664 | } |
| 1658 | 1665 | ||
| 1659 | 1666 | ||
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 716cc344c5df..a751f6230c22 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c | |||
| @@ -1974,10 +1974,10 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) | |||
| 1974 | * good and have never sent us a successful tmf response | 1974 | * good and have never sent us a successful tmf response |
| 1975 | * then sent more data for the cmd. | 1975 | * then sent more data for the cmd. |
| 1976 | */ | 1976 | */ |
| 1977 | spin_lock(&session->lock); | 1977 | spin_lock_bh(&session->lock); |
| 1978 | fail_scsi_task(task, DID_ABORT); | 1978 | fail_scsi_task(task, DID_ABORT); |
| 1979 | conn->tmf_state = TMF_INITIAL; | 1979 | conn->tmf_state = TMF_INITIAL; |
| 1980 | spin_unlock(&session->lock); | 1980 | spin_unlock_bh(&session->lock); |
| 1981 | iscsi_start_tx(conn); | 1981 | iscsi_start_tx(conn); |
| 1982 | goto success_unlocked; | 1982 | goto success_unlocked; |
| 1983 | case TMF_TIMEDOUT: | 1983 | case TMF_TIMEDOUT: |
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 54fa1e42dc4d..b3381959acce 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c | |||
| @@ -766,6 +766,7 @@ static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id) | |||
| 766 | if (!memcmp(phy->attached_sas_addr, ephy->attached_sas_addr, | 766 | if (!memcmp(phy->attached_sas_addr, ephy->attached_sas_addr, |
| 767 | SAS_ADDR_SIZE) && ephy->port) { | 767 | SAS_ADDR_SIZE) && ephy->port) { |
| 768 | sas_port_add_phy(ephy->port, phy->phy); | 768 | sas_port_add_phy(ephy->port, phy->phy); |
| 769 | phy->port = ephy->port; | ||
| 769 | phy->phy_state = PHY_DEVICE_DISCOVERED; | 770 | phy->phy_state = PHY_DEVICE_DISCOVERED; |
| 770 | return 0; | 771 | return 0; |
| 771 | } | 772 | } |
| @@ -945,11 +946,21 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) | |||
| 945 | if (ex->ex_phy[i].phy_state == PHY_VACANT || | 946 | if (ex->ex_phy[i].phy_state == PHY_VACANT || |
| 946 | ex->ex_phy[i].phy_state == PHY_NOT_PRESENT) | 947 | ex->ex_phy[i].phy_state == PHY_NOT_PRESENT) |
| 947 | continue; | 948 | continue; |
| 948 | 949 | /* | |
| 950 | * Due to races, the phy might not get added to the | ||
| 951 | * wide port, so we add the phy to the wide port here. | ||
| 952 | */ | ||
| 949 | if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == | 953 | if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == |
| 950 | SAS_ADDR(child->sas_addr)) | 954 | SAS_ADDR(child->sas_addr)) { |
| 951 | ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; | 955 | ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; |
| 956 | res = sas_ex_join_wide_port(dev, i); | ||
| 957 | if (!res) | ||
| 958 | SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", | ||
| 959 | i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr)); | ||
| 960 | |||
| 961 | } | ||
| 952 | } | 962 | } |
| 963 | res = 0; | ||
| 953 | } | 964 | } |
| 954 | 965 | ||
| 955 | return res; | 966 | return res; |
| @@ -1598,7 +1609,7 @@ static int sas_get_phy_attached_sas_addr(struct domain_device *dev, | |||
| 1598 | } | 1609 | } |
| 1599 | 1610 | ||
| 1600 | static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, | 1611 | static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, |
| 1601 | int from_phy) | 1612 | int from_phy, bool update) |
| 1602 | { | 1613 | { |
| 1603 | struct expander_device *ex = &dev->ex_dev; | 1614 | struct expander_device *ex = &dev->ex_dev; |
| 1604 | int res = 0; | 1615 | int res = 0; |
| @@ -1611,7 +1622,9 @@ static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, | |||
| 1611 | if (res) | 1622 | if (res) |
| 1612 | goto out; | 1623 | goto out; |
| 1613 | else if (phy_change_count != ex->ex_phy[i].phy_change_count) { | 1624 | else if (phy_change_count != ex->ex_phy[i].phy_change_count) { |
| 1614 | ex->ex_phy[i].phy_change_count = phy_change_count; | 1625 | if (update) |
| 1626 | ex->ex_phy[i].phy_change_count = | ||
| 1627 | phy_change_count; | ||
| 1615 | *phy_id = i; | 1628 | *phy_id = i; |
| 1616 | return 0; | 1629 | return 0; |
| 1617 | } | 1630 | } |
| @@ -1653,31 +1666,52 @@ out: | |||
| 1653 | kfree(rg_req); | 1666 | kfree(rg_req); |
| 1654 | return res; | 1667 | return res; |
| 1655 | } | 1668 | } |
| 1669 | /** | ||
| 1670 | * sas_find_bcast_dev - find the device issue BROADCAST(CHANGE). | ||
| 1671 | * @dev:domain device to be detect. | ||
| 1672 | * @src_dev: the device which originated BROADCAST(CHANGE). | ||
| 1673 | * | ||
| 1674 | * Add self-configuration expander suport. Suppose two expander cascading, | ||
| 1675 | * when the first level expander is self-configuring, hotplug the disks in | ||
| 1676 | * second level expander, BROADCAST(CHANGE) will not only be originated | ||
| 1677 | * in the second level expander, but also be originated in the first level | ||
| 1678 | * expander (see SAS protocol SAS 2r-14, 7.11 for detail), it is to say, | ||
| 1679 | * expander changed count in two level expanders will all increment at least | ||
| 1680 | * once, but the phy which chang count has changed is the source device which | ||
| 1681 | * we concerned. | ||
| 1682 | */ | ||
| 1656 | 1683 | ||
| 1657 | static int sas_find_bcast_dev(struct domain_device *dev, | 1684 | static int sas_find_bcast_dev(struct domain_device *dev, |
| 1658 | struct domain_device **src_dev) | 1685 | struct domain_device **src_dev) |
| 1659 | { | 1686 | { |
| 1660 | struct expander_device *ex = &dev->ex_dev; | 1687 | struct expander_device *ex = &dev->ex_dev; |
| 1661 | int ex_change_count = -1; | 1688 | int ex_change_count = -1; |
| 1689 | int phy_id = -1; | ||
| 1662 | int res; | 1690 | int res; |
| 1691 | struct domain_device *ch; | ||
| 1663 | 1692 | ||
| 1664 | res = sas_get_ex_change_count(dev, &ex_change_count); | 1693 | res = sas_get_ex_change_count(dev, &ex_change_count); |
| 1665 | if (res) | 1694 | if (res) |
| 1666 | goto out; | 1695 | goto out; |
| 1667 | if (ex_change_count != -1 && | 1696 | if (ex_change_count != -1 && ex_change_count != ex->ex_change_count) { |
| 1668 | ex_change_count != ex->ex_change_count) { | 1697 | /* Just detect if this expander phys phy change count changed, |
| 1669 | *src_dev = dev; | 1698 | * in order to determine if this expander originate BROADCAST, |
| 1670 | ex->ex_change_count = ex_change_count; | 1699 | * and do not update phy change count field in our structure. |
| 1671 | } else { | 1700 | */ |
| 1672 | struct domain_device *ch; | 1701 | res = sas_find_bcast_phy(dev, &phy_id, 0, false); |
| 1673 | 1702 | if (phy_id != -1) { | |
| 1674 | list_for_each_entry(ch, &ex->children, siblings) { | 1703 | *src_dev = dev; |
| 1675 | if (ch->dev_type == EDGE_DEV || | 1704 | ex->ex_change_count = ex_change_count; |
| 1676 | ch->dev_type == FANOUT_DEV) { | 1705 | SAS_DPRINTK("Expander phy change count has changed\n"); |
| 1677 | res = sas_find_bcast_dev(ch, src_dev); | 1706 | return res; |
| 1678 | if (src_dev) | 1707 | } else |
| 1679 | return res; | 1708 | SAS_DPRINTK("Expander phys DID NOT change\n"); |
| 1680 | } | 1709 | } |
| 1710 | list_for_each_entry(ch, &ex->children, siblings) { | ||
| 1711 | if (ch->dev_type == EDGE_DEV || ch->dev_type == FANOUT_DEV) { | ||
| 1712 | res = sas_find_bcast_dev(ch, src_dev); | ||
| 1713 | if (src_dev) | ||
| 1714 | return res; | ||
| 1681 | } | 1715 | } |
| 1682 | } | 1716 | } |
| 1683 | out: | 1717 | out: |
| @@ -1700,24 +1734,26 @@ static void sas_unregister_ex_tree(struct domain_device *dev) | |||
| 1700 | } | 1734 | } |
| 1701 | 1735 | ||
| 1702 | static void sas_unregister_devs_sas_addr(struct domain_device *parent, | 1736 | static void sas_unregister_devs_sas_addr(struct domain_device *parent, |
| 1703 | int phy_id) | 1737 | int phy_id, bool last) |
| 1704 | { | 1738 | { |
| 1705 | struct expander_device *ex_dev = &parent->ex_dev; | 1739 | struct expander_device *ex_dev = &parent->ex_dev; |
| 1706 | struct ex_phy *phy = &ex_dev->ex_phy[phy_id]; | 1740 | struct ex_phy *phy = &ex_dev->ex_phy[phy_id]; |
| 1707 | struct domain_device *child, *n; | 1741 | struct domain_device *child, *n; |
| 1708 | 1742 | if (last) { | |
| 1709 | list_for_each_entry_safe(child, n, &ex_dev->children, siblings) { | 1743 | list_for_each_entry_safe(child, n, |
| 1710 | if (SAS_ADDR(child->sas_addr) == | 1744 | &ex_dev->children, siblings) { |
| 1711 | SAS_ADDR(phy->attached_sas_addr)) { | 1745 | if (SAS_ADDR(child->sas_addr) == |
| 1712 | if (child->dev_type == EDGE_DEV || | 1746 | SAS_ADDR(phy->attached_sas_addr)) { |
| 1713 | child->dev_type == FANOUT_DEV) | 1747 | if (child->dev_type == EDGE_DEV || |
| 1714 | sas_unregister_ex_tree(child); | 1748 | child->dev_type == FANOUT_DEV) |
| 1715 | else | 1749 | sas_unregister_ex_tree(child); |
| 1716 | sas_unregister_dev(child); | 1750 | else |
| 1717 | break; | 1751 | sas_unregister_dev(child); |
| 1752 | break; | ||
| 1753 | } | ||
| 1718 | } | 1754 | } |
| 1755 | sas_disable_routing(parent, phy->attached_sas_addr); | ||
| 1719 | } | 1756 | } |
| 1720 | sas_disable_routing(parent, phy->attached_sas_addr); | ||
| 1721 | memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); | 1757 | memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); |
| 1722 | sas_port_delete_phy(phy->port, phy->phy); | 1758 | sas_port_delete_phy(phy->port, phy->phy); |
| 1723 | if (phy->port->num_phys == 0) | 1759 | if (phy->port->num_phys == 0) |
| @@ -1770,15 +1806,31 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) | |||
| 1770 | { | 1806 | { |
| 1771 | struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id]; | 1807 | struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id]; |
| 1772 | struct domain_device *child; | 1808 | struct domain_device *child; |
| 1773 | int res; | 1809 | bool found = false; |
| 1810 | int res, i; | ||
| 1774 | 1811 | ||
| 1775 | SAS_DPRINTK("ex %016llx phy%d new device attached\n", | 1812 | SAS_DPRINTK("ex %016llx phy%d new device attached\n", |
| 1776 | SAS_ADDR(dev->sas_addr), phy_id); | 1813 | SAS_ADDR(dev->sas_addr), phy_id); |
| 1777 | res = sas_ex_phy_discover(dev, phy_id); | 1814 | res = sas_ex_phy_discover(dev, phy_id); |
| 1778 | if (res) | 1815 | if (res) |
| 1779 | goto out; | 1816 | goto out; |
| 1817 | /* to support the wide port inserted */ | ||
| 1818 | for (i = 0; i < dev->ex_dev.num_phys; i++) { | ||
| 1819 | struct ex_phy *ex_phy_temp = &dev->ex_dev.ex_phy[i]; | ||
| 1820 | if (i == phy_id) | ||
| 1821 | continue; | ||
| 1822 | if (SAS_ADDR(ex_phy_temp->attached_sas_addr) == | ||
| 1823 | SAS_ADDR(ex_phy->attached_sas_addr)) { | ||
| 1824 | found = true; | ||
| 1825 | break; | ||
| 1826 | } | ||
| 1827 | } | ||
| 1828 | if (found) { | ||
| 1829 | sas_ex_join_wide_port(dev, phy_id); | ||
| 1830 | return 0; | ||
| 1831 | } | ||
| 1780 | res = sas_ex_discover_devices(dev, phy_id); | 1832 | res = sas_ex_discover_devices(dev, phy_id); |
| 1781 | if (res) | 1833 | if (!res) |
| 1782 | goto out; | 1834 | goto out; |
| 1783 | list_for_each_entry(child, &dev->ex_dev.children, siblings) { | 1835 | list_for_each_entry(child, &dev->ex_dev.children, siblings) { |
| 1784 | if (SAS_ADDR(child->sas_addr) == | 1836 | if (SAS_ADDR(child->sas_addr) == |
| @@ -1793,7 +1845,7 @@ out: | |||
| 1793 | return res; | 1845 | return res; |
| 1794 | } | 1846 | } |
| 1795 | 1847 | ||
| 1796 | static int sas_rediscover_dev(struct domain_device *dev, int phy_id) | 1848 | static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last) |
| 1797 | { | 1849 | { |
| 1798 | struct expander_device *ex = &dev->ex_dev; | 1850 | struct expander_device *ex = &dev->ex_dev; |
| 1799 | struct ex_phy *phy = &ex->ex_phy[phy_id]; | 1851 | struct ex_phy *phy = &ex->ex_phy[phy_id]; |
| @@ -1804,11 +1856,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id) | |||
| 1804 | switch (res) { | 1856 | switch (res) { |
| 1805 | case SMP_RESP_NO_PHY: | 1857 | case SMP_RESP_NO_PHY: |
| 1806 | phy->phy_state = PHY_NOT_PRESENT; | 1858 | phy->phy_state = PHY_NOT_PRESENT; |
| 1807 | sas_unregister_devs_sas_addr(dev, phy_id); | 1859 | sas_unregister_devs_sas_addr(dev, phy_id, last); |
| 1808 | goto out; break; | 1860 | goto out; break; |
| 1809 | case SMP_RESP_PHY_VACANT: | 1861 | case SMP_RESP_PHY_VACANT: |
| 1810 | phy->phy_state = PHY_VACANT; | 1862 | phy->phy_state = PHY_VACANT; |
| 1811 | sas_unregister_devs_sas_addr(dev, phy_id); | 1863 | sas_unregister_devs_sas_addr(dev, phy_id, last); |
| 1812 | goto out; break; | 1864 | goto out; break; |
| 1813 | case SMP_RESP_FUNC_ACC: | 1865 | case SMP_RESP_FUNC_ACC: |
| 1814 | break; | 1866 | break; |
| @@ -1816,7 +1868,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id) | |||
| 1816 | 1868 | ||
| 1817 | if (SAS_ADDR(attached_sas_addr) == 0) { | 1869 | if (SAS_ADDR(attached_sas_addr) == 0) { |
| 1818 | phy->phy_state = PHY_EMPTY; | 1870 | phy->phy_state = PHY_EMPTY; |
| 1819 | sas_unregister_devs_sas_addr(dev, phy_id); | 1871 | sas_unregister_devs_sas_addr(dev, phy_id, last); |
| 1820 | } else if (SAS_ADDR(attached_sas_addr) == | 1872 | } else if (SAS_ADDR(attached_sas_addr) == |
| 1821 | SAS_ADDR(phy->attached_sas_addr)) { | 1873 | SAS_ADDR(phy->attached_sas_addr)) { |
| 1822 | SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n", | 1874 | SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n", |
| @@ -1828,12 +1880,27 @@ out: | |||
| 1828 | return res; | 1880 | return res; |
| 1829 | } | 1881 | } |
| 1830 | 1882 | ||
| 1883 | /** | ||
| 1884 | * sas_rediscover - revalidate the domain. | ||
| 1885 | * @dev: domain device to be detected. | ||
| 1886 | * @phy_id: the phy id to be detected. | ||
| 1887 | * | ||
| 1888 | * NOTE: this process _must_ quit (return) as soon as any connection | ||
| 1889 | * errors are encountered. Connection recovery is done elsewhere. | ||
| 1890 | * Discover process only interrogates devices in order to discover the | ||
| 1891 | * domain. For plugging out, we un-register the device only when it is | ||
| 1892 | * the last phy in the port; for other phys in this port, we just delete | ||
| 1893 | * them from the port. For inserting, we do discovery when it is the | ||
| 1894 | * first phy; for other phys in this port, we add them to the port to | ||
| 1895 | * form the wide port. | ||
| 1896 | */ | ||
| 1831 | static int sas_rediscover(struct domain_device *dev, const int phy_id) | 1897 | static int sas_rediscover(struct domain_device *dev, const int phy_id) |
| 1832 | { | 1898 | { |
| 1833 | struct expander_device *ex = &dev->ex_dev; | 1899 | struct expander_device *ex = &dev->ex_dev; |
| 1834 | struct ex_phy *changed_phy = &ex->ex_phy[phy_id]; | 1900 | struct ex_phy *changed_phy = &ex->ex_phy[phy_id]; |
| 1835 | int res = 0; | 1901 | int res = 0; |
| 1836 | int i; | 1902 | int i; |
| 1903 | bool last = true; /* is this the last phy of the port */ | ||
| 1837 | 1904 | ||
| 1838 | SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n", | 1905 | SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n", |
| 1839 | SAS_ADDR(dev->sas_addr), phy_id); | 1906 | SAS_ADDR(dev->sas_addr), phy_id); |
| @@ -1848,13 +1915,13 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id) | |||
| 1848 | SAS_ADDR(changed_phy->attached_sas_addr)) { | 1915 | SAS_ADDR(changed_phy->attached_sas_addr)) { |
| 1849 | SAS_DPRINTK("phy%d part of wide port with " | 1916 | SAS_DPRINTK("phy%d part of wide port with " |
| 1850 | "phy%d\n", phy_id, i); | 1917 | "phy%d\n", phy_id, i); |
| 1851 | goto out; | 1918 | last = false; |
| 1919 | break; | ||
| 1852 | } | 1920 | } |
| 1853 | } | 1921 | } |
| 1854 | res = sas_rediscover_dev(dev, phy_id); | 1922 | res = sas_rediscover_dev(dev, phy_id, last); |
| 1855 | } else | 1923 | } else |
| 1856 | res = sas_discover_new(dev, phy_id); | 1924 | res = sas_discover_new(dev, phy_id); |
| 1857 | out: | ||
| 1858 | return res; | 1925 | return res; |
| 1859 | } | 1926 | } |
| 1860 | 1927 | ||
| @@ -1881,7 +1948,7 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) | |||
| 1881 | 1948 | ||
| 1882 | do { | 1949 | do { |
| 1883 | phy_id = -1; | 1950 | phy_id = -1; |
| 1884 | res = sas_find_bcast_phy(dev, &phy_id, i); | 1951 | res = sas_find_bcast_phy(dev, &phy_id, i, true); |
| 1885 | if (phy_id == -1) | 1952 | if (phy_id == -1) |
| 1886 | break; | 1953 | break; |
| 1887 | res = sas_rediscover(dev, phy_id); | 1954 | res = sas_rediscover(dev, phy_id); |
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index e6ac59c023f1..fe8b74c706d2 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c | |||
| @@ -56,7 +56,7 @@ static void sas_form_port(struct asd_sas_phy *phy) | |||
| 56 | } | 56 | } |
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | /* find a port */ | 59 | /* see if the phy should be part of a wide port */ |
| 60 | spin_lock_irqsave(&sas_ha->phy_port_lock, flags); | 60 | spin_lock_irqsave(&sas_ha->phy_port_lock, flags); |
| 61 | for (i = 0; i < sas_ha->num_phys; i++) { | 61 | for (i = 0; i < sas_ha->num_phys; i++) { |
| 62 | port = sas_ha->sas_port[i]; | 62 | port = sas_ha->sas_port[i]; |
| @@ -69,12 +69,23 @@ static void sas_form_port(struct asd_sas_phy *phy) | |||
| 69 | SAS_DPRINTK("phy%d matched wide port%d\n", phy->id, | 69 | SAS_DPRINTK("phy%d matched wide port%d\n", phy->id, |
| 70 | port->id); | 70 | port->id); |
| 71 | break; | 71 | break; |
| 72 | } else if (*(u64 *) port->sas_addr == 0 && port->num_phys==0) { | ||
| 73 | memcpy(port->sas_addr, phy->sas_addr, SAS_ADDR_SIZE); | ||
| 74 | break; | ||
| 75 | } | 72 | } |
| 76 | spin_unlock(&port->phy_list_lock); | 73 | spin_unlock(&port->phy_list_lock); |
| 77 | } | 74 | } |
| 75 | /* The phy does not match any existing port, create a new one */ | ||
| 76 | if (i == sas_ha->num_phys) { | ||
| 77 | for (i = 0; i < sas_ha->num_phys; i++) { | ||
| 78 | port = sas_ha->sas_port[i]; | ||
| 79 | spin_lock(&port->phy_list_lock); | ||
| 80 | if (*(u64 *)port->sas_addr == 0 | ||
| 81 | && port->num_phys == 0) { | ||
| 82 | memcpy(port->sas_addr, phy->sas_addr, | ||
| 83 | SAS_ADDR_SIZE); | ||
| 84 | break; | ||
| 85 | } | ||
| 86 | spin_unlock(&port->phy_list_lock); | ||
| 87 | } | ||
| 88 | } | ||
| 78 | 89 | ||
| 79 | if (i >= sas_ha->num_phys) { | 90 | if (i >= sas_ha->num_phys) { |
| 80 | printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n", | 91 | printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n", |
diff --git a/drivers/scsi/qla4xxx/ql4_dbg.c b/drivers/scsi/qla4xxx/ql4_dbg.c index fcc184cd066d..cbceb0ebabf7 100644 --- a/drivers/scsi/qla4xxx/ql4_dbg.c +++ b/drivers/scsi/qla4xxx/ql4_dbg.c | |||
| @@ -15,19 +15,18 @@ void qla4xxx_dump_buffer(void *b, uint32_t size) | |||
| 15 | uint32_t cnt; | 15 | uint32_t cnt; |
| 16 | uint8_t *c = b; | 16 | uint8_t *c = b; |
| 17 | 17 | ||
| 18 | printk(" 0 1 2 3 4 5 6 7 8 9 Ah Bh Ch Dh Eh " | 18 | printk(" 0 1 2 3 4 5 6 7 8 9 Ah Bh Ch Dh Eh " |
| 19 | "Fh\n"); | 19 | "Fh\n"); |
| 20 | printk("------------------------------------------------------------" | 20 | printk("------------------------------------------------------------" |
| 21 | "--\n"); | 21 | "--\n"); |
| 22 | for (cnt = 0; cnt < size; cnt++, c++) { | 22 | for (cnt = 0; cnt < size; c++) { |
| 23 | printk(KERN_DEBUG "%02x", *c); | 23 | printk(KERN_INFO "%02x", *c); |
| 24 | if (!(cnt % 16)) | 24 | if (!(++cnt % 16)) |
| 25 | printk(KERN_DEBUG "\n"); | 25 | printk(KERN_INFO "\n"); |
| 26 | 26 | ||
| 27 | else | 27 | else |
| 28 | printk(KERN_DEBUG " "); | 28 | printk(KERN_INFO " "); |
| 29 | } | 29 | } |
| 30 | if (cnt % 16) | 30 | printk(KERN_INFO "\n"); |
| 31 | printk(KERN_DEBUG "\n"); | ||
| 32 | } | 31 | } |
| 33 | 32 | ||
diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index b586f27c3bd4..81b5f29254e2 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h | |||
| @@ -100,7 +100,6 @@ | |||
| 100 | #define MAX_SRBS MAX_CMDS_TO_RISC | 100 | #define MAX_SRBS MAX_CMDS_TO_RISC |
| 101 | #define MBOX_AEN_REG_COUNT 5 | 101 | #define MBOX_AEN_REG_COUNT 5 |
| 102 | #define MAX_INIT_RETRIES 5 | 102 | #define MAX_INIT_RETRIES 5 |
| 103 | #define IOCB_HIWAT_CUSHION 16 | ||
| 104 | 103 | ||
| 105 | /* | 104 | /* |
| 106 | * Buffer sizes | 105 | * Buffer sizes |
| @@ -184,6 +183,11 @@ struct srb { | |||
| 184 | uint16_t cc_stat; | 183 | uint16_t cc_stat; |
| 185 | u_long r_start; /* Time we receive a cmd from OS */ | 184 | u_long r_start; /* Time we receive a cmd from OS */ |
| 186 | u_long u_start; /* Time when we handed the cmd to F/W */ | 185 | u_long u_start; /* Time when we handed the cmd to F/W */ |
| 186 | |||
| 187 | /* Used for extended sense / status continuation */ | ||
| 188 | uint8_t *req_sense_ptr; | ||
| 189 | uint16_t req_sense_len; | ||
| 190 | uint16_t reserved2; | ||
| 187 | }; | 191 | }; |
| 188 | 192 | ||
| 189 | /* | 193 | /* |
| @@ -302,7 +306,6 @@ struct scsi_qla_host { | |||
| 302 | uint32_t tot_ddbs; | 306 | uint32_t tot_ddbs; |
| 303 | 307 | ||
| 304 | uint16_t iocb_cnt; | 308 | uint16_t iocb_cnt; |
| 305 | uint16_t iocb_hiwat; | ||
| 306 | 309 | ||
| 307 | /* SRB cache. */ | 310 | /* SRB cache. */ |
| 308 | #define SRB_MIN_REQ 128 | 311 | #define SRB_MIN_REQ 128 |
| @@ -436,6 +439,8 @@ struct scsi_qla_host { | |||
| 436 | /* Map ddb_list entry by FW ddb index */ | 439 | /* Map ddb_list entry by FW ddb index */ |
| 437 | struct ddb_entry *fw_ddb_index_map[MAX_DDB_ENTRIES]; | 440 | struct ddb_entry *fw_ddb_index_map[MAX_DDB_ENTRIES]; |
| 438 | 441 | ||
| 442 | /* Saved srb for status continuation entry processing */ | ||
| 443 | struct srb *status_srb; | ||
| 439 | }; | 444 | }; |
| 440 | 445 | ||
| 441 | static inline int is_qla4010(struct scsi_qla_host *ha) | 446 | static inline int is_qla4010(struct scsi_qla_host *ha) |
diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h index 1b667a70cffa..9cd7a608df38 100644 --- a/drivers/scsi/qla4xxx/ql4_fw.h +++ b/drivers/scsi/qla4xxx/ql4_fw.h | |||
| @@ -572,6 +572,7 @@ struct conn_event_log_entry { | |||
| 572 | *************************************************************************/ | 572 | *************************************************************************/ |
| 573 | #define IOCB_MAX_CDB_LEN 16 /* Bytes in a CDB */ | 573 | #define IOCB_MAX_CDB_LEN 16 /* Bytes in a CDB */ |
| 574 | #define IOCB_MAX_SENSEDATA_LEN 32 /* Bytes of sense data */ | 574 | #define IOCB_MAX_SENSEDATA_LEN 32 /* Bytes of sense data */ |
| 575 | #define IOCB_MAX_EXT_SENSEDATA_LEN 60 /* Bytes of extended sense data */ | ||
| 575 | 576 | ||
| 576 | /* IOCB header structure */ | 577 | /* IOCB header structure */ |
| 577 | struct qla4_header { | 578 | struct qla4_header { |
| @@ -733,6 +734,12 @@ struct status_entry { | |||
| 733 | 734 | ||
| 734 | }; | 735 | }; |
| 735 | 736 | ||
| 737 | /* Status Continuation entry */ | ||
| 738 | struct status_cont_entry { | ||
| 739 | struct qla4_header hdr; /* 00-03 */ | ||
| 740 | uint8_t ext_sense_data[IOCB_MAX_EXT_SENSEDATA_LEN]; /* 04-63 */ | ||
| 741 | }; | ||
| 742 | |||
| 736 | struct passthru0 { | 743 | struct passthru0 { |
| 737 | struct qla4_header hdr; /* 00-03 */ | 744 | struct qla4_header hdr; /* 00-03 */ |
| 738 | uint32_t handle; /* 04-07 */ | 745 | uint32_t handle; /* 04-07 */ |
diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c index 912a67494adf..e0c32159749c 100644 --- a/drivers/scsi/qla4xxx/ql4_iocb.c +++ b/drivers/scsi/qla4xxx/ql4_iocb.c | |||
| @@ -10,9 +10,42 @@ | |||
| 10 | #include "ql4_dbg.h" | 10 | #include "ql4_dbg.h" |
| 11 | #include "ql4_inline.h" | 11 | #include "ql4_inline.h" |
| 12 | 12 | ||
| 13 | |||
| 14 | #include <scsi/scsi_tcq.h> | 13 | #include <scsi/scsi_tcq.h> |
| 15 | 14 | ||
| 15 | static int | ||
| 16 | qla4xxx_space_in_req_ring(struct scsi_qla_host *ha, uint16_t req_cnt) | ||
| 17 | { | ||
| 18 | uint16_t cnt; | ||
| 19 | |||
| 20 | /* Calculate number of free request entries. */ | ||
| 21 | if ((req_cnt + 2) >= ha->req_q_count) { | ||
| 22 | cnt = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); | ||
| 23 | if (ha->request_in < cnt) | ||
| 24 | ha->req_q_count = cnt - ha->request_in; | ||
| 25 | else | ||
| 26 | ha->req_q_count = REQUEST_QUEUE_DEPTH - | ||
| 27 | (ha->request_in - cnt); | ||
| 28 | } | ||
| 29 | |||
| 30 | /* Check if room for request in request ring. */ | ||
| 31 | if ((req_cnt + 2) < ha->req_q_count) | ||
| 32 | return 1; | ||
| 33 | else | ||
| 34 | return 0; | ||
| 35 | } | ||
| 36 | |||
| 37 | static void qla4xxx_advance_req_ring_ptr(struct scsi_qla_host *ha) | ||
| 38 | { | ||
| 39 | /* Advance request queue pointer */ | ||
| 40 | if (ha->request_in == (REQUEST_QUEUE_DEPTH - 1)) { | ||
| 41 | ha->request_in = 0; | ||
| 42 | ha->request_ptr = ha->request_ring; | ||
| 43 | } else { | ||
| 44 | ha->request_in++; | ||
| 45 | ha->request_ptr++; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 16 | /** | 49 | /** |
| 17 | * qla4xxx_get_req_pkt - returns a valid entry in request queue. | 50 | * qla4xxx_get_req_pkt - returns a valid entry in request queue. |
| 18 | * @ha: Pointer to host adapter structure. | 51 | * @ha: Pointer to host adapter structure. |
| @@ -26,35 +59,18 @@ | |||
| 26 | static int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, | 59 | static int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, |
| 27 | struct queue_entry **queue_entry) | 60 | struct queue_entry **queue_entry) |
| 28 | { | 61 | { |
| 29 | uint16_t request_in; | 62 | uint16_t req_cnt = 1; |
| 30 | uint8_t status = QLA_SUCCESS; | ||
| 31 | |||
| 32 | *queue_entry = ha->request_ptr; | ||
| 33 | 63 | ||
| 34 | /* get the latest request_in and request_out index */ | 64 | if (qla4xxx_space_in_req_ring(ha, req_cnt)) { |
| 35 | request_in = ha->request_in; | 65 | *queue_entry = ha->request_ptr; |
| 36 | ha->request_out = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); | ||
| 37 | |||
| 38 | /* Advance request queue pointer and check for queue full */ | ||
| 39 | if (request_in == (REQUEST_QUEUE_DEPTH - 1)) { | ||
| 40 | request_in = 0; | ||
| 41 | ha->request_ptr = ha->request_ring; | ||
| 42 | } else { | ||
| 43 | request_in++; | ||
| 44 | ha->request_ptr++; | ||
| 45 | } | ||
| 46 | |||
| 47 | /* request queue is full, try again later */ | ||
| 48 | if ((ha->iocb_cnt + 1) >= ha->iocb_hiwat) { | ||
| 49 | /* restore request pointer */ | ||
| 50 | ha->request_ptr = *queue_entry; | ||
| 51 | status = QLA_ERROR; | ||
| 52 | } else { | ||
| 53 | ha->request_in = request_in; | ||
| 54 | memset(*queue_entry, 0, sizeof(**queue_entry)); | 66 | memset(*queue_entry, 0, sizeof(**queue_entry)); |
| 67 | |||
| 68 | qla4xxx_advance_req_ring_ptr(ha); | ||
| 69 | ha->req_q_count -= req_cnt; | ||
| 70 | return QLA_SUCCESS; | ||
| 55 | } | 71 | } |
| 56 | 72 | ||
| 57 | return status; | 73 | return QLA_ERROR; |
| 58 | } | 74 | } |
| 59 | 75 | ||
| 60 | /** | 76 | /** |
| @@ -100,21 +116,14 @@ exit_send_marker: | |||
| 100 | return status; | 116 | return status; |
| 101 | } | 117 | } |
| 102 | 118 | ||
| 103 | static struct continuation_t1_entry* qla4xxx_alloc_cont_entry( | 119 | static struct continuation_t1_entry * |
| 104 | struct scsi_qla_host *ha) | 120 | qla4xxx_alloc_cont_entry(struct scsi_qla_host *ha) |
| 105 | { | 121 | { |
| 106 | struct continuation_t1_entry *cont_entry; | 122 | struct continuation_t1_entry *cont_entry; |
| 107 | 123 | ||
| 108 | cont_entry = (struct continuation_t1_entry *)ha->request_ptr; | 124 | cont_entry = (struct continuation_t1_entry *)ha->request_ptr; |
| 109 | 125 | ||
| 110 | /* Advance request queue pointer */ | 126 | qla4xxx_advance_req_ring_ptr(ha); |
| 111 | if (ha->request_in == (REQUEST_QUEUE_DEPTH - 1)) { | ||
| 112 | ha->request_in = 0; | ||
| 113 | ha->request_ptr = ha->request_ring; | ||
| 114 | } else { | ||
| 115 | ha->request_in++; | ||
| 116 | ha->request_ptr++; | ||
| 117 | } | ||
| 118 | 127 | ||
| 119 | /* Load packet defaults */ | 128 | /* Load packet defaults */ |
| 120 | cont_entry->hdr.entryType = ET_CONTINUE; | 129 | cont_entry->hdr.entryType = ET_CONTINUE; |
| @@ -197,13 +206,10 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) | |||
| 197 | struct scsi_cmnd *cmd = srb->cmd; | 206 | struct scsi_cmnd *cmd = srb->cmd; |
| 198 | struct ddb_entry *ddb_entry; | 207 | struct ddb_entry *ddb_entry; |
| 199 | struct command_t3_entry *cmd_entry; | 208 | struct command_t3_entry *cmd_entry; |
| 200 | |||
| 201 | int nseg; | 209 | int nseg; |
| 202 | uint16_t tot_dsds; | 210 | uint16_t tot_dsds; |
| 203 | uint16_t req_cnt; | 211 | uint16_t req_cnt; |
| 204 | |||
| 205 | unsigned long flags; | 212 | unsigned long flags; |
| 206 | uint16_t cnt; | ||
| 207 | uint32_t index; | 213 | uint32_t index; |
| 208 | char tag[2]; | 214 | char tag[2]; |
| 209 | 215 | ||
| @@ -217,6 +223,19 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) | |||
| 217 | 223 | ||
| 218 | index = (uint32_t)cmd->request->tag; | 224 | index = (uint32_t)cmd->request->tag; |
| 219 | 225 | ||
| 226 | /* | ||
| 227 | * Check to see if adapter is online before placing request on | ||
| 228 | * request queue. If a reset occurs and a request is in the queue, | ||
| 229 | * the firmware will still attempt to process the request, retrieving | ||
| 230 | * garbage for pointers. | ||
| 231 | */ | ||
| 232 | if (!test_bit(AF_ONLINE, &ha->flags)) { | ||
| 233 | DEBUG2(printk("scsi%ld: %s: Adapter OFFLINE! " | ||
| 234 | "Do not issue command.\n", | ||
| 235 | ha->host_no, __func__)); | ||
| 236 | goto queuing_error; | ||
| 237 | } | ||
| 238 | |||
| 220 | /* Calculate the number of request entries needed. */ | 239 | /* Calculate the number of request entries needed. */ |
| 221 | nseg = scsi_dma_map(cmd); | 240 | nseg = scsi_dma_map(cmd); |
| 222 | if (nseg < 0) | 241 | if (nseg < 0) |
| @@ -224,17 +243,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) | |||
| 224 | tot_dsds = nseg; | 243 | tot_dsds = nseg; |
| 225 | 244 | ||
| 226 | req_cnt = qla4xxx_calc_request_entries(tot_dsds); | 245 | req_cnt = qla4xxx_calc_request_entries(tot_dsds); |
| 227 | 246 | if (!qla4xxx_space_in_req_ring(ha, req_cnt)) | |
| 228 | if (ha->req_q_count < (req_cnt + 2)) { | ||
| 229 | cnt = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); | ||
| 230 | if (ha->request_in < cnt) | ||
| 231 | ha->req_q_count = cnt - ha->request_in; | ||
| 232 | else | ||
| 233 | ha->req_q_count = REQUEST_QUEUE_DEPTH - | ||
| 234 | (ha->request_in - cnt); | ||
| 235 | } | ||
| 236 | |||
| 237 | if (ha->req_q_count < (req_cnt + 2)) | ||
| 238 | goto queuing_error; | 247 | goto queuing_error; |
| 239 | 248 | ||
| 240 | /* total iocbs active */ | 249 | /* total iocbs active */ |
| @@ -286,32 +295,10 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) | |||
| 286 | break; | 295 | break; |
| 287 | } | 296 | } |
| 288 | 297 | ||
| 289 | 298 | qla4xxx_advance_req_ring_ptr(ha); | |
| 290 | /* Advance request queue pointer */ | ||
| 291 | ha->request_in++; | ||
| 292 | if (ha->request_in == REQUEST_QUEUE_DEPTH) { | ||
| 293 | ha->request_in = 0; | ||
| 294 | ha->request_ptr = ha->request_ring; | ||
| 295 | } else | ||
| 296 | ha->request_ptr++; | ||
| 297 | |||
| 298 | |||
| 299 | qla4xxx_build_scsi_iocbs(srb, cmd_entry, tot_dsds); | 299 | qla4xxx_build_scsi_iocbs(srb, cmd_entry, tot_dsds); |
| 300 | wmb(); | 300 | wmb(); |
| 301 | 301 | ||
| 302 | /* | ||
| 303 | * Check to see if adapter is online before placing request on | ||
| 304 | * request queue. If a reset occurs and a request is in the queue, | ||
| 305 | * the firmware will still attempt to process the request, retrieving | ||
| 306 | * garbage for pointers. | ||
| 307 | */ | ||
| 308 | if (!test_bit(AF_ONLINE, &ha->flags)) { | ||
| 309 | DEBUG2(printk("scsi%ld: %s: Adapter OFFLINE! " | ||
| 310 | "Do not issue command.\n", | ||
| 311 | ha->host_no, __func__)); | ||
| 312 | goto queuing_error; | ||
| 313 | } | ||
| 314 | |||
| 315 | srb->cmd->host_scribble = (unsigned char *)srb; | 302 | srb->cmd->host_scribble = (unsigned char *)srb; |
| 316 | 303 | ||
| 317 | /* update counters */ | 304 | /* update counters */ |
diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c index 799120fcb9be..8025ee16588e 100644 --- a/drivers/scsi/qla4xxx/ql4_isr.c +++ b/drivers/scsi/qla4xxx/ql4_isr.c | |||
| @@ -11,6 +11,98 @@ | |||
| 11 | #include "ql4_inline.h" | 11 | #include "ql4_inline.h" |
| 12 | 12 | ||
| 13 | /** | 13 | /** |
| 14 | * qla4xxx_copy_sense - copy sense data into cmd sense buffer | ||
| 15 | * @ha: Pointer to host adapter structure. | ||
| 16 | * @sts_entry: Pointer to status entry structure. | ||
| 17 | * @srb: Pointer to srb structure. | ||
| 18 | **/ | ||
| 19 | static void qla4xxx_copy_sense(struct scsi_qla_host *ha, | ||
| 20 | struct status_entry *sts_entry, | ||
| 21 | struct srb *srb) | ||
| 22 | { | ||
| 23 | struct scsi_cmnd *cmd = srb->cmd; | ||
| 24 | uint16_t sense_len; | ||
| 25 | |||
| 26 | memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); | ||
| 27 | sense_len = le16_to_cpu(sts_entry->senseDataByteCnt); | ||
| 28 | if (sense_len == 0) | ||
| 29 | return; | ||
| 30 | |||
| 31 | /* Save total available sense length, | ||
| 32 | * not to exceed cmd's sense buffer size */ | ||
| 33 | sense_len = min_t(uint16_t, sense_len, SCSI_SENSE_BUFFERSIZE); | ||
| 34 | srb->req_sense_ptr = cmd->sense_buffer; | ||
| 35 | srb->req_sense_len = sense_len; | ||
| 36 | |||
| 37 | /* Copy sense from sts_entry pkt */ | ||
| 38 | sense_len = min_t(uint16_t, sense_len, IOCB_MAX_SENSEDATA_LEN); | ||
| 39 | memcpy(cmd->sense_buffer, sts_entry->senseData, sense_len); | ||
| 40 | |||
| 41 | DEBUG2(printk(KERN_INFO "scsi%ld:%d:%d:%d: %s: sense key = %x, " | ||
| 42 | "ASL= %02x, ASC/ASCQ = %02x/%02x\n", ha->host_no, | ||
| 43 | cmd->device->channel, cmd->device->id, | ||
| 44 | cmd->device->lun, __func__, | ||
| 45 | sts_entry->senseData[2] & 0x0f, | ||
| 46 | sts_entry->senseData[7], | ||
| 47 | sts_entry->senseData[12], | ||
| 48 | sts_entry->senseData[13])); | ||
| 49 | |||
| 50 | DEBUG5(qla4xxx_dump_buffer(cmd->sense_buffer, sense_len)); | ||
| 51 | srb->flags |= SRB_GOT_SENSE; | ||
| 52 | |||
| 53 | /* Update srb, in case a sts_cont pkt follows */ | ||
| 54 | srb->req_sense_ptr += sense_len; | ||
| 55 | srb->req_sense_len -= sense_len; | ||
| 56 | if (srb->req_sense_len != 0) | ||
| 57 | ha->status_srb = srb; | ||
| 58 | else | ||
| 59 | ha->status_srb = NULL; | ||
| 60 | } | ||
| 61 | |||
| 62 | /** | ||
| 63 | * qla4xxx_status_cont_entry - Process a Status Continuation entry. | ||
| 64 | * @ha: SCSI driver HA context | ||
| 65 | * @sts_cont: Entry pointer | ||
| 66 | * | ||
| 67 | * Extended sense data. | ||
| 68 | */ | ||
| 69 | static void | ||
| 70 | qla4xxx_status_cont_entry(struct scsi_qla_host *ha, | ||
| 71 | struct status_cont_entry *sts_cont) | ||
| 72 | { | ||
| 73 | struct srb *srb = ha->status_srb; | ||
| 74 | struct scsi_cmnd *cmd; | ||
| 75 | uint8_t sense_len; | ||
| 76 | |||
| 77 | if (srb == NULL) | ||
| 78 | return; | ||
| 79 | |||
| 80 | cmd = srb->cmd; | ||
| 81 | if (cmd == NULL) { | ||
| 82 | DEBUG2(printk(KERN_INFO "scsi%ld: %s: Cmd already returned " | ||
| 83 | "back to OS srb=%p srb->state:%d\n", ha->host_no, | ||
| 84 | __func__, srb, srb->state)); | ||
| 85 | ha->status_srb = NULL; | ||
| 86 | return; | ||
| 87 | } | ||
| 88 | |||
| 89 | /* Copy sense data. */ | ||
| 90 | sense_len = min_t(uint16_t, srb->req_sense_len, | ||
| 91 | IOCB_MAX_EXT_SENSEDATA_LEN); | ||
| 92 | memcpy(srb->req_sense_ptr, sts_cont->ext_sense_data, sense_len); | ||
| 93 | DEBUG5(qla4xxx_dump_buffer(srb->req_sense_ptr, sense_len)); | ||
| 94 | |||
| 95 | srb->req_sense_ptr += sense_len; | ||
| 96 | srb->req_sense_len -= sense_len; | ||
| 97 | |||
| 98 | /* Place command on done queue. */ | ||
| 99 | if (srb->req_sense_len == 0) { | ||
| 100 | qla4xxx_srb_compl(ha, srb); | ||
| 101 | ha->status_srb = NULL; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | /** | ||
| 14 | * qla4xxx_status_entry - processes status IOCBs | 106 | * qla4xxx_status_entry - processes status IOCBs |
| 15 | * @ha: Pointer to host adapter structure. | 107 | * @ha: Pointer to host adapter structure. |
| 16 | * @sts_entry: Pointer to status entry structure. | 108 | * @sts_entry: Pointer to status entry structure. |
| @@ -23,7 +115,6 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, | |||
| 23 | struct srb *srb; | 115 | struct srb *srb; |
| 24 | struct ddb_entry *ddb_entry; | 116 | struct ddb_entry *ddb_entry; |
| 25 | uint32_t residual; | 117 | uint32_t residual; |
| 26 | uint16_t sensebytecnt; | ||
| 27 | 118 | ||
| 28 | srb = qla4xxx_del_from_active_array(ha, le32_to_cpu(sts_entry->handle)); | 119 | srb = qla4xxx_del_from_active_array(ha, le32_to_cpu(sts_entry->handle)); |
| 29 | if (!srb) { | 120 | if (!srb) { |
| @@ -92,24 +183,7 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, | |||
| 92 | break; | 183 | break; |
| 93 | 184 | ||
| 94 | /* Copy Sense Data into sense buffer. */ | 185 | /* Copy Sense Data into sense buffer. */ |
| 95 | memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); | 186 | qla4xxx_copy_sense(ha, sts_entry, srb); |
| 96 | |||
| 97 | sensebytecnt = le16_to_cpu(sts_entry->senseDataByteCnt); | ||
| 98 | if (sensebytecnt == 0) | ||
| 99 | break; | ||
| 100 | |||
| 101 | memcpy(cmd->sense_buffer, sts_entry->senseData, | ||
| 102 | min_t(uint16_t, sensebytecnt, SCSI_SENSE_BUFFERSIZE)); | ||
| 103 | |||
| 104 | DEBUG2(printk("scsi%ld:%d:%d:%d: %s: sense key = %x, " | ||
| 105 | "ASC/ASCQ = %02x/%02x\n", ha->host_no, | ||
| 106 | cmd->device->channel, cmd->device->id, | ||
| 107 | cmd->device->lun, __func__, | ||
| 108 | sts_entry->senseData[2] & 0x0f, | ||
| 109 | sts_entry->senseData[12], | ||
| 110 | sts_entry->senseData[13])); | ||
| 111 | |||
| 112 | srb->flags |= SRB_GOT_SENSE; | ||
| 113 | break; | 187 | break; |
| 114 | 188 | ||
| 115 | case SCS_INCOMPLETE: | 189 | case SCS_INCOMPLETE: |
| @@ -176,23 +250,7 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, | |||
| 176 | break; | 250 | break; |
| 177 | 251 | ||
| 178 | /* Copy Sense Data into sense buffer. */ | 252 | /* Copy Sense Data into sense buffer. */ |
| 179 | memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); | 253 | qla4xxx_copy_sense(ha, sts_entry, srb); |
| 180 | |||
| 181 | sensebytecnt = | ||
| 182 | le16_to_cpu(sts_entry->senseDataByteCnt); | ||
| 183 | if (sensebytecnt == 0) | ||
| 184 | break; | ||
| 185 | |||
| 186 | memcpy(cmd->sense_buffer, sts_entry->senseData, | ||
| 187 | min_t(uint16_t, sensebytecnt, SCSI_SENSE_BUFFERSIZE)); | ||
| 188 | |||
| 189 | DEBUG2(printk("scsi%ld:%d:%d:%d: %s: sense key = %x, " | ||
| 190 | "ASC/ASCQ = %02x/%02x\n", ha->host_no, | ||
| 191 | cmd->device->channel, cmd->device->id, | ||
| 192 | cmd->device->lun, __func__, | ||
| 193 | sts_entry->senseData[2] & 0x0f, | ||
| 194 | sts_entry->senseData[12], | ||
| 195 | sts_entry->senseData[13])); | ||
| 196 | } else { | 254 | } else { |
| 197 | /* | 255 | /* |
| 198 | * If RISC reports underrun and target does not | 256 | * If RISC reports underrun and target does not |
| @@ -268,9 +326,10 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, | |||
| 268 | 326 | ||
| 269 | status_entry_exit: | 327 | status_entry_exit: |
| 270 | 328 | ||
| 271 | /* complete the request */ | 329 | /* complete the request, if not waiting for status_continuation pkt */ |
| 272 | srb->cc_stat = sts_entry->completionStatus; | 330 | srb->cc_stat = sts_entry->completionStatus; |
| 273 | qla4xxx_srb_compl(ha, srb); | 331 | if (ha->status_srb == NULL) |
| 332 | qla4xxx_srb_compl(ha, srb); | ||
| 274 | } | 333 | } |
| 275 | 334 | ||
| 276 | /** | 335 | /** |
| @@ -305,10 +364,7 @@ static void qla4xxx_process_response_queue(struct scsi_qla_host * ha) | |||
| 305 | /* process entry */ | 364 | /* process entry */ |
| 306 | switch (sts_entry->hdr.entryType) { | 365 | switch (sts_entry->hdr.entryType) { |
| 307 | case ET_STATUS: | 366 | case ET_STATUS: |
| 308 | /* | 367 | /* Common status */ |
| 309 | * Common status - Single completion posted in single | ||
| 310 | * IOSB. | ||
| 311 | */ | ||
| 312 | qla4xxx_status_entry(ha, sts_entry); | 368 | qla4xxx_status_entry(ha, sts_entry); |
| 313 | break; | 369 | break; |
| 314 | 370 | ||
| @@ -316,9 +372,8 @@ static void qla4xxx_process_response_queue(struct scsi_qla_host * ha) | |||
| 316 | break; | 372 | break; |
| 317 | 373 | ||
| 318 | case ET_STATUS_CONTINUATION: | 374 | case ET_STATUS_CONTINUATION: |
| 319 | /* Just throw away the status continuation entries */ | 375 | qla4xxx_status_cont_entry(ha, |
| 320 | DEBUG2(printk("scsi%ld: %s: Status Continuation entry " | 376 | (struct status_cont_entry *) sts_entry); |
| 321 | "- ignoring\n", ha->host_no, __func__)); | ||
| 322 | break; | 377 | break; |
| 323 | 378 | ||
| 324 | case ET_COMMAND: | 379 | case ET_COMMAND: |
diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 051b0f5e8c8e..09d6d4b76f39 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c | |||
| @@ -385,16 +385,6 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha) | |||
| 385 | mbox_sts[0])); | 385 | mbox_sts[0])); |
| 386 | return QLA_ERROR; | 386 | return QLA_ERROR; |
| 387 | } | 387 | } |
| 388 | |||
| 389 | /* High-water mark of IOCBs */ | ||
| 390 | ha->iocb_hiwat = mbox_sts[2]; | ||
| 391 | if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION) | ||
| 392 | ha->iocb_hiwat -= IOCB_HIWAT_CUSHION; | ||
| 393 | else | ||
| 394 | dev_info(&ha->pdev->dev, "WARNING!!! You have less than %d " | ||
| 395 | "firmware IOCBs available (%d).\n", | ||
| 396 | IOCB_HIWAT_CUSHION, ha->iocb_hiwat); | ||
| 397 | |||
| 398 | return QLA_SUCCESS; | 388 | return QLA_SUCCESS; |
| 399 | } | 389 | } |
| 400 | 390 | ||
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index ec9da6ce8489..40e3cafb3a9c 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c | |||
| @@ -66,6 +66,7 @@ static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess, | |||
| 66 | static int qla4xxx_host_get_param(struct Scsi_Host *shost, | 66 | static int qla4xxx_host_get_param(struct Scsi_Host *shost, |
| 67 | enum iscsi_host_param param, char *buf); | 67 | enum iscsi_host_param param, char *buf); |
| 68 | static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session); | 68 | static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session); |
| 69 | static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc); | ||
| 69 | 70 | ||
| 70 | /* | 71 | /* |
| 71 | * SCSI host template entry points | 72 | * SCSI host template entry points |
| @@ -89,6 +90,7 @@ static struct scsi_host_template qla4xxx_driver_template = { | |||
| 89 | .eh_device_reset_handler = qla4xxx_eh_device_reset, | 90 | .eh_device_reset_handler = qla4xxx_eh_device_reset, |
| 90 | .eh_target_reset_handler = qla4xxx_eh_target_reset, | 91 | .eh_target_reset_handler = qla4xxx_eh_target_reset, |
| 91 | .eh_host_reset_handler = qla4xxx_eh_host_reset, | 92 | .eh_host_reset_handler = qla4xxx_eh_host_reset, |
| 93 | .eh_timed_out = qla4xxx_eh_cmd_timed_out, | ||
| 92 | 94 | ||
| 93 | .slave_configure = qla4xxx_slave_configure, | 95 | .slave_configure = qla4xxx_slave_configure, |
| 94 | .slave_alloc = qla4xxx_slave_alloc, | 96 | .slave_alloc = qla4xxx_slave_alloc, |
| @@ -124,6 +126,21 @@ static struct iscsi_transport qla4xxx_iscsi_transport = { | |||
| 124 | 126 | ||
| 125 | static struct scsi_transport_template *qla4xxx_scsi_transport; | 127 | static struct scsi_transport_template *qla4xxx_scsi_transport; |
| 126 | 128 | ||
| 129 | static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc) | ||
| 130 | { | ||
| 131 | struct iscsi_cls_session *session; | ||
| 132 | struct ddb_entry *ddb_entry; | ||
| 133 | |||
| 134 | session = starget_to_session(scsi_target(sc->device)); | ||
| 135 | ddb_entry = session->dd_data; | ||
| 136 | |||
| 137 | /* if we are not logged in then the LLD is going to clean up the cmd */ | ||
| 138 | if (atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) | ||
| 139 | return BLK_EH_RESET_TIMER; | ||
| 140 | else | ||
| 141 | return BLK_EH_NOT_HANDLED; | ||
| 142 | } | ||
| 143 | |||
| 127 | static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session) | 144 | static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session) |
| 128 | { | 145 | { |
| 129 | struct ddb_entry *ddb_entry = session->dd_data; | 146 | struct ddb_entry *ddb_entry = session->dd_data; |
| @@ -904,18 +921,17 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha, | |||
| 904 | /* Flush any pending ddb changed AENs */ | 921 | /* Flush any pending ddb changed AENs */ |
| 905 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); | 922 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); |
| 906 | 923 | ||
| 924 | qla4xxx_flush_active_srbs(ha); | ||
| 925 | |||
| 907 | /* Reset the firmware. If successful, function | 926 | /* Reset the firmware. If successful, function |
| 908 | * returns with ISP interrupts enabled. | 927 | * returns with ISP interrupts enabled. |
| 909 | */ | 928 | */ |
| 910 | if (status == QLA_SUCCESS) { | 929 | DEBUG2(printk("scsi%ld: %s - Performing soft reset..\n", |
| 911 | DEBUG2(printk("scsi%ld: %s - Performing soft reset..\n", | 930 | ha->host_no, __func__)); |
| 912 | ha->host_no, __func__)); | 931 | if (ql4xxx_lock_drvr_wait(ha) == QLA_SUCCESS) |
| 913 | qla4xxx_flush_active_srbs(ha); | 932 | status = qla4xxx_soft_reset(ha); |
| 914 | if (ql4xxx_lock_drvr_wait(ha) == QLA_SUCCESS) | 933 | else |
| 915 | status = qla4xxx_soft_reset(ha); | 934 | status = QLA_ERROR; |
| 916 | else | ||
| 917 | status = QLA_ERROR; | ||
| 918 | } | ||
| 919 | 935 | ||
| 920 | /* Flush any pending ddb changed AENs */ | 936 | /* Flush any pending ddb changed AENs */ |
| 921 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); | 937 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); |
| @@ -1527,11 +1543,9 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) | |||
| 1527 | { | 1543 | { |
| 1528 | struct scsi_qla_host *ha = to_qla_host(cmd->device->host); | 1544 | struct scsi_qla_host *ha = to_qla_host(cmd->device->host); |
| 1529 | struct ddb_entry *ddb_entry = cmd->device->hostdata; | 1545 | struct ddb_entry *ddb_entry = cmd->device->hostdata; |
| 1530 | struct srb *sp; | ||
| 1531 | int ret = FAILED, stat; | 1546 | int ret = FAILED, stat; |
| 1532 | 1547 | ||
| 1533 | sp = (struct srb *) cmd->SCp.ptr; | 1548 | if (!ddb_entry) |
| 1534 | if (!sp || !ddb_entry) | ||
| 1535 | return ret; | 1549 | return ret; |
| 1536 | 1550 | ||
| 1537 | dev_info(&ha->pdev->dev, | 1551 | dev_info(&ha->pdev->dev, |
| @@ -1644,7 +1658,7 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd) | |||
| 1644 | ha = (struct scsi_qla_host *) cmd->device->host->hostdata; | 1658 | ha = (struct scsi_qla_host *) cmd->device->host->hostdata; |
| 1645 | 1659 | ||
| 1646 | dev_info(&ha->pdev->dev, | 1660 | dev_info(&ha->pdev->dev, |
| 1647 | "scsi(%ld:%d:%d:%d): ADAPTER RESET ISSUED.\n", ha->host_no, | 1661 | "scsi(%ld:%d:%d:%d): HOST RESET ISSUED.\n", ha->host_no, |
| 1648 | cmd->device->channel, cmd->device->id, cmd->device->lun); | 1662 | cmd->device->channel, cmd->device->id, cmd->device->lun); |
| 1649 | 1663 | ||
| 1650 | if (qla4xxx_wait_for_hba_online(ha) != QLA_SUCCESS) { | 1664 | if (qla4xxx_wait_for_hba_online(ha) != QLA_SUCCESS) { |
diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h index ab984cb89cea..6980cb279c81 100644 --- a/drivers/scsi/qla4xxx/ql4_version.h +++ b/drivers/scsi/qla4xxx/ql4_version.h | |||
| @@ -5,5 +5,5 @@ | |||
| 5 | * See LICENSE.qla4xxx for copyright and licensing details. | 5 | * See LICENSE.qla4xxx for copyright and licensing details. |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #define QLA4XXX_DRIVER_VERSION "5.01.00-k8" | 8 | #define QLA4XXX_DRIVER_VERSION "5.01.00-k9" |
| 9 | 9 | ||
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 783e33c65eb7..b47240ca4b19 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c | |||
| @@ -990,7 +990,7 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, | |||
| 990 | struct iscsi_uevent *ev; | 990 | struct iscsi_uevent *ev; |
| 991 | int len = NLMSG_SPACE(sizeof(*ev) + data_size); | 991 | int len = NLMSG_SPACE(sizeof(*ev) + data_size); |
| 992 | 992 | ||
| 993 | skb = alloc_skb(len, GFP_NOIO); | 993 | skb = alloc_skb(len, GFP_ATOMIC); |
| 994 | if (!skb) { | 994 | if (!skb) { |
| 995 | printk(KERN_ERR "can not deliver iscsi offload message:OOM\n"); | 995 | printk(KERN_ERR "can not deliver iscsi offload message:OOM\n"); |
| 996 | return -ENOMEM; | 996 | return -ENOMEM; |
| @@ -1012,7 +1012,7 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, | |||
| 1012 | 1012 | ||
| 1013 | memcpy((char *)ev + sizeof(*ev), data, data_size); | 1013 | memcpy((char *)ev + sizeof(*ev), data, data_size); |
| 1014 | 1014 | ||
| 1015 | return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_NOIO); | 1015 | return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_ATOMIC); |
| 1016 | } | 1016 | } |
| 1017 | EXPORT_SYMBOL_GPL(iscsi_offload_mesg); | 1017 | EXPORT_SYMBOL_GPL(iscsi_offload_mesg); |
| 1018 | 1018 | ||
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5616cd780ff3..b7b9fec67a98 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c | |||
| @@ -1840,6 +1840,18 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) | |||
| 1840 | kfree(buffer); | 1840 | kfree(buffer); |
| 1841 | } | 1841 | } |
| 1842 | 1842 | ||
| 1843 | static int sd_try_extended_inquiry(struct scsi_device *sdp) | ||
| 1844 | { | ||
| 1845 | /* | ||
| 1846 | * Although VPD inquiries can go to SCSI-2 type devices, | ||
| 1847 | * some USB ones crash on receiving them, and the pages | ||
| 1848 | * we currently ask for are for SPC-3 and beyond | ||
| 1849 | */ | ||
| 1850 | if (sdp->scsi_level > SCSI_SPC_2) | ||
| 1851 | return 1; | ||
| 1852 | return 0; | ||
| 1853 | } | ||
| 1854 | |||
| 1843 | /** | 1855 | /** |
| 1844 | * sd_revalidate_disk - called the first time a new disk is seen, | 1856 | * sd_revalidate_disk - called the first time a new disk is seen, |
| 1845 | * performs disk spin up, read_capacity, etc. | 1857 | * performs disk spin up, read_capacity, etc. |
| @@ -1877,8 +1889,12 @@ static int sd_revalidate_disk(struct gendisk *disk) | |||
| 1877 | */ | 1889 | */ |
| 1878 | if (sdkp->media_present) { | 1890 | if (sdkp->media_present) { |
| 1879 | sd_read_capacity(sdkp, buffer); | 1891 | sd_read_capacity(sdkp, buffer); |
| 1880 | sd_read_block_limits(sdkp); | 1892 | |
| 1881 | sd_read_block_characteristics(sdkp); | 1893 | if (sd_try_extended_inquiry(sdp)) { |
| 1894 | sd_read_block_limits(sdkp); | ||
| 1895 | sd_read_block_characteristics(sdkp); | ||
| 1896 | } | ||
| 1897 | |||
| 1882 | sd_read_write_protect_flag(sdkp, buffer); | 1898 | sd_read_write_protect_flag(sdkp, buffer); |
| 1883 | sd_read_cache_type(sdkp, buffer); | 1899 | sd_read_cache_type(sdkp, buffer); |
| 1884 | sd_read_app_tag_own(sdkp, buffer); | 1900 | sd_read_app_tag_own(sdkp, buffer); |
diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/serial/cpm_uart/cpm_uart_cpm2.c index 141c0a3333ad..a9802e76b5fa 100644 --- a/drivers/serial/cpm_uart/cpm_uart_cpm2.c +++ b/drivers/serial/cpm_uart/cpm_uart_cpm2.c | |||
| @@ -132,7 +132,7 @@ int cpm_uart_allocbuf(struct uart_cpm_port *pinfo, unsigned int is_con) | |||
| 132 | memsz = L1_CACHE_ALIGN(pinfo->rx_nrfifos * pinfo->rx_fifosize) + | 132 | memsz = L1_CACHE_ALIGN(pinfo->rx_nrfifos * pinfo->rx_fifosize) + |
| 133 | L1_CACHE_ALIGN(pinfo->tx_nrfifos * pinfo->tx_fifosize); | 133 | L1_CACHE_ALIGN(pinfo->tx_nrfifos * pinfo->tx_fifosize); |
| 134 | if (is_con) { | 134 | if (is_con) { |
| 135 | mem_addr = alloc_bootmem(memsz); | 135 | mem_addr = kzalloc(memsz, GFP_NOWAIT); |
| 136 | dma_addr = virt_to_bus(mem_addr); | 136 | dma_addr = virt_to_bus(mem_addr); |
| 137 | } | 137 | } |
| 138 | else | 138 | else |
diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index ef7870f5ea08..857b3668b3ba 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c | |||
| @@ -957,9 +957,14 @@ static int __devinit sticore_pci_init(struct pci_dev *pd, | |||
| 957 | #ifdef CONFIG_PCI | 957 | #ifdef CONFIG_PCI |
| 958 | unsigned long fb_base, rom_base; | 958 | unsigned long fb_base, rom_base; |
| 959 | unsigned int fb_len, rom_len; | 959 | unsigned int fb_len, rom_len; |
| 960 | int err; | ||
| 960 | struct sti_struct *sti; | 961 | struct sti_struct *sti; |
| 961 | 962 | ||
| 962 | pci_enable_device(pd); | 963 | err = pci_enable_device(pd); |
| 964 | if (err < 0) { | ||
| 965 | dev_err(&pd->dev, "Cannot enable PCI device\n"); | ||
| 966 | return err; | ||
| 967 | } | ||
| 963 | 968 | ||
| 964 | fb_base = pci_resource_start(pd, 0); | 969 | fb_base = pci_resource_start(pd, 0); |
| 965 | fb_len = pci_resource_len(pd, 0); | 970 | fb_len = pci_resource_len(pd, 0); |
| @@ -1048,7 +1053,7 @@ static void __devinit sti_init_roms(void) | |||
| 1048 | 1053 | ||
| 1049 | /* Register drivers for native & PCI cards */ | 1054 | /* Register drivers for native & PCI cards */ |
| 1050 | register_parisc_driver(&pa_sti_driver); | 1055 | register_parisc_driver(&pa_sti_driver); |
| 1051 | pci_register_driver(&pci_sti_driver); | 1056 | WARN_ON(pci_register_driver(&pci_sti_driver)); |
| 1052 | 1057 | ||
| 1053 | /* if we didn't find the given default sti, take the first one */ | 1058 | /* if we didn't find the given default sti, take the first one */ |
| 1054 | if (!default_sti) | 1059 | if (!default_sti) |
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index bcec78ffc765..248e00ec4dc1 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c | |||
| @@ -52,8 +52,10 @@ struct virtio_pci_device | |||
| 52 | char (*msix_names)[256]; | 52 | char (*msix_names)[256]; |
| 53 | /* Number of available vectors */ | 53 | /* Number of available vectors */ |
| 54 | unsigned msix_vectors; | 54 | unsigned msix_vectors; |
| 55 | /* Vectors allocated */ | 55 | /* Vectors allocated, excluding per-vq vectors if any */ |
| 56 | unsigned msix_used_vectors; | 56 | unsigned msix_used_vectors; |
| 57 | /* Whether we have vector per vq */ | ||
| 58 | bool per_vq_vectors; | ||
| 57 | }; | 59 | }; |
| 58 | 60 | ||
| 59 | /* Constants for MSI-X */ | 61 | /* Constants for MSI-X */ |
| @@ -258,7 +260,6 @@ static void vp_free_vectors(struct virtio_device *vdev) | |||
| 258 | 260 | ||
| 259 | for (i = 0; i < vp_dev->msix_used_vectors; ++i) | 261 | for (i = 0; i < vp_dev->msix_used_vectors; ++i) |
| 260 | free_irq(vp_dev->msix_entries[i].vector, vp_dev); | 262 | free_irq(vp_dev->msix_entries[i].vector, vp_dev); |
| 261 | vp_dev->msix_used_vectors = 0; | ||
| 262 | 263 | ||
| 263 | if (vp_dev->msix_enabled) { | 264 | if (vp_dev->msix_enabled) { |
| 264 | /* Disable the vector used for configuration */ | 265 | /* Disable the vector used for configuration */ |
| @@ -267,80 +268,77 @@ static void vp_free_vectors(struct virtio_device *vdev) | |||
| 267 | /* Flush the write out to device */ | 268 | /* Flush the write out to device */ |
| 268 | ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); | 269 | ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); |
| 269 | 270 | ||
| 270 | vp_dev->msix_enabled = 0; | ||
| 271 | pci_disable_msix(vp_dev->pci_dev); | 271 | pci_disable_msix(vp_dev->pci_dev); |
| 272 | vp_dev->msix_enabled = 0; | ||
| 273 | vp_dev->msix_vectors = 0; | ||
| 272 | } | 274 | } |
| 273 | } | ||
| 274 | 275 | ||
| 275 | static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, | 276 | vp_dev->msix_used_vectors = 0; |
| 276 | int *options, int noptions) | 277 | kfree(vp_dev->msix_names); |
| 277 | { | 278 | vp_dev->msix_names = NULL; |
| 278 | int i; | 279 | kfree(vp_dev->msix_entries); |
| 279 | for (i = 0; i < noptions; ++i) | 280 | vp_dev->msix_entries = NULL; |
| 280 | if (!pci_enable_msix(dev, entries, options[i])) | ||
| 281 | return options[i]; | ||
| 282 | return -EBUSY; | ||
| 283 | } | 281 | } |
| 284 | 282 | ||
| 285 | static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) | 283 | static int vp_request_vectors(struct virtio_device *vdev, int nvectors, |
| 284 | bool per_vq_vectors) | ||
| 286 | { | 285 | { |
| 287 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | 286 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 288 | const char *name = dev_name(&vp_dev->vdev.dev); | 287 | const char *name = dev_name(&vp_dev->vdev.dev); |
| 289 | unsigned i, v; | 288 | unsigned i, v; |
| 290 | int err = -ENOMEM; | 289 | int err = -ENOMEM; |
| 291 | /* We want at most one vector per queue and one for config changes. | 290 | |
| 292 | * Fallback to separate vectors for config and a shared for queues. | 291 | if (!nvectors) { |
| 293 | * Finally fall back to regular interrupts. */ | 292 | /* Can't allocate MSI-X vectors, use regular interrupt */ |
| 294 | int options[] = { max_vqs + 1, 2 }; | 293 | vp_dev->msix_vectors = 0; |
| 295 | int nvectors = max(options[0], options[1]); | 294 | err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, |
| 295 | IRQF_SHARED, name, vp_dev); | ||
| 296 | if (err) | ||
| 297 | return err; | ||
| 298 | vp_dev->intx_enabled = 1; | ||
| 299 | return 0; | ||
| 300 | } | ||
| 296 | 301 | ||
| 297 | vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, | 302 | vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, |
| 298 | GFP_KERNEL); | 303 | GFP_KERNEL); |
| 299 | if (!vp_dev->msix_entries) | 304 | if (!vp_dev->msix_entries) |
| 300 | goto error_entries; | 305 | goto error; |
| 301 | vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, | 306 | vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, |
| 302 | GFP_KERNEL); | 307 | GFP_KERNEL); |
| 303 | if (!vp_dev->msix_names) | 308 | if (!vp_dev->msix_names) |
| 304 | goto error_names; | 309 | goto error; |
| 305 | 310 | ||
| 306 | for (i = 0; i < nvectors; ++i) | 311 | for (i = 0; i < nvectors; ++i) |
| 307 | vp_dev->msix_entries[i].entry = i; | 312 | vp_dev->msix_entries[i].entry = i; |
| 308 | 313 | ||
| 309 | err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, | 314 | err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); |
| 310 | options, ARRAY_SIZE(options)); | 315 | if (err > 0) |
| 311 | if (err < 0) { | 316 | err = -ENOSPC; |
| 312 | /* Can't allocate enough MSI-X vectors, use regular interrupt */ | 317 | if (err) |
| 313 | vp_dev->msix_vectors = 0; | 318 | goto error; |
| 314 | err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, | 319 | vp_dev->msix_vectors = nvectors; |
| 315 | IRQF_SHARED, name, vp_dev); | 320 | vp_dev->msix_enabled = 1; |
| 316 | if (err) | 321 | |
| 317 | goto error_irq; | 322 | /* Set the vector used for configuration */ |
| 318 | vp_dev->intx_enabled = 1; | 323 | v = vp_dev->msix_used_vectors; |
| 319 | } else { | 324 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, |
| 320 | vp_dev->msix_vectors = err; | 325 | "%s-config", name); |
| 321 | vp_dev->msix_enabled = 1; | 326 | err = request_irq(vp_dev->msix_entries[v].vector, |
| 322 | 327 | vp_config_changed, 0, vp_dev->msix_names[v], | |
| 323 | /* Set the vector used for configuration */ | 328 | vp_dev); |
| 324 | v = vp_dev->msix_used_vectors; | 329 | if (err) |
| 325 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, | 330 | goto error; |
| 326 | "%s-config", name); | 331 | ++vp_dev->msix_used_vectors; |
| 327 | err = request_irq(vp_dev->msix_entries[v].vector, | 332 | |
| 328 | vp_config_changed, 0, vp_dev->msix_names[v], | 333 | iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); |
| 329 | vp_dev); | 334 | /* Verify we had enough resources to assign the vector */ |
| 330 | if (err) | 335 | v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); |
| 331 | goto error_irq; | 336 | if (v == VIRTIO_MSI_NO_VECTOR) { |
| 332 | ++vp_dev->msix_used_vectors; | 337 | err = -EBUSY; |
| 333 | 338 | goto error; | |
| 334 | iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); | ||
| 335 | /* Verify we had enough resources to assign the vector */ | ||
| 336 | v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); | ||
| 337 | if (v == VIRTIO_MSI_NO_VECTOR) { | ||
| 338 | err = -EBUSY; | ||
| 339 | goto error_irq; | ||
| 340 | } | ||
| 341 | } | 339 | } |
| 342 | 340 | ||
| 343 | if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) { | 341 | if (!per_vq_vectors) { |
| 344 | /* Shared vector for all VQs */ | 342 | /* Shared vector for all VQs */ |
| 345 | v = vp_dev->msix_used_vectors; | 343 | v = vp_dev->msix_used_vectors; |
| 346 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, | 344 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, |
| @@ -349,28 +347,25 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) | |||
| 349 | vp_vring_interrupt, 0, vp_dev->msix_names[v], | 347 | vp_vring_interrupt, 0, vp_dev->msix_names[v], |
| 350 | vp_dev); | 348 | vp_dev); |
| 351 | if (err) | 349 | if (err) |
| 352 | goto error_irq; | 350 | goto error; |
| 353 | ++vp_dev->msix_used_vectors; | 351 | ++vp_dev->msix_used_vectors; |
| 354 | } | 352 | } |
| 355 | return 0; | 353 | return 0; |
| 356 | error_irq: | 354 | error: |
| 357 | vp_free_vectors(vdev); | 355 | vp_free_vectors(vdev); |
| 358 | kfree(vp_dev->msix_names); | ||
| 359 | error_names: | ||
| 360 | kfree(vp_dev->msix_entries); | ||
| 361 | error_entries: | ||
| 362 | return err; | 356 | return err; |
| 363 | } | 357 | } |
| 364 | 358 | ||
| 365 | static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, | 359 | static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, |
| 366 | void (*callback)(struct virtqueue *vq), | 360 | void (*callback)(struct virtqueue *vq), |
| 367 | const char *name) | 361 | const char *name, |
| 362 | u16 vector) | ||
| 368 | { | 363 | { |
| 369 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | 364 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 370 | struct virtio_pci_vq_info *info; | 365 | struct virtio_pci_vq_info *info; |
| 371 | struct virtqueue *vq; | 366 | struct virtqueue *vq; |
| 372 | unsigned long flags, size; | 367 | unsigned long flags, size; |
| 373 | u16 num, vector; | 368 | u16 num; |
| 374 | int err; | 369 | int err; |
| 375 | 370 | ||
| 376 | /* Select the queue we're interested in */ | 371 | /* Select the queue we're interested in */ |
| @@ -389,7 +384,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, | |||
| 389 | 384 | ||
| 390 | info->queue_index = index; | 385 | info->queue_index = index; |
| 391 | info->num = num; | 386 | info->num = num; |
| 392 | info->vector = VIRTIO_MSI_NO_VECTOR; | 387 | info->vector = vector; |
| 393 | 388 | ||
| 394 | size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); | 389 | size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); |
| 395 | info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); | 390 | info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); |
| @@ -413,22 +408,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, | |||
| 413 | vq->priv = info; | 408 | vq->priv = info; |
| 414 | info->vq = vq; | 409 | info->vq = vq; |
| 415 | 410 | ||
| 416 | /* allocate per-vq vector if available and necessary */ | 411 | if (vector != VIRTIO_MSI_NO_VECTOR) { |
| 417 | if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) { | ||
| 418 | vector = vp_dev->msix_used_vectors; | ||
| 419 | snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, | ||
| 420 | "%s-%s", dev_name(&vp_dev->vdev.dev), name); | ||
| 421 | err = request_irq(vp_dev->msix_entries[vector].vector, | ||
| 422 | vring_interrupt, 0, | ||
| 423 | vp_dev->msix_names[vector], vq); | ||
| 424 | if (err) | ||
| 425 | goto out_request_irq; | ||
| 426 | info->vector = vector; | ||
| 427 | ++vp_dev->msix_used_vectors; | ||
| 428 | } else | ||
| 429 | vector = VP_MSIX_VQ_VECTOR; | ||
| 430 | |||
| 431 | if (callback && vp_dev->msix_enabled) { | ||
| 432 | iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); | 412 | iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); |
| 433 | vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); | 413 | vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); |
| 434 | if (vector == VIRTIO_MSI_NO_VECTOR) { | 414 | if (vector == VIRTIO_MSI_NO_VECTOR) { |
| @@ -444,11 +424,6 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, | |||
| 444 | return vq; | 424 | return vq; |
| 445 | 425 | ||
| 446 | out_assign: | 426 | out_assign: |
| 447 | if (info->vector != VIRTIO_MSI_NO_VECTOR) { | ||
| 448 | free_irq(vp_dev->msix_entries[info->vector].vector, vq); | ||
| 449 | --vp_dev->msix_used_vectors; | ||
| 450 | } | ||
| 451 | out_request_irq: | ||
| 452 | vring_del_virtqueue(vq); | 427 | vring_del_virtqueue(vq); |
| 453 | out_activate_queue: | 428 | out_activate_queue: |
| 454 | iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); | 429 | iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); |
| @@ -462,12 +437,13 @@ static void vp_del_vq(struct virtqueue *vq) | |||
| 462 | { | 437 | { |
| 463 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); | 438 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); |
| 464 | struct virtio_pci_vq_info *info = vq->priv; | 439 | struct virtio_pci_vq_info *info = vq->priv; |
| 465 | unsigned long size; | 440 | unsigned long flags, size; |
| 466 | 441 | ||
| 467 | iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); | 442 | spin_lock_irqsave(&vp_dev->lock, flags); |
| 443 | list_del(&info->node); | ||
| 444 | spin_unlock_irqrestore(&vp_dev->lock, flags); | ||
| 468 | 445 | ||
| 469 | if (info->vector != VIRTIO_MSI_NO_VECTOR) | 446 | iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); |
| 470 | free_irq(vp_dev->msix_entries[info->vector].vector, vq); | ||
| 471 | 447 | ||
| 472 | if (vp_dev->msix_enabled) { | 448 | if (vp_dev->msix_enabled) { |
| 473 | iowrite16(VIRTIO_MSI_NO_VECTOR, | 449 | iowrite16(VIRTIO_MSI_NO_VECTOR, |
| @@ -489,36 +465,62 @@ static void vp_del_vq(struct virtqueue *vq) | |||
| 489 | /* the config->del_vqs() implementation */ | 465 | /* the config->del_vqs() implementation */ |
| 490 | static void vp_del_vqs(struct virtio_device *vdev) | 466 | static void vp_del_vqs(struct virtio_device *vdev) |
| 491 | { | 467 | { |
| 468 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | ||
| 492 | struct virtqueue *vq, *n; | 469 | struct virtqueue *vq, *n; |
| 470 | struct virtio_pci_vq_info *info; | ||
| 493 | 471 | ||
| 494 | list_for_each_entry_safe(vq, n, &vdev->vqs, list) | 472 | list_for_each_entry_safe(vq, n, &vdev->vqs, list) { |
| 473 | info = vq->priv; | ||
| 474 | if (vp_dev->per_vq_vectors) | ||
| 475 | free_irq(vp_dev->msix_entries[info->vector].vector, vq); | ||
| 495 | vp_del_vq(vq); | 476 | vp_del_vq(vq); |
| 477 | } | ||
| 478 | vp_dev->per_vq_vectors = false; | ||
| 496 | 479 | ||
| 497 | vp_free_vectors(vdev); | 480 | vp_free_vectors(vdev); |
| 498 | } | 481 | } |
| 499 | 482 | ||
| 500 | /* the config->find_vqs() implementation */ | 483 | static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, |
| 501 | static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, | 484 | struct virtqueue *vqs[], |
| 502 | struct virtqueue *vqs[], | 485 | vq_callback_t *callbacks[], |
| 503 | vq_callback_t *callbacks[], | 486 | const char *names[], |
| 504 | const char *names[]) | 487 | int nvectors, |
| 488 | bool per_vq_vectors) | ||
| 505 | { | 489 | { |
| 506 | int vectors = 0; | 490 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 507 | int i, err; | 491 | u16 vector; |
| 508 | 492 | int i, err, allocated_vectors; | |
| 509 | /* How many vectors would we like? */ | ||
| 510 | for (i = 0; i < nvqs; ++i) | ||
| 511 | if (callbacks[i]) | ||
| 512 | ++vectors; | ||
| 513 | 493 | ||
| 514 | err = vp_request_vectors(vdev, vectors); | 494 | err = vp_request_vectors(vdev, nvectors, per_vq_vectors); |
| 515 | if (err) | 495 | if (err) |
| 516 | goto error_request; | 496 | goto error_request; |
| 517 | 497 | ||
| 498 | vp_dev->per_vq_vectors = per_vq_vectors; | ||
| 499 | allocated_vectors = vp_dev->msix_used_vectors; | ||
| 518 | for (i = 0; i < nvqs; ++i) { | 500 | for (i = 0; i < nvqs; ++i) { |
| 519 | vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]); | 501 | if (!callbacks[i] || !vp_dev->msix_enabled) |
| 520 | if (IS_ERR(vqs[i])) | 502 | vector = VIRTIO_MSI_NO_VECTOR; |
| 503 | else if (vp_dev->per_vq_vectors) | ||
| 504 | vector = allocated_vectors++; | ||
| 505 | else | ||
| 506 | vector = VP_MSIX_VQ_VECTOR; | ||
| 507 | vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i], vector); | ||
| 508 | if (IS_ERR(vqs[i])) { | ||
| 509 | err = PTR_ERR(vqs[i]); | ||
| 521 | goto error_find; | 510 | goto error_find; |
| 511 | } | ||
| 512 | /* allocate per-vq irq if available and necessary */ | ||
| 513 | if (vp_dev->per_vq_vectors && vector != VIRTIO_MSI_NO_VECTOR) { | ||
| 514 | snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, | ||
| 515 | "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); | ||
| 516 | err = request_irq(vp_dev->msix_entries[vector].vector, | ||
| 517 | vring_interrupt, 0, | ||
| 518 | vp_dev->msix_names[vector], vqs[i]); | ||
| 519 | if (err) { | ||
| 520 | vp_del_vq(vqs[i]); | ||
| 521 | goto error_find; | ||
| 522 | } | ||
| 523 | } | ||
| 522 | } | 524 | } |
| 523 | return 0; | 525 | return 0; |
| 524 | 526 | ||
| @@ -526,7 +528,37 @@ error_find: | |||
| 526 | vp_del_vqs(vdev); | 528 | vp_del_vqs(vdev); |
| 527 | 529 | ||
| 528 | error_request: | 530 | error_request: |
| 529 | return PTR_ERR(vqs[i]); | 531 | return err; |
| 532 | } | ||
| 533 | |||
| 534 | /* the config->find_vqs() implementation */ | ||
| 535 | static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, | ||
| 536 | struct virtqueue *vqs[], | ||
| 537 | vq_callback_t *callbacks[], | ||
| 538 | const char *names[]) | ||
| 539 | { | ||
| 540 | int vectors = 0; | ||
| 541 | int i, uninitialized_var(err); | ||
| 542 | |||
| 543 | /* How many vectors would we like? */ | ||
| 544 | for (i = 0; i < nvqs; ++i) | ||
| 545 | if (callbacks[i]) | ||
| 546 | ++vectors; | ||
| 547 | |||
| 548 | /* We want at most one vector per queue and one for config changes. */ | ||
| 549 | err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, | ||
| 550 | vectors + 1, true); | ||
| 551 | if (!err) | ||
| 552 | return 0; | ||
| 553 | /* Fallback to separate vectors for config and a shared for queues. */ | ||
| 554 | err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, | ||
| 555 | 2, false); | ||
| 556 | if (!err) | ||
| 557 | return 0; | ||
| 558 | /* Finally fall back to regular interrupts. */ | ||
| 559 | err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, | ||
| 560 | 0, false); | ||
| 561 | return err; | ||
| 530 | } | 562 | } |
| 531 | 563 | ||
| 532 | static struct virtio_config_ops virtio_pci_config_ops = { | 564 | static struct virtio_config_ops virtio_pci_config_ops = { |
diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c index fecb307d28e9..aec7cefdef21 100644 --- a/drivers/watchdog/coh901327_wdt.c +++ b/drivers/watchdog/coh901327_wdt.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
| 19 | #include <linux/uaccess.h> | 19 | #include <linux/uaccess.h> |
| 20 | #include <linux/clk.h> | 20 | #include <linux/clk.h> |
| 21 | #include <linux/delay.h> | ||
| 21 | 22 | ||
| 22 | #define DRV_NAME "WDOG COH 901 327" | 23 | #define DRV_NAME "WDOG COH 901 327" |
| 23 | 24 | ||
| @@ -92,6 +93,8 @@ static struct clk *clk; | |||
| 92 | static void coh901327_enable(u16 timeout) | 93 | static void coh901327_enable(u16 timeout) |
| 93 | { | 94 | { |
| 94 | u16 val; | 95 | u16 val; |
| 96 | unsigned long freq; | ||
| 97 | unsigned long delay_ns; | ||
| 95 | 98 | ||
| 96 | clk_enable(clk); | 99 | clk_enable(clk); |
| 97 | /* Restart timer if it is disabled */ | 100 | /* Restart timer if it is disabled */ |
| @@ -102,6 +105,14 @@ static void coh901327_enable(u16 timeout) | |||
| 102 | /* Acknowledge any pending interrupt so it doesn't just fire off */ | 105 | /* Acknowledge any pending interrupt so it doesn't just fire off */ |
| 103 | writew(U300_WDOG_IER_WILL_BARK_IRQ_ACK_ENABLE, | 106 | writew(U300_WDOG_IER_WILL_BARK_IRQ_ACK_ENABLE, |
| 104 | virtbase + U300_WDOG_IER); | 107 | virtbase + U300_WDOG_IER); |
| 108 | /* | ||
| 109 | * The interrupt is cleared in the 32 kHz clock domain. | ||
| 110 | * Wait 3 32 kHz cycles for it to take effect | ||
| 111 | */ | ||
| 112 | freq = clk_get_rate(clk); | ||
| 113 | delay_ns = (1000000000 + freq - 1) / freq; /* Freq to ns and round up */ | ||
| 114 | delay_ns = 3 * delay_ns; /* Wait 3 cycles */ | ||
| 115 | ndelay(delay_ns); | ||
| 105 | /* Enable the watchdog interrupt */ | 116 | /* Enable the watchdog interrupt */ |
| 106 | writew(U300_WDOG_IMR_WILL_BARK_IRQ_ENABLE, virtbase + U300_WDOG_IMR); | 117 | writew(U300_WDOG_IMR_WILL_BARK_IRQ_ENABLE, virtbase + U300_WDOG_IMR); |
| 107 | /* Activate the watchdog timer */ | 118 | /* Activate the watchdog timer */ |
