diff options
author | Rafael J. Wysocki <rjw@sisk.pl> | 2009-08-05 17:56:54 -0400 |
---|---|---|
committer | Rafael J. Wysocki <rjw@sisk.pl> | 2009-08-05 17:56:54 -0400 |
commit | c00aafcd4977769e8728292302ddbbb8b1082fab (patch) | |
tree | 5766bcfbfd7b24816b54298b8ef34054f8cf0fae /drivers | |
parent | 2e6713c7662cc5ebc7346b033c404cb2f708fd51 (diff) | |
parent | 90bc1a658a53f8832ee799685703977a450e5af9 (diff) |
Merge branch 'master' into for-linus
Diffstat (limited to 'drivers')
128 files changed, 4677 insertions, 1850 deletions
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 7a0f4aa4fa1e..9a62224cc278 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c | |||
@@ -38,6 +38,9 @@ | |||
38 | 38 | ||
39 | #define _COMPONENT ACPI_MEMORY_DEVICE_COMPONENT | 39 | #define _COMPONENT ACPI_MEMORY_DEVICE_COMPONENT |
40 | 40 | ||
41 | #undef PREFIX | ||
42 | #define PREFIX "ACPI:memory_hp:" | ||
43 | |||
41 | ACPI_MODULE_NAME("acpi_memhotplug"); | 44 | ACPI_MODULE_NAME("acpi_memhotplug"); |
42 | MODULE_AUTHOR("Naveen B S <naveen.b.s@intel.com>"); | 45 | MODULE_AUTHOR("Naveen B S <naveen.b.s@intel.com>"); |
43 | MODULE_DESCRIPTION("Hotplug Mem Driver"); | 46 | MODULE_DESCRIPTION("Hotplug Mem Driver"); |
@@ -153,6 +156,7 @@ acpi_memory_get_device(acpi_handle handle, | |||
153 | acpi_handle phandle; | 156 | acpi_handle phandle; |
154 | struct acpi_device *device = NULL; | 157 | struct acpi_device *device = NULL; |
155 | struct acpi_device *pdevice = NULL; | 158 | struct acpi_device *pdevice = NULL; |
159 | int result; | ||
156 | 160 | ||
157 | 161 | ||
158 | if (!acpi_bus_get_device(handle, &device) && device) | 162 | if (!acpi_bus_get_device(handle, &device) && device) |
@@ -165,9 +169,9 @@ acpi_memory_get_device(acpi_handle handle, | |||
165 | } | 169 | } |
166 | 170 | ||
167 | /* Get the parent device */ | 171 | /* Get the parent device */ |
168 | status = acpi_bus_get_device(phandle, &pdevice); | 172 | result = acpi_bus_get_device(phandle, &pdevice); |
169 | if (ACPI_FAILURE(status)) { | 173 | if (result) { |
170 | ACPI_EXCEPTION((AE_INFO, status, "Cannot get acpi bus device")); | 174 | printk(KERN_WARNING PREFIX "Cannot get acpi bus device"); |
171 | return -EINVAL; | 175 | return -EINVAL; |
172 | } | 176 | } |
173 | 177 | ||
@@ -175,9 +179,9 @@ acpi_memory_get_device(acpi_handle handle, | |||
175 | * Now add the notified device. This creates the acpi_device | 179 | * Now add the notified device. This creates the acpi_device |
176 | * and invokes .add function | 180 | * and invokes .add function |
177 | */ | 181 | */ |
178 | status = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE); | 182 | result = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE); |
179 | if (ACPI_FAILURE(status)) { | 183 | if (result) { |
180 | ACPI_EXCEPTION((AE_INFO, status, "Cannot add acpi bus")); | 184 | printk(KERN_WARNING PREFIX "Cannot add acpi bus"); |
181 | return -EINVAL; | 185 | return -EINVAL; |
182 | } | 186 | } |
183 | 187 | ||
@@ -238,7 +242,12 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) | |||
238 | num_enabled++; | 242 | num_enabled++; |
239 | continue; | 243 | continue; |
240 | } | 244 | } |
241 | 245 | /* | |
246 | * If the memory block size is zero, please ignore it. | ||
247 | * Don't try to do the following memory hotplug flowchart. | ||
248 | */ | ||
249 | if (!info->length) | ||
250 | continue; | ||
242 | if (node < 0) | 251 | if (node < 0) |
243 | node = memory_add_physaddr_to_nid(info->start_addr); | 252 | node = memory_add_physaddr_to_nid(info->start_addr); |
244 | 253 | ||
@@ -253,8 +262,15 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) | |||
253 | mem_device->state = MEMORY_INVALID_STATE; | 262 | mem_device->state = MEMORY_INVALID_STATE; |
254 | return -EINVAL; | 263 | return -EINVAL; |
255 | } | 264 | } |
256 | 265 | /* | |
257 | return result; | 266 | * Sometimes the memory device will contain several memory blocks. |
267 | * When one memory block is hot-added to the system memory, it will | ||
268 | * be regarded as a success. | ||
269 | * Otherwise if the last memory block can't be hot-added to the system | ||
270 | * memory, it will be failure and the memory device can't be bound with | ||
271 | * driver. | ||
272 | */ | ||
273 | return 0; | ||
258 | } | 274 | } |
259 | 275 | ||
260 | static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device) | 276 | static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device) |
diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h index 544dcf834922..eb6f038b03d9 100644 --- a/drivers/acpi/acpica/acobject.h +++ b/drivers/acpi/acpica/acobject.h | |||
@@ -97,6 +97,7 @@ | |||
97 | #define AOPOBJ_OBJECT_INITIALIZED 0x08 | 97 | #define AOPOBJ_OBJECT_INITIALIZED 0x08 |
98 | #define AOPOBJ_SETUP_COMPLETE 0x10 | 98 | #define AOPOBJ_SETUP_COMPLETE 0x10 |
99 | #define AOPOBJ_SINGLE_DATUM 0x20 | 99 | #define AOPOBJ_SINGLE_DATUM 0x20 |
100 | #define AOPOBJ_INVALID 0x40 /* Used if host OS won't allow an op_region address */ | ||
100 | 101 | ||
101 | /****************************************************************************** | 102 | /****************************************************************************** |
102 | * | 103 | * |
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c index 584d766e6f12..b79978f7bc71 100644 --- a/drivers/acpi/acpica/dsopcode.c +++ b/drivers/acpi/acpica/dsopcode.c | |||
@@ -397,6 +397,30 @@ acpi_status acpi_ds_get_region_arguments(union acpi_operand_object *obj_desc) | |||
397 | status = acpi_ds_execute_arguments(node, acpi_ns_get_parent_node(node), | 397 | status = acpi_ds_execute_arguments(node, acpi_ns_get_parent_node(node), |
398 | extra_desc->extra.aml_length, | 398 | extra_desc->extra.aml_length, |
399 | extra_desc->extra.aml_start); | 399 | extra_desc->extra.aml_start); |
400 | if (ACPI_FAILURE(status)) { | ||
401 | return_ACPI_STATUS(status); | ||
402 | } | ||
403 | |||
404 | /* Validate the region address/length via the host OS */ | ||
405 | |||
406 | status = acpi_os_validate_address(obj_desc->region.space_id, | ||
407 | obj_desc->region.address, | ||
408 | (acpi_size) obj_desc->region.length, | ||
409 | acpi_ut_get_node_name(node)); | ||
410 | |||
411 | if (ACPI_FAILURE(status)) { | ||
412 | /* | ||
413 | * Invalid address/length. We will emit an error message and mark | ||
414 | * the region as invalid, so that it will cause an additional error if | ||
415 | * it is ever used. Then return AE_OK. | ||
416 | */ | ||
417 | ACPI_EXCEPTION((AE_INFO, status, | ||
418 | "During address validation of OpRegion [%4.4s]", | ||
419 | node->name.ascii)); | ||
420 | obj_desc->common.flags |= AOPOBJ_INVALID; | ||
421 | status = AE_OK; | ||
422 | } | ||
423 | |||
400 | return_ACPI_STATUS(status); | 424 | return_ACPI_STATUS(status); |
401 | } | 425 | } |
402 | 426 | ||
diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index d4075b821021..6687be167f5f 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c | |||
@@ -113,6 +113,12 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, | |||
113 | } | 113 | } |
114 | } | 114 | } |
115 | 115 | ||
116 | /* Exit if Address/Length have been disallowed by the host OS */ | ||
117 | |||
118 | if (rgn_desc->common.flags & AOPOBJ_INVALID) { | ||
119 | return_ACPI_STATUS(AE_AML_ILLEGAL_ADDRESS); | ||
120 | } | ||
121 | |||
116 | /* | 122 | /* |
117 | * Exit now for SMBus address space, it has a non-linear address space | 123 | * Exit now for SMBus address space, it has a non-linear address space |
118 | * and the request cannot be directly validated | 124 | * and the request cannot be directly validated |
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 71670719d61a..5691f165a952 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c | |||
@@ -189,11 +189,36 @@ acpi_status __init acpi_os_initialize(void) | |||
189 | return AE_OK; | 189 | return AE_OK; |
190 | } | 190 | } |
191 | 191 | ||
192 | static void bind_to_cpu0(struct work_struct *work) | ||
193 | { | ||
194 | set_cpus_allowed(current, cpumask_of_cpu(0)); | ||
195 | kfree(work); | ||
196 | } | ||
197 | |||
198 | static void bind_workqueue(struct workqueue_struct *wq) | ||
199 | { | ||
200 | struct work_struct *work; | ||
201 | |||
202 | work = kzalloc(sizeof(struct work_struct), GFP_KERNEL); | ||
203 | INIT_WORK(work, bind_to_cpu0); | ||
204 | queue_work(wq, work); | ||
205 | } | ||
206 | |||
192 | acpi_status acpi_os_initialize1(void) | 207 | acpi_status acpi_os_initialize1(void) |
193 | { | 208 | { |
209 | /* | ||
210 | * On some machines, a software-initiated SMI causes corruption unless | ||
211 | * the SMI runs on CPU 0. An SMI can be initiated by any AML, but | ||
212 | * typically it's done in GPE-related methods that are run via | ||
213 | * workqueues, so we can avoid the known corruption cases by binding | ||
214 | * the workqueues to CPU 0. | ||
215 | */ | ||
194 | kacpid_wq = create_singlethread_workqueue("kacpid"); | 216 | kacpid_wq = create_singlethread_workqueue("kacpid"); |
217 | bind_workqueue(kacpid_wq); | ||
195 | kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify"); | 218 | kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify"); |
219 | bind_workqueue(kacpi_notify_wq); | ||
196 | kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug"); | 220 | kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug"); |
221 | bind_workqueue(kacpi_hotplug_wq); | ||
197 | BUG_ON(!kacpid_wq); | 222 | BUG_ON(!kacpid_wq); |
198 | BUG_ON(!kacpi_notify_wq); | 223 | BUG_ON(!kacpi_notify_wq); |
199 | BUG_ON(!kacpi_hotplug_wq); | 224 | BUG_ON(!kacpi_hotplug_wq); |
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 0944daec064f..9c61ab2177cf 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c | |||
@@ -121,7 +121,7 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr, | |||
121 | table_attr->attr.size = 0; | 121 | table_attr->attr.size = 0; |
122 | table_attr->attr.read = acpi_table_show; | 122 | table_attr->attr.read = acpi_table_show; |
123 | table_attr->attr.attr.name = table_attr->name; | 123 | table_attr->attr.attr.name = table_attr->name; |
124 | table_attr->attr.attr.mode = 0444; | 124 | table_attr->attr.attr.mode = 0400; |
125 | 125 | ||
126 | return; | 126 | return; |
127 | } | 127 | } |
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index f703f5478246..6d7fbaa92248 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c | |||
@@ -36,7 +36,6 @@ | |||
36 | 36 | ||
37 | /* Register offsets */ | 37 | /* Register offsets */ |
38 | #define MG_BUFF_OFFSET 0x8000 | 38 | #define MG_BUFF_OFFSET 0x8000 |
39 | #define MG_STORAGE_BUFFER_SIZE 0x200 | ||
40 | #define MG_REG_OFFSET 0xC000 | 39 | #define MG_REG_OFFSET 0xC000 |
41 | #define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ | 40 | #define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ |
42 | #define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ | 41 | #define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ |
@@ -219,6 +218,16 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) | |||
219 | host->error = MG_ERR_NONE; | 218 | host->error = MG_ERR_NONE; |
220 | expire = jiffies + msecs_to_jiffies(msec); | 219 | expire = jiffies + msecs_to_jiffies(msec); |
221 | 220 | ||
221 | /* These 2 times dummy status read prevents reading invalid | ||
222 | * status. A very little time (3 times of mflash operating clk) | ||
223 | * is required for busy bit is set. Use dummy read instead of | ||
224 | * busy wait, because mflash's PLL is machine dependent. | ||
225 | */ | ||
226 | if (prv_data->use_polling) { | ||
227 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); | ||
228 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); | ||
229 | } | ||
230 | |||
222 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); | 231 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); |
223 | 232 | ||
224 | do { | 233 | do { |
@@ -245,8 +254,6 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) | |||
245 | mg_dump_status("not ready", status, host); | 254 | mg_dump_status("not ready", status, host); |
246 | return MG_ERR_INV_STAT; | 255 | return MG_ERR_INV_STAT; |
247 | } | 256 | } |
248 | if (prv_data->use_polling) | ||
249 | msleep(1); | ||
250 | 257 | ||
251 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); | 258 | status = inb((unsigned long)host->dev_base + MG_REG_STATUS); |
252 | } while (time_before(cur_jiffies, expire)); | 259 | } while (time_before(cur_jiffies, expire)); |
@@ -469,9 +476,18 @@ static unsigned int mg_out(struct mg_host *host, | |||
469 | return MG_ERR_NONE; | 476 | return MG_ERR_NONE; |
470 | } | 477 | } |
471 | 478 | ||
479 | static void mg_read_one(struct mg_host *host, struct request *req) | ||
480 | { | ||
481 | u16 *buff = (u16 *)req->buffer; | ||
482 | u32 i; | ||
483 | |||
484 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | ||
485 | *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + | ||
486 | (i << 1)); | ||
487 | } | ||
488 | |||
472 | static void mg_read(struct request *req) | 489 | static void mg_read(struct request *req) |
473 | { | 490 | { |
474 | u32 j; | ||
475 | struct mg_host *host = req->rq_disk->private_data; | 491 | struct mg_host *host = req->rq_disk->private_data; |
476 | 492 | ||
477 | if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), | 493 | if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), |
@@ -482,49 +498,65 @@ static void mg_read(struct request *req) | |||
482 | blk_rq_sectors(req), blk_rq_pos(req), req->buffer); | 498 | blk_rq_sectors(req), blk_rq_pos(req), req->buffer); |
483 | 499 | ||
484 | do { | 500 | do { |
485 | u16 *buff = (u16 *)req->buffer; | ||
486 | |||
487 | if (mg_wait(host, ATA_DRQ, | 501 | if (mg_wait(host, ATA_DRQ, |
488 | MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { | 502 | MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { |
489 | mg_bad_rw_intr(host); | 503 | mg_bad_rw_intr(host); |
490 | return; | 504 | return; |
491 | } | 505 | } |
492 | for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) | 506 | |
493 | *buff++ = inw((unsigned long)host->dev_base + | 507 | mg_read_one(host, req); |
494 | MG_BUFF_OFFSET + (j << 1)); | ||
495 | 508 | ||
496 | outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + | 509 | outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + |
497 | MG_REG_COMMAND); | 510 | MG_REG_COMMAND); |
498 | } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); | 511 | } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); |
499 | } | 512 | } |
500 | 513 | ||
514 | static void mg_write_one(struct mg_host *host, struct request *req) | ||
515 | { | ||
516 | u16 *buff = (u16 *)req->buffer; | ||
517 | u32 i; | ||
518 | |||
519 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | ||
520 | outw(*buff++, (unsigned long)host->dev_base + MG_BUFF_OFFSET + | ||
521 | (i << 1)); | ||
522 | } | ||
523 | |||
501 | static void mg_write(struct request *req) | 524 | static void mg_write(struct request *req) |
502 | { | 525 | { |
503 | u32 j; | ||
504 | struct mg_host *host = req->rq_disk->private_data; | 526 | struct mg_host *host = req->rq_disk->private_data; |
527 | unsigned int rem = blk_rq_sectors(req); | ||
505 | 528 | ||
506 | if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), | 529 | if (mg_out(host, blk_rq_pos(req), rem, |
507 | MG_CMD_WR, NULL) != MG_ERR_NONE) { | 530 | MG_CMD_WR, NULL) != MG_ERR_NONE) { |
508 | mg_bad_rw_intr(host); | 531 | mg_bad_rw_intr(host); |
509 | return; | 532 | return; |
510 | } | 533 | } |
511 | 534 | ||
512 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", | 535 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", |
513 | blk_rq_sectors(req), blk_rq_pos(req), req->buffer); | 536 | rem, blk_rq_pos(req), req->buffer); |
537 | |||
538 | if (mg_wait(host, ATA_DRQ, | ||
539 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | ||
540 | mg_bad_rw_intr(host); | ||
541 | return; | ||
542 | } | ||
514 | 543 | ||
515 | do { | 544 | do { |
516 | u16 *buff = (u16 *)req->buffer; | 545 | mg_write_one(host, req); |
517 | 546 | ||
518 | if (mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | 547 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + |
548 | MG_REG_COMMAND); | ||
549 | |||
550 | rem--; | ||
551 | if (rem > 1 && mg_wait(host, ATA_DRQ, | ||
552 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | ||
553 | mg_bad_rw_intr(host); | ||
554 | return; | ||
555 | } else if (mg_wait(host, MG_STAT_READY, | ||
556 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | ||
519 | mg_bad_rw_intr(host); | 557 | mg_bad_rw_intr(host); |
520 | return; | 558 | return; |
521 | } | 559 | } |
522 | for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) | ||
523 | outw(*buff++, (unsigned long)host->dev_base + | ||
524 | MG_BUFF_OFFSET + (j << 1)); | ||
525 | |||
526 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + | ||
527 | MG_REG_COMMAND); | ||
528 | } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); | 560 | } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); |
529 | } | 561 | } |
530 | 562 | ||
@@ -532,7 +564,6 @@ static void mg_read_intr(struct mg_host *host) | |||
532 | { | 564 | { |
533 | struct request *req = host->req; | 565 | struct request *req = host->req; |
534 | u32 i; | 566 | u32 i; |
535 | u16 *buff; | ||
536 | 567 | ||
537 | /* check status */ | 568 | /* check status */ |
538 | do { | 569 | do { |
@@ -550,13 +581,7 @@ static void mg_read_intr(struct mg_host *host) | |||
550 | return; | 581 | return; |
551 | 582 | ||
552 | ok_to_read: | 583 | ok_to_read: |
553 | /* get current segment of request */ | 584 | mg_read_one(host, req); |
554 | buff = (u16 *)req->buffer; | ||
555 | |||
556 | /* read 1 sector */ | ||
557 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | ||
558 | *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + | ||
559 | (i << 1)); | ||
560 | 585 | ||
561 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", | 586 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", |
562 | blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); | 587 | blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); |
@@ -575,8 +600,7 @@ ok_to_read: | |||
575 | static void mg_write_intr(struct mg_host *host) | 600 | static void mg_write_intr(struct mg_host *host) |
576 | { | 601 | { |
577 | struct request *req = host->req; | 602 | struct request *req = host->req; |
578 | u32 i, j; | 603 | u32 i; |
579 | u16 *buff; | ||
580 | bool rem; | 604 | bool rem; |
581 | 605 | ||
582 | /* check status */ | 606 | /* check status */ |
@@ -597,12 +621,7 @@ static void mg_write_intr(struct mg_host *host) | |||
597 | ok_to_write: | 621 | ok_to_write: |
598 | if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) { | 622 | if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) { |
599 | /* write 1 sector and set handler if remains */ | 623 | /* write 1 sector and set handler if remains */ |
600 | buff = (u16 *)req->buffer; | 624 | mg_write_one(host, req); |
601 | for (j = 0; j < MG_STORAGE_BUFFER_SIZE >> 1; j++) { | ||
602 | outw(*buff, (unsigned long)host->dev_base + | ||
603 | MG_BUFF_OFFSET + (j << 1)); | ||
604 | buff++; | ||
605 | } | ||
606 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", | 625 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", |
607 | blk_rq_pos(req), blk_rq_sectors(req), req->buffer); | 626 | blk_rq_pos(req), blk_rq_sectors(req), req->buffer); |
608 | host->mg_do_intr = mg_write_intr; | 627 | host->mg_do_intr = mg_write_intr; |
@@ -667,9 +686,6 @@ static unsigned int mg_issue_req(struct request *req, | |||
667 | unsigned int sect_num, | 686 | unsigned int sect_num, |
668 | unsigned int sect_cnt) | 687 | unsigned int sect_cnt) |
669 | { | 688 | { |
670 | u16 *buff; | ||
671 | u32 i; | ||
672 | |||
673 | switch (rq_data_dir(req)) { | 689 | switch (rq_data_dir(req)) { |
674 | case READ: | 690 | case READ: |
675 | if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) | 691 | if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) |
@@ -693,12 +709,7 @@ static unsigned int mg_issue_req(struct request *req, | |||
693 | mg_bad_rw_intr(host); | 709 | mg_bad_rw_intr(host); |
694 | return host->error; | 710 | return host->error; |
695 | } | 711 | } |
696 | buff = (u16 *)req->buffer; | 712 | mg_write_one(host, req); |
697 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) { | ||
698 | outw(*buff, (unsigned long)host->dev_base + | ||
699 | MG_BUFF_OFFSET + (i << 1)); | ||
700 | buff++; | ||
701 | } | ||
702 | mod_timer(&host->timer, jiffies + 3 * HZ); | 713 | mod_timer(&host->timer, jiffies + 3 * HZ); |
703 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + | 714 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + |
704 | MG_REG_COMMAND); | 715 | MG_REG_COMMAND); |
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index f4bb43fb8016..e077701ae3d9 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c | |||
@@ -225,7 +225,7 @@ static const struct agp_bridge_driver parisc_agp_driver = { | |||
225 | .configure = parisc_agp_configure, | 225 | .configure = parisc_agp_configure, |
226 | .fetch_size = parisc_agp_fetch_size, | 226 | .fetch_size = parisc_agp_fetch_size, |
227 | .tlb_flush = parisc_agp_tlbflush, | 227 | .tlb_flush = parisc_agp_tlbflush, |
228 | .mask_memory = parisc_agp_mask_memory, | 228 | .mask_memory = parisc_agp_page_mask_memory, |
229 | .masks = parisc_agp_masks, | 229 | .masks = parisc_agp_masks, |
230 | .agp_enable = parisc_agp_enable, | 230 | .agp_enable = parisc_agp_enable, |
231 | .cache_flush = global_cache_flush, | 231 | .cache_flush = global_cache_flush, |
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index acd76b767d4c..1733d3439ad2 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c | |||
@@ -48,6 +48,41 @@ static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); | |||
48 | /* Line disc dispatch table */ | 48 | /* Line disc dispatch table */ |
49 | static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS]; | 49 | static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS]; |
50 | 50 | ||
51 | static inline struct tty_ldisc *get_ldisc(struct tty_ldisc *ld) | ||
52 | { | ||
53 | if (ld) | ||
54 | atomic_inc(&ld->users); | ||
55 | return ld; | ||
56 | } | ||
57 | |||
58 | static void put_ldisc(struct tty_ldisc *ld) | ||
59 | { | ||
60 | unsigned long flags; | ||
61 | |||
62 | if (WARN_ON_ONCE(!ld)) | ||
63 | return; | ||
64 | |||
65 | /* | ||
66 | * If this is the last user, free the ldisc, and | ||
67 | * release the ldisc ops. | ||
68 | * | ||
69 | * We really want an "atomic_dec_and_lock_irqsave()", | ||
70 | * but we don't have it, so this does it by hand. | ||
71 | */ | ||
72 | local_irq_save(flags); | ||
73 | if (atomic_dec_and_lock(&ld->users, &tty_ldisc_lock)) { | ||
74 | struct tty_ldisc_ops *ldo = ld->ops; | ||
75 | |||
76 | ldo->refcount--; | ||
77 | module_put(ldo->owner); | ||
78 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
79 | |||
80 | kfree(ld); | ||
81 | return; | ||
82 | } | ||
83 | local_irq_restore(flags); | ||
84 | } | ||
85 | |||
51 | /** | 86 | /** |
52 | * tty_register_ldisc - install a line discipline | 87 | * tty_register_ldisc - install a line discipline |
53 | * @disc: ldisc number | 88 | * @disc: ldisc number |
@@ -142,7 +177,7 @@ static struct tty_ldisc *tty_ldisc_try_get(int disc) | |||
142 | /* lock it */ | 177 | /* lock it */ |
143 | ldops->refcount++; | 178 | ldops->refcount++; |
144 | ld->ops = ldops; | 179 | ld->ops = ldops; |
145 | ld->refcount = 0; | 180 | atomic_set(&ld->users, 1); |
146 | err = 0; | 181 | err = 0; |
147 | } | 182 | } |
148 | } | 183 | } |
@@ -181,35 +216,6 @@ static struct tty_ldisc *tty_ldisc_get(int disc) | |||
181 | return ld; | 216 | return ld; |
182 | } | 217 | } |
183 | 218 | ||
184 | /** | ||
185 | * tty_ldisc_put - drop ldisc reference | ||
186 | * @ld: ldisc | ||
187 | * | ||
188 | * Drop a reference to a line discipline. Manage refcounts and | ||
189 | * module usage counts. Free the ldisc once the recount hits zero. | ||
190 | * | ||
191 | * Locking: | ||
192 | * takes tty_ldisc_lock to guard against ldisc races | ||
193 | */ | ||
194 | |||
195 | static void tty_ldisc_put(struct tty_ldisc *ld) | ||
196 | { | ||
197 | unsigned long flags; | ||
198 | int disc = ld->ops->num; | ||
199 | struct tty_ldisc_ops *ldo; | ||
200 | |||
201 | BUG_ON(disc < N_TTY || disc >= NR_LDISCS); | ||
202 | |||
203 | spin_lock_irqsave(&tty_ldisc_lock, flags); | ||
204 | ldo = tty_ldiscs[disc]; | ||
205 | BUG_ON(ldo->refcount == 0); | ||
206 | ldo->refcount--; | ||
207 | module_put(ldo->owner); | ||
208 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
209 | WARN_ON(ld->refcount); | ||
210 | kfree(ld); | ||
211 | } | ||
212 | |||
213 | static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) | 219 | static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) |
214 | { | 220 | { |
215 | return (*pos < NR_LDISCS) ? pos : NULL; | 221 | return (*pos < NR_LDISCS) ? pos : NULL; |
@@ -234,7 +240,7 @@ static int tty_ldiscs_seq_show(struct seq_file *m, void *v) | |||
234 | if (IS_ERR(ld)) | 240 | if (IS_ERR(ld)) |
235 | return 0; | 241 | return 0; |
236 | seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i); | 242 | seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i); |
237 | tty_ldisc_put(ld); | 243 | put_ldisc(ld); |
238 | return 0; | 244 | return 0; |
239 | } | 245 | } |
240 | 246 | ||
@@ -288,20 +294,17 @@ static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) | |||
288 | * Locking: takes tty_ldisc_lock | 294 | * Locking: takes tty_ldisc_lock |
289 | */ | 295 | */ |
290 | 296 | ||
291 | static int tty_ldisc_try(struct tty_struct *tty) | 297 | static struct tty_ldisc *tty_ldisc_try(struct tty_struct *tty) |
292 | { | 298 | { |
293 | unsigned long flags; | 299 | unsigned long flags; |
294 | struct tty_ldisc *ld; | 300 | struct tty_ldisc *ld; |
295 | int ret = 0; | ||
296 | 301 | ||
297 | spin_lock_irqsave(&tty_ldisc_lock, flags); | 302 | spin_lock_irqsave(&tty_ldisc_lock, flags); |
298 | ld = tty->ldisc; | 303 | ld = NULL; |
299 | if (test_bit(TTY_LDISC, &tty->flags)) { | 304 | if (test_bit(TTY_LDISC, &tty->flags)) |
300 | ld->refcount++; | 305 | ld = get_ldisc(tty->ldisc); |
301 | ret = 1; | ||
302 | } | ||
303 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | 306 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); |
304 | return ret; | 307 | return ld; |
305 | } | 308 | } |
306 | 309 | ||
307 | /** | 310 | /** |
@@ -322,10 +325,11 @@ static int tty_ldisc_try(struct tty_struct *tty) | |||
322 | 325 | ||
323 | struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) | 326 | struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) |
324 | { | 327 | { |
328 | struct tty_ldisc *ld; | ||
329 | |||
325 | /* wait_event is a macro */ | 330 | /* wait_event is a macro */ |
326 | wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); | 331 | wait_event(tty_ldisc_wait, (ld = tty_ldisc_try(tty)) != NULL); |
327 | WARN_ON(tty->ldisc->refcount == 0); | 332 | return ld; |
328 | return tty->ldisc; | ||
329 | } | 333 | } |
330 | EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); | 334 | EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); |
331 | 335 | ||
@@ -342,9 +346,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); | |||
342 | 346 | ||
343 | struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) | 347 | struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) |
344 | { | 348 | { |
345 | if (tty_ldisc_try(tty)) | 349 | return tty_ldisc_try(tty); |
346 | return tty->ldisc; | ||
347 | return NULL; | ||
348 | } | 350 | } |
349 | EXPORT_SYMBOL_GPL(tty_ldisc_ref); | 351 | EXPORT_SYMBOL_GPL(tty_ldisc_ref); |
350 | 352 | ||
@@ -360,21 +362,15 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref); | |||
360 | 362 | ||
361 | void tty_ldisc_deref(struct tty_ldisc *ld) | 363 | void tty_ldisc_deref(struct tty_ldisc *ld) |
362 | { | 364 | { |
363 | unsigned long flags; | 365 | put_ldisc(ld); |
364 | |||
365 | BUG_ON(ld == NULL); | ||
366 | |||
367 | spin_lock_irqsave(&tty_ldisc_lock, flags); | ||
368 | if (ld->refcount == 0) | ||
369 | printk(KERN_ERR "tty_ldisc_deref: no references.\n"); | ||
370 | else | ||
371 | ld->refcount--; | ||
372 | if (ld->refcount == 0) | ||
373 | wake_up(&tty_ldisc_wait); | ||
374 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
375 | } | 366 | } |
376 | EXPORT_SYMBOL_GPL(tty_ldisc_deref); | 367 | EXPORT_SYMBOL_GPL(tty_ldisc_deref); |
377 | 368 | ||
369 | static inline void tty_ldisc_put(struct tty_ldisc *ld) | ||
370 | { | ||
371 | put_ldisc(ld); | ||
372 | } | ||
373 | |||
378 | /** | 374 | /** |
379 | * tty_ldisc_enable - allow ldisc use | 375 | * tty_ldisc_enable - allow ldisc use |
380 | * @tty: terminal to activate ldisc on | 376 | * @tty: terminal to activate ldisc on |
@@ -523,31 +519,6 @@ static int tty_ldisc_halt(struct tty_struct *tty) | |||
523 | } | 519 | } |
524 | 520 | ||
525 | /** | 521 | /** |
526 | * tty_ldisc_wait_idle - wait for the ldisc to become idle | ||
527 | * @tty: tty to wait for | ||
528 | * | ||
529 | * Wait for the line discipline to become idle. The discipline must | ||
530 | * have been halted for this to guarantee it remains idle. | ||
531 | * | ||
532 | * tty_ldisc_lock protects the ref counts currently. | ||
533 | */ | ||
534 | |||
535 | static int tty_ldisc_wait_idle(struct tty_struct *tty) | ||
536 | { | ||
537 | unsigned long flags; | ||
538 | spin_lock_irqsave(&tty_ldisc_lock, flags); | ||
539 | while (tty->ldisc->refcount) { | ||
540 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
541 | if (wait_event_timeout(tty_ldisc_wait, | ||
542 | tty->ldisc->refcount == 0, 5 * HZ) == 0) | ||
543 | return -EBUSY; | ||
544 | spin_lock_irqsave(&tty_ldisc_lock, flags); | ||
545 | } | ||
546 | spin_unlock_irqrestore(&tty_ldisc_lock, flags); | ||
547 | return 0; | ||
548 | } | ||
549 | |||
550 | /** | ||
551 | * tty_set_ldisc - set line discipline | 522 | * tty_set_ldisc - set line discipline |
552 | * @tty: the terminal to set | 523 | * @tty: the terminal to set |
553 | * @ldisc: the line discipline | 524 | * @ldisc: the line discipline |
@@ -642,14 +613,6 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc) | |||
642 | 613 | ||
643 | flush_scheduled_work(); | 614 | flush_scheduled_work(); |
644 | 615 | ||
645 | /* Let any existing reference holders finish */ | ||
646 | retval = tty_ldisc_wait_idle(tty); | ||
647 | if (retval < 0) { | ||
648 | clear_bit(TTY_LDISC_CHANGING, &tty->flags); | ||
649 | tty_ldisc_put(new_ldisc); | ||
650 | return retval; | ||
651 | } | ||
652 | |||
653 | mutex_lock(&tty->ldisc_mutex); | 616 | mutex_lock(&tty->ldisc_mutex); |
654 | if (test_bit(TTY_HUPPED, &tty->flags)) { | 617 | if (test_bit(TTY_HUPPED, &tty->flags)) { |
655 | /* We were raced by the hangup method. It will have stomped | 618 | /* We were raced by the hangup method. It will have stomped |
@@ -795,7 +758,6 @@ void tty_ldisc_hangup(struct tty_struct *tty) | |||
795 | if (tty->ldisc) { /* Not yet closed */ | 758 | if (tty->ldisc) { /* Not yet closed */ |
796 | /* Switch back to N_TTY */ | 759 | /* Switch back to N_TTY */ |
797 | tty_ldisc_halt(tty); | 760 | tty_ldisc_halt(tty); |
798 | tty_ldisc_wait_idle(tty); | ||
799 | tty_ldisc_reinit(tty); | 761 | tty_ldisc_reinit(tty); |
800 | /* At this point we have a closed ldisc and we want to | 762 | /* At this point we have a closed ldisc and we want to |
801 | reopen it. We could defer this to the next open but | 763 | reopen it. We could defer this to the next open but |
@@ -860,14 +822,6 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) | |||
860 | tty_ldisc_halt(tty); | 822 | tty_ldisc_halt(tty); |
861 | flush_scheduled_work(); | 823 | flush_scheduled_work(); |
862 | 824 | ||
863 | /* | ||
864 | * Wait for any short term users (we know they are just driver | ||
865 | * side waiters as the file is closing so user count on the file | ||
866 | * side is zero. | ||
867 | */ | ||
868 | |||
869 | tty_ldisc_wait_idle(tty); | ||
870 | |||
871 | mutex_lock(&tty->ldisc_mutex); | 825 | mutex_lock(&tty->ldisc_mutex); |
872 | /* | 826 | /* |
873 | * Now kill off the ldisc | 827 | * Now kill off the ldisc |
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b90eda8b3440..fd69086d08d5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c | |||
@@ -858,6 +858,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) | |||
858 | 858 | ||
859 | /* Check for existing affected CPUs. | 859 | /* Check for existing affected CPUs. |
860 | * They may not be aware of it due to CPU Hotplug. | 860 | * They may not be aware of it due to CPU Hotplug. |
861 | * cpufreq_cpu_put is called when the device is removed | ||
862 | * in __cpufreq_remove_dev() | ||
861 | */ | 863 | */ |
862 | managed_policy = cpufreq_cpu_get(j); | 864 | managed_policy = cpufreq_cpu_get(j); |
863 | if (unlikely(managed_policy)) { | 865 | if (unlikely(managed_policy)) { |
@@ -884,7 +886,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) | |||
884 | ret = sysfs_create_link(&sys_dev->kobj, | 886 | ret = sysfs_create_link(&sys_dev->kobj, |
885 | &managed_policy->kobj, | 887 | &managed_policy->kobj, |
886 | "cpufreq"); | 888 | "cpufreq"); |
887 | if (!ret) | 889 | if (ret) |
888 | cpufreq_cpu_put(managed_policy); | 890 | cpufreq_cpu_put(managed_policy); |
889 | /* | 891 | /* |
890 | * Success. We only needed to be added to the mask. | 892 | * Success. We only needed to be added to the mask. |
@@ -924,6 +926,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) | |||
924 | 926 | ||
925 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 927 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
926 | for_each_cpu(j, policy->cpus) { | 928 | for_each_cpu(j, policy->cpus) { |
929 | if (!cpu_online(j)) | ||
930 | continue; | ||
927 | per_cpu(cpufreq_cpu_data, j) = policy; | 931 | per_cpu(cpufreq_cpu_data, j) = policy; |
928 | per_cpu(policy_cpu, j) = policy->cpu; | 932 | per_cpu(policy_cpu, j) = policy->cpu; |
929 | } | 933 | } |
@@ -1244,13 +1248,22 @@ EXPORT_SYMBOL(cpufreq_get); | |||
1244 | 1248 | ||
1245 | static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) | 1249 | static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) |
1246 | { | 1250 | { |
1247 | int cpu = sysdev->id; | ||
1248 | int ret = 0; | 1251 | int ret = 0; |
1252 | |||
1253 | #ifdef __powerpc__ | ||
1254 | int cpu = sysdev->id; | ||
1249 | unsigned int cur_freq = 0; | 1255 | unsigned int cur_freq = 0; |
1250 | struct cpufreq_policy *cpu_policy; | 1256 | struct cpufreq_policy *cpu_policy; |
1251 | 1257 | ||
1252 | dprintk("suspending cpu %u\n", cpu); | 1258 | dprintk("suspending cpu %u\n", cpu); |
1253 | 1259 | ||
1260 | /* | ||
1261 | * This whole bogosity is here because Powerbooks are made of fail. | ||
1262 | * No sane platform should need any of the code below to be run. | ||
1263 | * (it's entirely the wrong thing to do, as driver->get may | ||
1264 | * reenable interrupts on some architectures). | ||
1265 | */ | ||
1266 | |||
1254 | if (!cpu_online(cpu)) | 1267 | if (!cpu_online(cpu)) |
1255 | return 0; | 1268 | return 0; |
1256 | 1269 | ||
@@ -1309,6 +1322,7 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) | |||
1309 | 1322 | ||
1310 | out: | 1323 | out: |
1311 | cpufreq_cpu_put(cpu_policy); | 1324 | cpufreq_cpu_put(cpu_policy); |
1325 | #endif /* __powerpc__ */ | ||
1312 | return ret; | 1326 | return ret; |
1313 | } | 1327 | } |
1314 | 1328 | ||
@@ -1322,12 +1336,18 @@ out: | |||
1322 | */ | 1336 | */ |
1323 | static int cpufreq_resume(struct sys_device *sysdev) | 1337 | static int cpufreq_resume(struct sys_device *sysdev) |
1324 | { | 1338 | { |
1325 | int cpu = sysdev->id; | ||
1326 | int ret = 0; | 1339 | int ret = 0; |
1340 | |||
1341 | #ifdef __powerpc__ | ||
1342 | int cpu = sysdev->id; | ||
1327 | struct cpufreq_policy *cpu_policy; | 1343 | struct cpufreq_policy *cpu_policy; |
1328 | 1344 | ||
1329 | dprintk("resuming cpu %u\n", cpu); | 1345 | dprintk("resuming cpu %u\n", cpu); |
1330 | 1346 | ||
1347 | /* As with the ->suspend method, all the code below is | ||
1348 | * only necessary because Powerbooks suck. | ||
1349 | * See commit 42d4dc3f4e1e for jokes. */ | ||
1350 | |||
1331 | if (!cpu_online(cpu)) | 1351 | if (!cpu_online(cpu)) |
1332 | return 0; | 1352 | return 0; |
1333 | 1353 | ||
@@ -1391,6 +1411,7 @@ out: | |||
1391 | schedule_work(&cpu_policy->update); | 1411 | schedule_work(&cpu_policy->update); |
1392 | fail: | 1412 | fail: |
1393 | cpufreq_cpu_put(cpu_policy); | 1413 | cpufreq_cpu_put(cpu_policy); |
1414 | #endif /* __powerpc__ */ | ||
1394 | return ret; | 1415 | return ret; |
1395 | } | 1416 | } |
1396 | 1417 | ||
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 57490502b21c..bdea7e2f94ba 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c | |||
@@ -63,6 +63,7 @@ struct cpu_dbs_info_s { | |||
63 | unsigned int down_skip; | 63 | unsigned int down_skip; |
64 | unsigned int requested_freq; | 64 | unsigned int requested_freq; |
65 | int cpu; | 65 | int cpu; |
66 | unsigned int enable:1; | ||
66 | /* | 67 | /* |
67 | * percpu mutex that serializes governor limit change with | 68 | * percpu mutex that serializes governor limit change with |
68 | * do_dbs_timer invocation. We do not want do_dbs_timer to run | 69 | * do_dbs_timer invocation. We do not want do_dbs_timer to run |
@@ -141,6 +142,9 @@ dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
141 | 142 | ||
142 | struct cpufreq_policy *policy; | 143 | struct cpufreq_policy *policy; |
143 | 144 | ||
145 | if (!this_dbs_info->enable) | ||
146 | return 0; | ||
147 | |||
144 | policy = this_dbs_info->cur_policy; | 148 | policy = this_dbs_info->cur_policy; |
145 | 149 | ||
146 | /* | 150 | /* |
@@ -497,6 +501,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) | |||
497 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); | 501 | int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); |
498 | delay -= jiffies % delay; | 502 | delay -= jiffies % delay; |
499 | 503 | ||
504 | dbs_info->enable = 1; | ||
500 | INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); | 505 | INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); |
501 | queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, | 506 | queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, |
502 | delay); | 507 | delay); |
@@ -504,6 +509,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) | |||
504 | 509 | ||
505 | static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) | 510 | static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) |
506 | { | 511 | { |
512 | dbs_info->enable = 0; | ||
507 | cancel_delayed_work_sync(&dbs_info->work); | 513 | cancel_delayed_work_sync(&dbs_info->work); |
508 | } | 514 | } |
509 | 515 | ||
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 070357aaedbc..81e1020fb514 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | menuconfig DMADEVICES | 5 | menuconfig DMADEVICES |
6 | bool "DMA Engine support" | 6 | bool "DMA Engine support" |
7 | depends on !HIGHMEM64G && HAS_DMA | 7 | depends on HAS_DMA |
8 | help | 8 | help |
9 | DMA engines can do asynchronous data transfers without | 9 | DMA engines can do asynchronous data transfers without |
10 | involving the host CPU. Currently, this framework can be | 10 | involving the host CPU. Currently, this framework can be |
@@ -46,6 +46,14 @@ config DW_DMAC | |||
46 | Support the Synopsys DesignWare AHB DMA controller. This | 46 | Support the Synopsys DesignWare AHB DMA controller. This |
47 | can be integrated in chips such as the Atmel AT32ap7000. | 47 | can be integrated in chips such as the Atmel AT32ap7000. |
48 | 48 | ||
49 | config AT_HDMAC | ||
50 | tristate "Atmel AHB DMA support" | ||
51 | depends on ARCH_AT91SAM9RL | ||
52 | select DMA_ENGINE | ||
53 | help | ||
54 | Support the Atmel AHB DMA controller. This can be integrated in | ||
55 | chips such as the Atmel AT91SAM9RL. | ||
56 | |||
49 | config FSL_DMA | 57 | config FSL_DMA |
50 | tristate "Freescale Elo and Elo Plus DMA support" | 58 | tristate "Freescale Elo and Elo Plus DMA support" |
51 | depends on FSL_SOC | 59 | depends on FSL_SOC |
@@ -108,7 +116,7 @@ config NET_DMA | |||
108 | 116 | ||
109 | config ASYNC_TX_DMA | 117 | config ASYNC_TX_DMA |
110 | bool "Async_tx: Offload support for the async_tx api" | 118 | bool "Async_tx: Offload support for the async_tx api" |
111 | depends on DMA_ENGINE | 119 | depends on DMA_ENGINE && !HIGHMEM64G |
112 | help | 120 | help |
113 | This allows the async_tx api to take advantage of offload engines for | 121 | This allows the async_tx api to take advantage of offload engines for |
114 | memcpy, memset, xor, and raid6 p+q operations. If your platform has | 122 | memcpy, memset, xor, and raid6 p+q operations. If your platform has |
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index a0b6564800c4..40e1e0083571 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile | |||
@@ -7,5 +7,6 @@ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o | |||
7 | obj-$(CONFIG_FSL_DMA) += fsldma.o | 7 | obj-$(CONFIG_FSL_DMA) += fsldma.o |
8 | obj-$(CONFIG_MV_XOR) += mv_xor.o | 8 | obj-$(CONFIG_MV_XOR) += mv_xor.o |
9 | obj-$(CONFIG_DW_DMAC) += dw_dmac.o | 9 | obj-$(CONFIG_DW_DMAC) += dw_dmac.o |
10 | obj-$(CONFIG_AT_HDMAC) += at_hdmac.o | ||
10 | obj-$(CONFIG_MX3_IPU) += ipu/ | 11 | obj-$(CONFIG_MX3_IPU) += ipu/ |
11 | obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o | 12 | obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o |
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c new file mode 100644 index 000000000000..9a1e5fb412ed --- /dev/null +++ b/drivers/dma/at_hdmac.c | |||
@@ -0,0 +1,1213 @@ | |||
1 | /* | ||
2 | * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems) | ||
3 | * | ||
4 | * Copyright (C) 2008 Atmel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * | ||
12 | * This supports the Atmel AHB DMA Controller, | ||
13 | * | ||
14 | * The driver has currently been tested with the Atmel AT91SAM9RL | ||
15 | * and AT91SAM9G45 series. | ||
16 | */ | ||
17 | |||
18 | #include <linux/clk.h> | ||
19 | #include <linux/dmaengine.h> | ||
20 | #include <linux/dma-mapping.h> | ||
21 | #include <linux/dmapool.h> | ||
22 | #include <linux/interrupt.h> | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/platform_device.h> | ||
25 | |||
26 | #include "at_hdmac_regs.h" | ||
27 | |||
28 | /* | ||
29 | * Glossary | ||
30 | * -------- | ||
31 | * | ||
32 | * at_hdmac : Name of the ATmel AHB DMA Controller | ||
33 | * at_dma_ / atdma : ATmel DMA controller entity related | ||
34 | * atc_ / atchan : ATmel DMA Channel entity related | ||
35 | */ | ||
36 | |||
37 | #define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) | ||
38 | #define ATC_DEFAULT_CTRLA (0) | ||
39 | #define ATC_DEFAULT_CTRLB (ATC_SIF(0) \ | ||
40 | |ATC_DIF(1)) | ||
41 | |||
42 | /* | ||
43 | * Initial number of descriptors to allocate for each channel. This could | ||
44 | * be increased during dma usage. | ||
45 | */ | ||
46 | static unsigned int init_nr_desc_per_channel = 64; | ||
47 | module_param(init_nr_desc_per_channel, uint, 0644); | ||
48 | MODULE_PARM_DESC(init_nr_desc_per_channel, | ||
49 | "initial descriptors per channel (default: 64)"); | ||
50 | |||
51 | |||
52 | /* prototypes */ | ||
53 | static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx); | ||
54 | |||
55 | |||
56 | /*----------------------------------------------------------------------*/ | ||
57 | |||
58 | static struct at_desc *atc_first_active(struct at_dma_chan *atchan) | ||
59 | { | ||
60 | return list_first_entry(&atchan->active_list, | ||
61 | struct at_desc, desc_node); | ||
62 | } | ||
63 | |||
64 | static struct at_desc *atc_first_queued(struct at_dma_chan *atchan) | ||
65 | { | ||
66 | return list_first_entry(&atchan->queue, | ||
67 | struct at_desc, desc_node); | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * atc_alloc_descriptor - allocate and return an initilized descriptor | ||
72 | * @chan: the channel to allocate descriptors for | ||
73 | * @gfp_flags: GFP allocation flags | ||
74 | * | ||
75 | * Note: The ack-bit is positioned in the descriptor flag at creation time | ||
76 | * to make initial allocation more convenient. This bit will be cleared | ||
77 | * and control will be given to client at usage time (during | ||
78 | * preparation functions). | ||
79 | */ | ||
80 | static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, | ||
81 | gfp_t gfp_flags) | ||
82 | { | ||
83 | struct at_desc *desc = NULL; | ||
84 | struct at_dma *atdma = to_at_dma(chan->device); | ||
85 | dma_addr_t phys; | ||
86 | |||
87 | desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); | ||
88 | if (desc) { | ||
89 | memset(desc, 0, sizeof(struct at_desc)); | ||
90 | dma_async_tx_descriptor_init(&desc->txd, chan); | ||
91 | /* txd.flags will be overwritten in prep functions */ | ||
92 | desc->txd.flags = DMA_CTRL_ACK; | ||
93 | desc->txd.tx_submit = atc_tx_submit; | ||
94 | desc->txd.phys = phys; | ||
95 | } | ||
96 | |||
97 | return desc; | ||
98 | } | ||
99 | |||
100 | /** | ||
101 | * atc_desc_get - get a unsused descriptor from free_list | ||
102 | * @atchan: channel we want a new descriptor for | ||
103 | */ | ||
104 | static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) | ||
105 | { | ||
106 | struct at_desc *desc, *_desc; | ||
107 | struct at_desc *ret = NULL; | ||
108 | unsigned int i = 0; | ||
109 | LIST_HEAD(tmp_list); | ||
110 | |||
111 | spin_lock_bh(&atchan->lock); | ||
112 | list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { | ||
113 | i++; | ||
114 | if (async_tx_test_ack(&desc->txd)) { | ||
115 | list_del(&desc->desc_node); | ||
116 | ret = desc; | ||
117 | break; | ||
118 | } | ||
119 | dev_dbg(chan2dev(&atchan->chan_common), | ||
120 | "desc %p not ACKed\n", desc); | ||
121 | } | ||
122 | spin_unlock_bh(&atchan->lock); | ||
123 | dev_vdbg(chan2dev(&atchan->chan_common), | ||
124 | "scanned %u descriptors on freelist\n", i); | ||
125 | |||
126 | /* no more descriptor available in initial pool: create one more */ | ||
127 | if (!ret) { | ||
128 | ret = atc_alloc_descriptor(&atchan->chan_common, GFP_ATOMIC); | ||
129 | if (ret) { | ||
130 | spin_lock_bh(&atchan->lock); | ||
131 | atchan->descs_allocated++; | ||
132 | spin_unlock_bh(&atchan->lock); | ||
133 | } else { | ||
134 | dev_err(chan2dev(&atchan->chan_common), | ||
135 | "not enough descriptors available\n"); | ||
136 | } | ||
137 | } | ||
138 | |||
139 | return ret; | ||
140 | } | ||
141 | |||
142 | /** | ||
143 | * atc_desc_put - move a descriptor, including any children, to the free list | ||
144 | * @atchan: channel we work on | ||
145 | * @desc: descriptor, at the head of a chain, to move to free list | ||
146 | */ | ||
147 | static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) | ||
148 | { | ||
149 | if (desc) { | ||
150 | struct at_desc *child; | ||
151 | |||
152 | spin_lock_bh(&atchan->lock); | ||
153 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | ||
154 | dev_vdbg(chan2dev(&atchan->chan_common), | ||
155 | "moving child desc %p to freelist\n", | ||
156 | child); | ||
157 | list_splice_init(&desc->txd.tx_list, &atchan->free_list); | ||
158 | dev_vdbg(chan2dev(&atchan->chan_common), | ||
159 | "moving desc %p to freelist\n", desc); | ||
160 | list_add(&desc->desc_node, &atchan->free_list); | ||
161 | spin_unlock_bh(&atchan->lock); | ||
162 | } | ||
163 | } | ||
164 | |||
165 | /** | ||
166 | * atc_assign_cookie - compute and assign new cookie | ||
167 | * @atchan: channel we work on | ||
168 | * @desc: descriptor to asign cookie for | ||
169 | * | ||
170 | * Called with atchan->lock held and bh disabled | ||
171 | */ | ||
172 | static dma_cookie_t | ||
173 | atc_assign_cookie(struct at_dma_chan *atchan, struct at_desc *desc) | ||
174 | { | ||
175 | dma_cookie_t cookie = atchan->chan_common.cookie; | ||
176 | |||
177 | if (++cookie < 0) | ||
178 | cookie = 1; | ||
179 | |||
180 | atchan->chan_common.cookie = cookie; | ||
181 | desc->txd.cookie = cookie; | ||
182 | |||
183 | return cookie; | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * atc_dostart - starts the DMA engine for real | ||
188 | * @atchan: the channel we want to start | ||
189 | * @first: first descriptor in the list we want to begin with | ||
190 | * | ||
191 | * Called with atchan->lock held and bh disabled | ||
192 | */ | ||
193 | static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) | ||
194 | { | ||
195 | struct at_dma *atdma = to_at_dma(atchan->chan_common.device); | ||
196 | |||
197 | /* ASSERT: channel is idle */ | ||
198 | if (atc_chan_is_enabled(atchan)) { | ||
199 | dev_err(chan2dev(&atchan->chan_common), | ||
200 | "BUG: Attempted to start non-idle channel\n"); | ||
201 | dev_err(chan2dev(&atchan->chan_common), | ||
202 | " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", | ||
203 | channel_readl(atchan, SADDR), | ||
204 | channel_readl(atchan, DADDR), | ||
205 | channel_readl(atchan, CTRLA), | ||
206 | channel_readl(atchan, CTRLB), | ||
207 | channel_readl(atchan, DSCR)); | ||
208 | |||
209 | /* The tasklet will hopefully advance the queue... */ | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | vdbg_dump_regs(atchan); | ||
214 | |||
215 | /* clear any pending interrupt */ | ||
216 | while (dma_readl(atdma, EBCISR)) | ||
217 | cpu_relax(); | ||
218 | |||
219 | channel_writel(atchan, SADDR, 0); | ||
220 | channel_writel(atchan, DADDR, 0); | ||
221 | channel_writel(atchan, CTRLA, 0); | ||
222 | channel_writel(atchan, CTRLB, 0); | ||
223 | channel_writel(atchan, DSCR, first->txd.phys); | ||
224 | dma_writel(atdma, CHER, atchan->mask); | ||
225 | |||
226 | vdbg_dump_regs(atchan); | ||
227 | } | ||
228 | |||
229 | /** | ||
230 | * atc_chain_complete - finish work for one transaction chain | ||
231 | * @atchan: channel we work on | ||
232 | * @desc: descriptor at the head of the chain we want do complete | ||
233 | * | ||
234 | * Called with atchan->lock held and bh disabled */ | ||
235 | static void | ||
236 | atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) | ||
237 | { | ||
238 | dma_async_tx_callback callback; | ||
239 | void *param; | ||
240 | struct dma_async_tx_descriptor *txd = &desc->txd; | ||
241 | |||
242 | dev_vdbg(chan2dev(&atchan->chan_common), | ||
243 | "descriptor %u complete\n", txd->cookie); | ||
244 | |||
245 | atchan->completed_cookie = txd->cookie; | ||
246 | callback = txd->callback; | ||
247 | param = txd->callback_param; | ||
248 | |||
249 | /* move children to free_list */ | ||
250 | list_splice_init(&txd->tx_list, &atchan->free_list); | ||
251 | /* move myself to free_list */ | ||
252 | list_move(&desc->desc_node, &atchan->free_list); | ||
253 | |||
254 | /* unmap dma addresses */ | ||
255 | if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | ||
256 | if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) | ||
257 | dma_unmap_single(chan2parent(&atchan->chan_common), | ||
258 | desc->lli.daddr, | ||
259 | desc->len, DMA_FROM_DEVICE); | ||
260 | else | ||
261 | dma_unmap_page(chan2parent(&atchan->chan_common), | ||
262 | desc->lli.daddr, | ||
263 | desc->len, DMA_FROM_DEVICE); | ||
264 | } | ||
265 | if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
266 | if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) | ||
267 | dma_unmap_single(chan2parent(&atchan->chan_common), | ||
268 | desc->lli.saddr, | ||
269 | desc->len, DMA_TO_DEVICE); | ||
270 | else | ||
271 | dma_unmap_page(chan2parent(&atchan->chan_common), | ||
272 | desc->lli.saddr, | ||
273 | desc->len, DMA_TO_DEVICE); | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * The API requires that no submissions are done from a | ||
278 | * callback, so we don't need to drop the lock here | ||
279 | */ | ||
280 | if (callback) | ||
281 | callback(param); | ||
282 | |||
283 | dma_run_dependencies(txd); | ||
284 | } | ||
285 | |||
286 | /** | ||
287 | * atc_complete_all - finish work for all transactions | ||
288 | * @atchan: channel to complete transactions for | ||
289 | * | ||
290 | * Eventually submit queued descriptors if any | ||
291 | * | ||
292 | * Assume channel is idle while calling this function | ||
293 | * Called with atchan->lock held and bh disabled | ||
294 | */ | ||
295 | static void atc_complete_all(struct at_dma_chan *atchan) | ||
296 | { | ||
297 | struct at_desc *desc, *_desc; | ||
298 | LIST_HEAD(list); | ||
299 | |||
300 | dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); | ||
301 | |||
302 | BUG_ON(atc_chan_is_enabled(atchan)); | ||
303 | |||
304 | /* | ||
305 | * Submit queued descriptors ASAP, i.e. before we go through | ||
306 | * the completed ones. | ||
307 | */ | ||
308 | if (!list_empty(&atchan->queue)) | ||
309 | atc_dostart(atchan, atc_first_queued(atchan)); | ||
310 | /* empty active_list now it is completed */ | ||
311 | list_splice_init(&atchan->active_list, &list); | ||
312 | /* empty queue list by moving descriptors (if any) to active_list */ | ||
313 | list_splice_init(&atchan->queue, &atchan->active_list); | ||
314 | |||
315 | list_for_each_entry_safe(desc, _desc, &list, desc_node) | ||
316 | atc_chain_complete(atchan, desc); | ||
317 | } | ||
318 | |||
319 | /** | ||
320 | * atc_cleanup_descriptors - cleanup up finished descriptors in active_list | ||
321 | * @atchan: channel to be cleaned up | ||
322 | * | ||
323 | * Called with atchan->lock held and bh disabled | ||
324 | */ | ||
325 | static void atc_cleanup_descriptors(struct at_dma_chan *atchan) | ||
326 | { | ||
327 | struct at_desc *desc, *_desc; | ||
328 | struct at_desc *child; | ||
329 | |||
330 | dev_vdbg(chan2dev(&atchan->chan_common), "cleanup descriptors\n"); | ||
331 | |||
332 | list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { | ||
333 | if (!(desc->lli.ctrla & ATC_DONE)) | ||
334 | /* This one is currently in progress */ | ||
335 | return; | ||
336 | |||
337 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | ||
338 | if (!(child->lli.ctrla & ATC_DONE)) | ||
339 | /* Currently in progress */ | ||
340 | return; | ||
341 | |||
342 | /* | ||
343 | * No descriptors so far seem to be in progress, i.e. | ||
344 | * this chain must be done. | ||
345 | */ | ||
346 | atc_chain_complete(atchan, desc); | ||
347 | } | ||
348 | } | ||
349 | |||
350 | /** | ||
351 | * atc_advance_work - at the end of a transaction, move forward | ||
352 | * @atchan: channel where the transaction ended | ||
353 | * | ||
354 | * Called with atchan->lock held and bh disabled | ||
355 | */ | ||
356 | static void atc_advance_work(struct at_dma_chan *atchan) | ||
357 | { | ||
358 | dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); | ||
359 | |||
360 | if (list_empty(&atchan->active_list) || | ||
361 | list_is_singular(&atchan->active_list)) { | ||
362 | atc_complete_all(atchan); | ||
363 | } else { | ||
364 | atc_chain_complete(atchan, atc_first_active(atchan)); | ||
365 | /* advance work */ | ||
366 | atc_dostart(atchan, atc_first_active(atchan)); | ||
367 | } | ||
368 | } | ||
369 | |||
370 | |||
371 | /** | ||
372 | * atc_handle_error - handle errors reported by DMA controller | ||
373 | * @atchan: channel where error occurs | ||
374 | * | ||
375 | * Called with atchan->lock held and bh disabled | ||
376 | */ | ||
377 | static void atc_handle_error(struct at_dma_chan *atchan) | ||
378 | { | ||
379 | struct at_desc *bad_desc; | ||
380 | struct at_desc *child; | ||
381 | |||
382 | /* | ||
383 | * The descriptor currently at the head of the active list is | ||
384 | * broked. Since we don't have any way to report errors, we'll | ||
385 | * just have to scream loudly and try to carry on. | ||
386 | */ | ||
387 | bad_desc = atc_first_active(atchan); | ||
388 | list_del_init(&bad_desc->desc_node); | ||
389 | |||
390 | /* As we are stopped, take advantage to push queued descriptors | ||
391 | * in active_list */ | ||
392 | list_splice_init(&atchan->queue, atchan->active_list.prev); | ||
393 | |||
394 | /* Try to restart the controller */ | ||
395 | if (!list_empty(&atchan->active_list)) | ||
396 | atc_dostart(atchan, atc_first_active(atchan)); | ||
397 | |||
398 | /* | ||
399 | * KERN_CRITICAL may seem harsh, but since this only happens | ||
400 | * when someone submits a bad physical address in a | ||
401 | * descriptor, we should consider ourselves lucky that the | ||
402 | * controller flagged an error instead of scribbling over | ||
403 | * random memory locations. | ||
404 | */ | ||
405 | dev_crit(chan2dev(&atchan->chan_common), | ||
406 | "Bad descriptor submitted for DMA!\n"); | ||
407 | dev_crit(chan2dev(&atchan->chan_common), | ||
408 | " cookie: %d\n", bad_desc->txd.cookie); | ||
409 | atc_dump_lli(atchan, &bad_desc->lli); | ||
410 | list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) | ||
411 | atc_dump_lli(atchan, &child->lli); | ||
412 | |||
413 | /* Pretend the descriptor completed successfully */ | ||
414 | atc_chain_complete(atchan, bad_desc); | ||
415 | } | ||
416 | |||
417 | |||
418 | /*-- IRQ & Tasklet ---------------------------------------------------*/ | ||
419 | |||
420 | static void atc_tasklet(unsigned long data) | ||
421 | { | ||
422 | struct at_dma_chan *atchan = (struct at_dma_chan *)data; | ||
423 | |||
424 | /* Channel cannot be enabled here */ | ||
425 | if (atc_chan_is_enabled(atchan)) { | ||
426 | dev_err(chan2dev(&atchan->chan_common), | ||
427 | "BUG: channel enabled in tasklet\n"); | ||
428 | return; | ||
429 | } | ||
430 | |||
431 | spin_lock(&atchan->lock); | ||
432 | if (test_and_clear_bit(0, &atchan->error_status)) | ||
433 | atc_handle_error(atchan); | ||
434 | else | ||
435 | atc_advance_work(atchan); | ||
436 | |||
437 | spin_unlock(&atchan->lock); | ||
438 | } | ||
439 | |||
440 | static irqreturn_t at_dma_interrupt(int irq, void *dev_id) | ||
441 | { | ||
442 | struct at_dma *atdma = (struct at_dma *)dev_id; | ||
443 | struct at_dma_chan *atchan; | ||
444 | int i; | ||
445 | u32 status, pending, imr; | ||
446 | int ret = IRQ_NONE; | ||
447 | |||
448 | do { | ||
449 | imr = dma_readl(atdma, EBCIMR); | ||
450 | status = dma_readl(atdma, EBCISR); | ||
451 | pending = status & imr; | ||
452 | |||
453 | if (!pending) | ||
454 | break; | ||
455 | |||
456 | dev_vdbg(atdma->dma_common.dev, | ||
457 | "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n", | ||
458 | status, imr, pending); | ||
459 | |||
460 | for (i = 0; i < atdma->dma_common.chancnt; i++) { | ||
461 | atchan = &atdma->chan[i]; | ||
462 | if (pending & (AT_DMA_CBTC(i) | AT_DMA_ERR(i))) { | ||
463 | if (pending & AT_DMA_ERR(i)) { | ||
464 | /* Disable channel on AHB error */ | ||
465 | dma_writel(atdma, CHDR, atchan->mask); | ||
466 | /* Give information to tasklet */ | ||
467 | set_bit(0, &atchan->error_status); | ||
468 | } | ||
469 | tasklet_schedule(&atchan->tasklet); | ||
470 | ret = IRQ_HANDLED; | ||
471 | } | ||
472 | } | ||
473 | |||
474 | } while (pending); | ||
475 | |||
476 | return ret; | ||
477 | } | ||
478 | |||
479 | |||
480 | /*-- DMA Engine API --------------------------------------------------*/ | ||
481 | |||
482 | /** | ||
483 | * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine | ||
484 | * @desc: descriptor at the head of the transaction chain | ||
485 | * | ||
486 | * Queue chain if DMA engine is working already | ||
487 | * | ||
488 | * Cookie increment and adding to active_list or queue must be atomic | ||
489 | */ | ||
490 | static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) | ||
491 | { | ||
492 | struct at_desc *desc = txd_to_at_desc(tx); | ||
493 | struct at_dma_chan *atchan = to_at_dma_chan(tx->chan); | ||
494 | dma_cookie_t cookie; | ||
495 | |||
496 | spin_lock_bh(&atchan->lock); | ||
497 | cookie = atc_assign_cookie(atchan, desc); | ||
498 | |||
499 | if (list_empty(&atchan->active_list)) { | ||
500 | dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", | ||
501 | desc->txd.cookie); | ||
502 | atc_dostart(atchan, desc); | ||
503 | list_add_tail(&desc->desc_node, &atchan->active_list); | ||
504 | } else { | ||
505 | dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", | ||
506 | desc->txd.cookie); | ||
507 | list_add_tail(&desc->desc_node, &atchan->queue); | ||
508 | } | ||
509 | |||
510 | spin_unlock_bh(&atchan->lock); | ||
511 | |||
512 | return cookie; | ||
513 | } | ||
514 | |||
515 | /** | ||
516 | * atc_prep_dma_memcpy - prepare a memcpy operation | ||
517 | * @chan: the channel to prepare operation on | ||
518 | * @dest: operation virtual destination address | ||
519 | * @src: operation virtual source address | ||
520 | * @len: operation length | ||
521 | * @flags: tx descriptor status flags | ||
522 | */ | ||
523 | static struct dma_async_tx_descriptor * | ||
524 | atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, | ||
525 | size_t len, unsigned long flags) | ||
526 | { | ||
527 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
528 | struct at_desc *desc = NULL; | ||
529 | struct at_desc *first = NULL; | ||
530 | struct at_desc *prev = NULL; | ||
531 | size_t xfer_count; | ||
532 | size_t offset; | ||
533 | unsigned int src_width; | ||
534 | unsigned int dst_width; | ||
535 | u32 ctrla; | ||
536 | u32 ctrlb; | ||
537 | |||
538 | dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n", | ||
539 | dest, src, len, flags); | ||
540 | |||
541 | if (unlikely(!len)) { | ||
542 | dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); | ||
543 | return NULL; | ||
544 | } | ||
545 | |||
546 | ctrla = ATC_DEFAULT_CTRLA; | ||
547 | ctrlb = ATC_DEFAULT_CTRLB | ||
548 | | ATC_SRC_ADDR_MODE_INCR | ||
549 | | ATC_DST_ADDR_MODE_INCR | ||
550 | | ATC_FC_MEM2MEM; | ||
551 | |||
552 | /* | ||
553 | * We can be a lot more clever here, but this should take care | ||
554 | * of the most common optimization. | ||
555 | */ | ||
556 | if (!((src | dest | len) & 3)) { | ||
557 | ctrla |= ATC_SRC_WIDTH_WORD | ATC_DST_WIDTH_WORD; | ||
558 | src_width = dst_width = 2; | ||
559 | } else if (!((src | dest | len) & 1)) { | ||
560 | ctrla |= ATC_SRC_WIDTH_HALFWORD | ATC_DST_WIDTH_HALFWORD; | ||
561 | src_width = dst_width = 1; | ||
562 | } else { | ||
563 | ctrla |= ATC_SRC_WIDTH_BYTE | ATC_DST_WIDTH_BYTE; | ||
564 | src_width = dst_width = 0; | ||
565 | } | ||
566 | |||
567 | for (offset = 0; offset < len; offset += xfer_count << src_width) { | ||
568 | xfer_count = min_t(size_t, (len - offset) >> src_width, | ||
569 | ATC_BTSIZE_MAX); | ||
570 | |||
571 | desc = atc_desc_get(atchan); | ||
572 | if (!desc) | ||
573 | goto err_desc_get; | ||
574 | |||
575 | desc->lli.saddr = src + offset; | ||
576 | desc->lli.daddr = dest + offset; | ||
577 | desc->lli.ctrla = ctrla | xfer_count; | ||
578 | desc->lli.ctrlb = ctrlb; | ||
579 | |||
580 | desc->txd.cookie = 0; | ||
581 | async_tx_ack(&desc->txd); | ||
582 | |||
583 | if (!first) { | ||
584 | first = desc; | ||
585 | } else { | ||
586 | /* inform the HW lli about chaining */ | ||
587 | prev->lli.dscr = desc->txd.phys; | ||
588 | /* insert the link descriptor to the LD ring */ | ||
589 | list_add_tail(&desc->desc_node, | ||
590 | &first->txd.tx_list); | ||
591 | } | ||
592 | prev = desc; | ||
593 | } | ||
594 | |||
595 | /* First descriptor of the chain embedds additional information */ | ||
596 | first->txd.cookie = -EBUSY; | ||
597 | first->len = len; | ||
598 | |||
599 | /* set end-of-link to the last link descriptor of list*/ | ||
600 | set_desc_eol(desc); | ||
601 | |||
602 | desc->txd.flags = flags; /* client is in control of this ack */ | ||
603 | |||
604 | return &first->txd; | ||
605 | |||
606 | err_desc_get: | ||
607 | atc_desc_put(atchan, first); | ||
608 | return NULL; | ||
609 | } | ||
610 | |||
611 | |||
612 | /** | ||
613 | * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction | ||
614 | * @chan: DMA channel | ||
615 | * @sgl: scatterlist to transfer to/from | ||
616 | * @sg_len: number of entries in @scatterlist | ||
617 | * @direction: DMA direction | ||
618 | * @flags: tx descriptor status flags | ||
619 | */ | ||
620 | static struct dma_async_tx_descriptor * | ||
621 | atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | ||
622 | unsigned int sg_len, enum dma_data_direction direction, | ||
623 | unsigned long flags) | ||
624 | { | ||
625 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
626 | struct at_dma_slave *atslave = chan->private; | ||
627 | struct at_desc *first = NULL; | ||
628 | struct at_desc *prev = NULL; | ||
629 | u32 ctrla; | ||
630 | u32 ctrlb; | ||
631 | dma_addr_t reg; | ||
632 | unsigned int reg_width; | ||
633 | unsigned int mem_width; | ||
634 | unsigned int i; | ||
635 | struct scatterlist *sg; | ||
636 | size_t total_len = 0; | ||
637 | |||
638 | dev_vdbg(chan2dev(chan), "prep_slave_sg: %s f0x%lx\n", | ||
639 | direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE", | ||
640 | flags); | ||
641 | |||
642 | if (unlikely(!atslave || !sg_len)) { | ||
643 | dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); | ||
644 | return NULL; | ||
645 | } | ||
646 | |||
647 | reg_width = atslave->reg_width; | ||
648 | |||
649 | sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); | ||
650 | |||
651 | ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; | ||
652 | ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; | ||
653 | |||
654 | switch (direction) { | ||
655 | case DMA_TO_DEVICE: | ||
656 | ctrla |= ATC_DST_WIDTH(reg_width); | ||
657 | ctrlb |= ATC_DST_ADDR_MODE_FIXED | ||
658 | | ATC_SRC_ADDR_MODE_INCR | ||
659 | | ATC_FC_MEM2PER; | ||
660 | reg = atslave->tx_reg; | ||
661 | for_each_sg(sgl, sg, sg_len, i) { | ||
662 | struct at_desc *desc; | ||
663 | u32 len; | ||
664 | u32 mem; | ||
665 | |||
666 | desc = atc_desc_get(atchan); | ||
667 | if (!desc) | ||
668 | goto err_desc_get; | ||
669 | |||
670 | mem = sg_phys(sg); | ||
671 | len = sg_dma_len(sg); | ||
672 | mem_width = 2; | ||
673 | if (unlikely(mem & 3 || len & 3)) | ||
674 | mem_width = 0; | ||
675 | |||
676 | desc->lli.saddr = mem; | ||
677 | desc->lli.daddr = reg; | ||
678 | desc->lli.ctrla = ctrla | ||
679 | | ATC_SRC_WIDTH(mem_width) | ||
680 | | len >> mem_width; | ||
681 | desc->lli.ctrlb = ctrlb; | ||
682 | |||
683 | if (!first) { | ||
684 | first = desc; | ||
685 | } else { | ||
686 | /* inform the HW lli about chaining */ | ||
687 | prev->lli.dscr = desc->txd.phys; | ||
688 | /* insert the link descriptor to the LD ring */ | ||
689 | list_add_tail(&desc->desc_node, | ||
690 | &first->txd.tx_list); | ||
691 | } | ||
692 | prev = desc; | ||
693 | total_len += len; | ||
694 | } | ||
695 | break; | ||
696 | case DMA_FROM_DEVICE: | ||
697 | ctrla |= ATC_SRC_WIDTH(reg_width); | ||
698 | ctrlb |= ATC_DST_ADDR_MODE_INCR | ||
699 | | ATC_SRC_ADDR_MODE_FIXED | ||
700 | | ATC_FC_PER2MEM; | ||
701 | |||
702 | reg = atslave->rx_reg; | ||
703 | for_each_sg(sgl, sg, sg_len, i) { | ||
704 | struct at_desc *desc; | ||
705 | u32 len; | ||
706 | u32 mem; | ||
707 | |||
708 | desc = atc_desc_get(atchan); | ||
709 | if (!desc) | ||
710 | goto err_desc_get; | ||
711 | |||
712 | mem = sg_phys(sg); | ||
713 | len = sg_dma_len(sg); | ||
714 | mem_width = 2; | ||
715 | if (unlikely(mem & 3 || len & 3)) | ||
716 | mem_width = 0; | ||
717 | |||
718 | desc->lli.saddr = reg; | ||
719 | desc->lli.daddr = mem; | ||
720 | desc->lli.ctrla = ctrla | ||
721 | | ATC_DST_WIDTH(mem_width) | ||
722 | | len >> mem_width; | ||
723 | desc->lli.ctrlb = ctrlb; | ||
724 | |||
725 | if (!first) { | ||
726 | first = desc; | ||
727 | } else { | ||
728 | /* inform the HW lli about chaining */ | ||
729 | prev->lli.dscr = desc->txd.phys; | ||
730 | /* insert the link descriptor to the LD ring */ | ||
731 | list_add_tail(&desc->desc_node, | ||
732 | &first->txd.tx_list); | ||
733 | } | ||
734 | prev = desc; | ||
735 | total_len += len; | ||
736 | } | ||
737 | break; | ||
738 | default: | ||
739 | return NULL; | ||
740 | } | ||
741 | |||
742 | /* set end-of-link to the last link descriptor of list*/ | ||
743 | set_desc_eol(prev); | ||
744 | |||
745 | /* First descriptor of the chain embedds additional information */ | ||
746 | first->txd.cookie = -EBUSY; | ||
747 | first->len = total_len; | ||
748 | |||
749 | /* last link descriptor of list is responsible of flags */ | ||
750 | prev->txd.flags = flags; /* client is in control of this ack */ | ||
751 | |||
752 | return &first->txd; | ||
753 | |||
754 | err_desc_get: | ||
755 | dev_err(chan2dev(chan), "not enough descriptors available\n"); | ||
756 | atc_desc_put(atchan, first); | ||
757 | return NULL; | ||
758 | } | ||
759 | |||
760 | static void atc_terminate_all(struct dma_chan *chan) | ||
761 | { | ||
762 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
763 | struct at_dma *atdma = to_at_dma(chan->device); | ||
764 | struct at_desc *desc, *_desc; | ||
765 | LIST_HEAD(list); | ||
766 | |||
767 | /* | ||
768 | * This is only called when something went wrong elsewhere, so | ||
769 | * we don't really care about the data. Just disable the | ||
770 | * channel. We still have to poll the channel enable bit due | ||
771 | * to AHB/HSB limitations. | ||
772 | */ | ||
773 | spin_lock_bh(&atchan->lock); | ||
774 | |||
775 | dma_writel(atdma, CHDR, atchan->mask); | ||
776 | |||
777 | /* confirm that this channel is disabled */ | ||
778 | while (dma_readl(atdma, CHSR) & atchan->mask) | ||
779 | cpu_relax(); | ||
780 | |||
781 | /* active_list entries will end up before queued entries */ | ||
782 | list_splice_init(&atchan->queue, &list); | ||
783 | list_splice_init(&atchan->active_list, &list); | ||
784 | |||
785 | spin_unlock_bh(&atchan->lock); | ||
786 | |||
787 | /* Flush all pending and queued descriptors */ | ||
788 | list_for_each_entry_safe(desc, _desc, &list, desc_node) | ||
789 | atc_chain_complete(atchan, desc); | ||
790 | } | ||
791 | |||
792 | /** | ||
793 | * atc_is_tx_complete - poll for transaction completion | ||
794 | * @chan: DMA channel | ||
795 | * @cookie: transaction identifier to check status of | ||
796 | * @done: if not %NULL, updated with last completed transaction | ||
797 | * @used: if not %NULL, updated with last used transaction | ||
798 | * | ||
799 | * If @done and @used are passed in, upon return they reflect the driver | ||
800 | * internal state and can be used with dma_async_is_complete() to check | ||
801 | * the status of multiple cookies without re-checking hardware state. | ||
802 | */ | ||
803 | static enum dma_status | ||
804 | atc_is_tx_complete(struct dma_chan *chan, | ||
805 | dma_cookie_t cookie, | ||
806 | dma_cookie_t *done, dma_cookie_t *used) | ||
807 | { | ||
808 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
809 | dma_cookie_t last_used; | ||
810 | dma_cookie_t last_complete; | ||
811 | enum dma_status ret; | ||
812 | |||
813 | dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", | ||
814 | cookie, done ? *done : 0, used ? *used : 0); | ||
815 | |||
816 | spin_lock_bh(atchan->lock); | ||
817 | |||
818 | last_complete = atchan->completed_cookie; | ||
819 | last_used = chan->cookie; | ||
820 | |||
821 | ret = dma_async_is_complete(cookie, last_complete, last_used); | ||
822 | if (ret != DMA_SUCCESS) { | ||
823 | atc_cleanup_descriptors(atchan); | ||
824 | |||
825 | last_complete = atchan->completed_cookie; | ||
826 | last_used = chan->cookie; | ||
827 | |||
828 | ret = dma_async_is_complete(cookie, last_complete, last_used); | ||
829 | } | ||
830 | |||
831 | spin_unlock_bh(atchan->lock); | ||
832 | |||
833 | if (done) | ||
834 | *done = last_complete; | ||
835 | if (used) | ||
836 | *used = last_used; | ||
837 | |||
838 | return ret; | ||
839 | } | ||
840 | |||
841 | /** | ||
842 | * atc_issue_pending - try to finish work | ||
843 | * @chan: target DMA channel | ||
844 | */ | ||
845 | static void atc_issue_pending(struct dma_chan *chan) | ||
846 | { | ||
847 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
848 | |||
849 | dev_vdbg(chan2dev(chan), "issue_pending\n"); | ||
850 | |||
851 | if (!atc_chan_is_enabled(atchan)) { | ||
852 | spin_lock_bh(&atchan->lock); | ||
853 | atc_advance_work(atchan); | ||
854 | spin_unlock_bh(&atchan->lock); | ||
855 | } | ||
856 | } | ||
857 | |||
858 | /** | ||
859 | * atc_alloc_chan_resources - allocate resources for DMA channel | ||
860 | * @chan: allocate descriptor resources for this channel | ||
861 | * @client: current client requesting the channel be ready for requests | ||
862 | * | ||
863 | * return - the number of allocated descriptors | ||
864 | */ | ||
865 | static int atc_alloc_chan_resources(struct dma_chan *chan) | ||
866 | { | ||
867 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
868 | struct at_dma *atdma = to_at_dma(chan->device); | ||
869 | struct at_desc *desc; | ||
870 | struct at_dma_slave *atslave; | ||
871 | int i; | ||
872 | u32 cfg; | ||
873 | LIST_HEAD(tmp_list); | ||
874 | |||
875 | dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); | ||
876 | |||
877 | /* ASSERT: channel is idle */ | ||
878 | if (atc_chan_is_enabled(atchan)) { | ||
879 | dev_dbg(chan2dev(chan), "DMA channel not idle ?\n"); | ||
880 | return -EIO; | ||
881 | } | ||
882 | |||
883 | cfg = ATC_DEFAULT_CFG; | ||
884 | |||
885 | atslave = chan->private; | ||
886 | if (atslave) { | ||
887 | /* | ||
888 | * We need controller-specific data to set up slave | ||
889 | * transfers. | ||
890 | */ | ||
891 | BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev); | ||
892 | |||
893 | /* if cfg configuration specified take it instad of default */ | ||
894 | if (atslave->cfg) | ||
895 | cfg = atslave->cfg; | ||
896 | } | ||
897 | |||
898 | /* have we already been set up? | ||
899 | * reconfigure channel but no need to reallocate descriptors */ | ||
900 | if (!list_empty(&atchan->free_list)) | ||
901 | return atchan->descs_allocated; | ||
902 | |||
903 | /* Allocate initial pool of descriptors */ | ||
904 | for (i = 0; i < init_nr_desc_per_channel; i++) { | ||
905 | desc = atc_alloc_descriptor(chan, GFP_KERNEL); | ||
906 | if (!desc) { | ||
907 | dev_err(atdma->dma_common.dev, | ||
908 | "Only %d initial descriptors\n", i); | ||
909 | break; | ||
910 | } | ||
911 | list_add_tail(&desc->desc_node, &tmp_list); | ||
912 | } | ||
913 | |||
914 | spin_lock_bh(&atchan->lock); | ||
915 | atchan->descs_allocated = i; | ||
916 | list_splice(&tmp_list, &atchan->free_list); | ||
917 | atchan->completed_cookie = chan->cookie = 1; | ||
918 | spin_unlock_bh(&atchan->lock); | ||
919 | |||
920 | /* channel parameters */ | ||
921 | channel_writel(atchan, CFG, cfg); | ||
922 | |||
923 | dev_dbg(chan2dev(chan), | ||
924 | "alloc_chan_resources: allocated %d descriptors\n", | ||
925 | atchan->descs_allocated); | ||
926 | |||
927 | return atchan->descs_allocated; | ||
928 | } | ||
929 | |||
930 | /** | ||
931 | * atc_free_chan_resources - free all channel resources | ||
932 | * @chan: DMA channel | ||
933 | */ | ||
934 | static void atc_free_chan_resources(struct dma_chan *chan) | ||
935 | { | ||
936 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
937 | struct at_dma *atdma = to_at_dma(chan->device); | ||
938 | struct at_desc *desc, *_desc; | ||
939 | LIST_HEAD(list); | ||
940 | |||
941 | dev_dbg(chan2dev(chan), "free_chan_resources: (descs allocated=%u)\n", | ||
942 | atchan->descs_allocated); | ||
943 | |||
944 | /* ASSERT: channel is idle */ | ||
945 | BUG_ON(!list_empty(&atchan->active_list)); | ||
946 | BUG_ON(!list_empty(&atchan->queue)); | ||
947 | BUG_ON(atc_chan_is_enabled(atchan)); | ||
948 | |||
949 | list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { | ||
950 | dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); | ||
951 | list_del(&desc->desc_node); | ||
952 | /* free link descriptor */ | ||
953 | dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys); | ||
954 | } | ||
955 | list_splice_init(&atchan->free_list, &list); | ||
956 | atchan->descs_allocated = 0; | ||
957 | |||
958 | dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); | ||
959 | } | ||
960 | |||
961 | |||
962 | /*-- Module Management -----------------------------------------------*/ | ||
963 | |||
964 | /** | ||
965 | * at_dma_off - disable DMA controller | ||
966 | * @atdma: the Atmel HDAMC device | ||
967 | */ | ||
968 | static void at_dma_off(struct at_dma *atdma) | ||
969 | { | ||
970 | dma_writel(atdma, EN, 0); | ||
971 | |||
972 | /* disable all interrupts */ | ||
973 | dma_writel(atdma, EBCIDR, -1L); | ||
974 | |||
975 | /* confirm that all channels are disabled */ | ||
976 | while (dma_readl(atdma, CHSR) & atdma->all_chan_mask) | ||
977 | cpu_relax(); | ||
978 | } | ||
979 | |||
980 | static int __init at_dma_probe(struct platform_device *pdev) | ||
981 | { | ||
982 | struct at_dma_platform_data *pdata; | ||
983 | struct resource *io; | ||
984 | struct at_dma *atdma; | ||
985 | size_t size; | ||
986 | int irq; | ||
987 | int err; | ||
988 | int i; | ||
989 | |||
990 | /* get DMA Controller parameters from platform */ | ||
991 | pdata = pdev->dev.platform_data; | ||
992 | if (!pdata || pdata->nr_channels > AT_DMA_MAX_NR_CHANNELS) | ||
993 | return -EINVAL; | ||
994 | |||
995 | io = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
996 | if (!io) | ||
997 | return -EINVAL; | ||
998 | |||
999 | irq = platform_get_irq(pdev, 0); | ||
1000 | if (irq < 0) | ||
1001 | return irq; | ||
1002 | |||
1003 | size = sizeof(struct at_dma); | ||
1004 | size += pdata->nr_channels * sizeof(struct at_dma_chan); | ||
1005 | atdma = kzalloc(size, GFP_KERNEL); | ||
1006 | if (!atdma) | ||
1007 | return -ENOMEM; | ||
1008 | |||
1009 | /* discover transaction capabilites from the platform data */ | ||
1010 | atdma->dma_common.cap_mask = pdata->cap_mask; | ||
1011 | atdma->all_chan_mask = (1 << pdata->nr_channels) - 1; | ||
1012 | |||
1013 | size = io->end - io->start + 1; | ||
1014 | if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { | ||
1015 | err = -EBUSY; | ||
1016 | goto err_kfree; | ||
1017 | } | ||
1018 | |||
1019 | atdma->regs = ioremap(io->start, size); | ||
1020 | if (!atdma->regs) { | ||
1021 | err = -ENOMEM; | ||
1022 | goto err_release_r; | ||
1023 | } | ||
1024 | |||
1025 | atdma->clk = clk_get(&pdev->dev, "dma_clk"); | ||
1026 | if (IS_ERR(atdma->clk)) { | ||
1027 | err = PTR_ERR(atdma->clk); | ||
1028 | goto err_clk; | ||
1029 | } | ||
1030 | clk_enable(atdma->clk); | ||
1031 | |||
1032 | /* force dma off, just in case */ | ||
1033 | at_dma_off(atdma); | ||
1034 | |||
1035 | err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma); | ||
1036 | if (err) | ||
1037 | goto err_irq; | ||
1038 | |||
1039 | platform_set_drvdata(pdev, atdma); | ||
1040 | |||
1041 | /* create a pool of consistent memory blocks for hardware descriptors */ | ||
1042 | atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool", | ||
1043 | &pdev->dev, sizeof(struct at_desc), | ||
1044 | 4 /* word alignment */, 0); | ||
1045 | if (!atdma->dma_desc_pool) { | ||
1046 | dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); | ||
1047 | err = -ENOMEM; | ||
1048 | goto err_pool_create; | ||
1049 | } | ||
1050 | |||
1051 | /* clear any pending interrupt */ | ||
1052 | while (dma_readl(atdma, EBCISR)) | ||
1053 | cpu_relax(); | ||
1054 | |||
1055 | /* initialize channels related values */ | ||
1056 | INIT_LIST_HEAD(&atdma->dma_common.channels); | ||
1057 | for (i = 0; i < pdata->nr_channels; i++, atdma->dma_common.chancnt++) { | ||
1058 | struct at_dma_chan *atchan = &atdma->chan[i]; | ||
1059 | |||
1060 | atchan->chan_common.device = &atdma->dma_common; | ||
1061 | atchan->chan_common.cookie = atchan->completed_cookie = 1; | ||
1062 | atchan->chan_common.chan_id = i; | ||
1063 | list_add_tail(&atchan->chan_common.device_node, | ||
1064 | &atdma->dma_common.channels); | ||
1065 | |||
1066 | atchan->ch_regs = atdma->regs + ch_regs(i); | ||
1067 | spin_lock_init(&atchan->lock); | ||
1068 | atchan->mask = 1 << i; | ||
1069 | |||
1070 | INIT_LIST_HEAD(&atchan->active_list); | ||
1071 | INIT_LIST_HEAD(&atchan->queue); | ||
1072 | INIT_LIST_HEAD(&atchan->free_list); | ||
1073 | |||
1074 | tasklet_init(&atchan->tasklet, atc_tasklet, | ||
1075 | (unsigned long)atchan); | ||
1076 | atc_enable_irq(atchan); | ||
1077 | } | ||
1078 | |||
1079 | /* set base routines */ | ||
1080 | atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources; | ||
1081 | atdma->dma_common.device_free_chan_resources = atc_free_chan_resources; | ||
1082 | atdma->dma_common.device_is_tx_complete = atc_is_tx_complete; | ||
1083 | atdma->dma_common.device_issue_pending = atc_issue_pending; | ||
1084 | atdma->dma_common.dev = &pdev->dev; | ||
1085 | |||
1086 | /* set prep routines based on capability */ | ||
1087 | if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) | ||
1088 | atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; | ||
1089 | |||
1090 | if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { | ||
1091 | atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; | ||
1092 | atdma->dma_common.device_terminate_all = atc_terminate_all; | ||
1093 | } | ||
1094 | |||
1095 | dma_writel(atdma, EN, AT_DMA_ENABLE); | ||
1096 | |||
1097 | dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n", | ||
1098 | dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", | ||
1099 | dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", | ||
1100 | atdma->dma_common.chancnt); | ||
1101 | |||
1102 | dma_async_device_register(&atdma->dma_common); | ||
1103 | |||
1104 | return 0; | ||
1105 | |||
1106 | err_pool_create: | ||
1107 | platform_set_drvdata(pdev, NULL); | ||
1108 | free_irq(platform_get_irq(pdev, 0), atdma); | ||
1109 | err_irq: | ||
1110 | clk_disable(atdma->clk); | ||
1111 | clk_put(atdma->clk); | ||
1112 | err_clk: | ||
1113 | iounmap(atdma->regs); | ||
1114 | atdma->regs = NULL; | ||
1115 | err_release_r: | ||
1116 | release_mem_region(io->start, size); | ||
1117 | err_kfree: | ||
1118 | kfree(atdma); | ||
1119 | return err; | ||
1120 | } | ||
1121 | |||
1122 | static int __exit at_dma_remove(struct platform_device *pdev) | ||
1123 | { | ||
1124 | struct at_dma *atdma = platform_get_drvdata(pdev); | ||
1125 | struct dma_chan *chan, *_chan; | ||
1126 | struct resource *io; | ||
1127 | |||
1128 | at_dma_off(atdma); | ||
1129 | dma_async_device_unregister(&atdma->dma_common); | ||
1130 | |||
1131 | dma_pool_destroy(atdma->dma_desc_pool); | ||
1132 | platform_set_drvdata(pdev, NULL); | ||
1133 | free_irq(platform_get_irq(pdev, 0), atdma); | ||
1134 | |||
1135 | list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, | ||
1136 | device_node) { | ||
1137 | struct at_dma_chan *atchan = to_at_dma_chan(chan); | ||
1138 | |||
1139 | /* Disable interrupts */ | ||
1140 | atc_disable_irq(atchan); | ||
1141 | tasklet_disable(&atchan->tasklet); | ||
1142 | |||
1143 | tasklet_kill(&atchan->tasklet); | ||
1144 | list_del(&chan->device_node); | ||
1145 | } | ||
1146 | |||
1147 | clk_disable(atdma->clk); | ||
1148 | clk_put(atdma->clk); | ||
1149 | |||
1150 | iounmap(atdma->regs); | ||
1151 | atdma->regs = NULL; | ||
1152 | |||
1153 | io = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
1154 | release_mem_region(io->start, io->end - io->start + 1); | ||
1155 | |||
1156 | kfree(atdma); | ||
1157 | |||
1158 | return 0; | ||
1159 | } | ||
1160 | |||
1161 | static void at_dma_shutdown(struct platform_device *pdev) | ||
1162 | { | ||
1163 | struct at_dma *atdma = platform_get_drvdata(pdev); | ||
1164 | |||
1165 | at_dma_off(platform_get_drvdata(pdev)); | ||
1166 | clk_disable(atdma->clk); | ||
1167 | } | ||
1168 | |||
1169 | static int at_dma_suspend_late(struct platform_device *pdev, pm_message_t mesg) | ||
1170 | { | ||
1171 | struct at_dma *atdma = platform_get_drvdata(pdev); | ||
1172 | |||
1173 | at_dma_off(platform_get_drvdata(pdev)); | ||
1174 | clk_disable(atdma->clk); | ||
1175 | return 0; | ||
1176 | } | ||
1177 | |||
1178 | static int at_dma_resume_early(struct platform_device *pdev) | ||
1179 | { | ||
1180 | struct at_dma *atdma = platform_get_drvdata(pdev); | ||
1181 | |||
1182 | clk_enable(atdma->clk); | ||
1183 | dma_writel(atdma, EN, AT_DMA_ENABLE); | ||
1184 | return 0; | ||
1185 | |||
1186 | } | ||
1187 | |||
1188 | static struct platform_driver at_dma_driver = { | ||
1189 | .remove = __exit_p(at_dma_remove), | ||
1190 | .shutdown = at_dma_shutdown, | ||
1191 | .suspend_late = at_dma_suspend_late, | ||
1192 | .resume_early = at_dma_resume_early, | ||
1193 | .driver = { | ||
1194 | .name = "at_hdmac", | ||
1195 | }, | ||
1196 | }; | ||
1197 | |||
1198 | static int __init at_dma_init(void) | ||
1199 | { | ||
1200 | return platform_driver_probe(&at_dma_driver, at_dma_probe); | ||
1201 | } | ||
1202 | module_init(at_dma_init); | ||
1203 | |||
1204 | static void __exit at_dma_exit(void) | ||
1205 | { | ||
1206 | platform_driver_unregister(&at_dma_driver); | ||
1207 | } | ||
1208 | module_exit(at_dma_exit); | ||
1209 | |||
1210 | MODULE_DESCRIPTION("Atmel AHB DMA Controller driver"); | ||
1211 | MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>"); | ||
1212 | MODULE_LICENSE("GPL"); | ||
1213 | MODULE_ALIAS("platform:at_hdmac"); | ||
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h new file mode 100644 index 000000000000..4c972afc49ec --- /dev/null +++ b/drivers/dma/at_hdmac_regs.h | |||
@@ -0,0 +1,353 @@ | |||
1 | /* | ||
2 | * Header file for the Atmel AHB DMA Controller driver | ||
3 | * | ||
4 | * Copyright (C) 2008 Atmel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | */ | ||
11 | #ifndef AT_HDMAC_REGS_H | ||
12 | #define AT_HDMAC_REGS_H | ||
13 | |||
14 | #include <mach/at_hdmac.h> | ||
15 | |||
16 | #define AT_DMA_MAX_NR_CHANNELS 8 | ||
17 | |||
18 | |||
19 | #define AT_DMA_GCFG 0x00 /* Global Configuration Register */ | ||
20 | #define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ | ||
21 | #define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. */ | ||
22 | #define AT_DMA_ARB_CFG_FIXED (0x0 << 4) | ||
23 | #define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) | ||
24 | |||
25 | #define AT_DMA_EN 0x04 /* Controller Enable Register */ | ||
26 | #define AT_DMA_ENABLE (0x1 << 0) | ||
27 | |||
28 | #define AT_DMA_SREQ 0x08 /* Software Single Request Register */ | ||
29 | #define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ | ||
30 | #define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ | ||
31 | |||
32 | #define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ | ||
33 | #define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ | ||
34 | #define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ | ||
35 | |||
36 | #define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ | ||
37 | #define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ | ||
38 | #define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ | ||
39 | |||
40 | #define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ | ||
41 | #define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */ | ||
42 | |||
43 | /* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ | ||
44 | #define AT_DMA_EBCIER 0x18 /* Enable register */ | ||
45 | #define AT_DMA_EBCIDR 0x1C /* Disable register */ | ||
46 | #define AT_DMA_EBCIMR 0x20 /* Mask Register */ | ||
47 | #define AT_DMA_EBCISR 0x24 /* Status Register */ | ||
48 | #define AT_DMA_CBTC_OFFSET 8 | ||
49 | #define AT_DMA_ERR_OFFSET 16 | ||
50 | #define AT_DMA_BTC(x) (0x1 << (x)) | ||
51 | #define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) | ||
52 | #define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) | ||
53 | |||
54 | #define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ | ||
55 | #define AT_DMA_ENA(x) (0x1 << (x)) | ||
56 | #define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) | ||
57 | #define AT_DMA_KEEP(x) (0x1 << (24 + (x))) | ||
58 | |||
59 | #define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ | ||
60 | #define AT_DMA_DIS(x) (0x1 << (x)) | ||
61 | #define AT_DMA_RES(x) (0x1 << ( 8 + (x))) | ||
62 | |||
63 | #define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ | ||
64 | #define AT_DMA_EMPT(x) (0x1 << (16 + (x))) | ||
65 | #define AT_DMA_STAL(x) (0x1 << (24 + (x))) | ||
66 | |||
67 | |||
68 | #define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ | ||
69 | #define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ | ||
70 | |||
71 | /* Hardware register offset for each channel */ | ||
72 | #define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ | ||
73 | #define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ | ||
74 | #define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ | ||
75 | #define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ | ||
76 | #define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ | ||
77 | #define ATC_CFG_OFFSET 0x14 /* Configuration Register */ | ||
78 | #define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ | ||
79 | #define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ | ||
80 | |||
81 | |||
82 | /* Bitfield definitions */ | ||
83 | |||
84 | /* Bitfields in DSCR */ | ||
85 | #define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ | ||
86 | |||
87 | /* Bitfields in CTRLA */ | ||
88 | #define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ | ||
89 | #define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ | ||
90 | /* Chunck Tranfer size definitions are in at_hdmac.h */ | ||
91 | #define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ | ||
92 | #define ATC_SRC_WIDTH(x) ((x) << 24) | ||
93 | #define ATC_SRC_WIDTH_BYTE (0x0 << 24) | ||
94 | #define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) | ||
95 | #define ATC_SRC_WIDTH_WORD (0x2 << 24) | ||
96 | #define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ | ||
97 | #define ATC_DST_WIDTH(x) ((x) << 28) | ||
98 | #define ATC_DST_WIDTH_BYTE (0x0 << 28) | ||
99 | #define ATC_DST_WIDTH_HALFWORD (0x1 << 28) | ||
100 | #define ATC_DST_WIDTH_WORD (0x2 << 28) | ||
101 | #define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */ | ||
102 | |||
103 | /* Bitfields in CTRLB */ | ||
104 | #define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ | ||
105 | #define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ | ||
106 | #define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ | ||
107 | #define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ | ||
108 | #define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ | ||
109 | #define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ | ||
110 | #define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ | ||
111 | #define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ | ||
112 | #define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ | ||
113 | #define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ | ||
114 | #define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ | ||
115 | #define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ | ||
116 | #define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ | ||
117 | #define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ | ||
118 | #define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ | ||
119 | #define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) | ||
120 | #define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ | ||
121 | #define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ | ||
122 | #define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ | ||
123 | #define ATC_DST_ADDR_MODE_MASK (0x3 << 28) | ||
124 | #define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ | ||
125 | #define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ | ||
126 | #define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ | ||
127 | #define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ | ||
128 | #define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ | ||
129 | |||
130 | /* Bitfields in CFG */ | ||
131 | /* are in at_hdmac.h */ | ||
132 | |||
133 | /* Bitfields in SPIP */ | ||
134 | #define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) | ||
135 | #define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) | ||
136 | |||
137 | /* Bitfields in DPIP */ | ||
138 | #define ATC_DPIP_HOLE(x) (0xFFFFU & (x)) | ||
139 | #define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) | ||
140 | |||
141 | |||
142 | /*-- descriptors -----------------------------------------------------*/ | ||
143 | |||
144 | /* LLI == Linked List Item; aka DMA buffer descriptor */ | ||
145 | struct at_lli { | ||
146 | /* values that are not changed by hardware */ | ||
147 | dma_addr_t saddr; | ||
148 | dma_addr_t daddr; | ||
149 | /* value that may get written back: */ | ||
150 | u32 ctrla; | ||
151 | /* more values that are not changed by hardware */ | ||
152 | u32 ctrlb; | ||
153 | dma_addr_t dscr; /* chain to next lli */ | ||
154 | }; | ||
155 | |||
156 | /** | ||
157 | * struct at_desc - software descriptor | ||
158 | * @at_lli: hardware lli structure | ||
159 | * @txd: support for the async_tx api | ||
160 | * @desc_node: node on the channed descriptors list | ||
161 | * @len: total transaction bytecount | ||
162 | */ | ||
163 | struct at_desc { | ||
164 | /* FIRST values the hardware uses */ | ||
165 | struct at_lli lli; | ||
166 | |||
167 | /* THEN values for driver housekeeping */ | ||
168 | struct dma_async_tx_descriptor txd; | ||
169 | struct list_head desc_node; | ||
170 | size_t len; | ||
171 | }; | ||
172 | |||
173 | static inline struct at_desc * | ||
174 | txd_to_at_desc(struct dma_async_tx_descriptor *txd) | ||
175 | { | ||
176 | return container_of(txd, struct at_desc, txd); | ||
177 | } | ||
178 | |||
179 | |||
180 | /*-- Channels --------------------------------------------------------*/ | ||
181 | |||
182 | /** | ||
183 | * struct at_dma_chan - internal representation of an Atmel HDMAC channel | ||
184 | * @chan_common: common dmaengine channel object members | ||
185 | * @device: parent device | ||
186 | * @ch_regs: memory mapped register base | ||
187 | * @mask: channel index in a mask | ||
188 | * @error_status: transmit error status information from irq handler | ||
189 | * to tasklet (use atomic operations) | ||
190 | * @tasklet: bottom half to finish transaction work | ||
191 | * @lock: serializes enqueue/dequeue operations to descriptors lists | ||
192 | * @completed_cookie: identifier for the most recently completed operation | ||
193 | * @active_list: list of descriptors dmaengine is being running on | ||
194 | * @queue: list of descriptors ready to be submitted to engine | ||
195 | * @free_list: list of descriptors usable by the channel | ||
196 | * @descs_allocated: records the actual size of the descriptor pool | ||
197 | */ | ||
198 | struct at_dma_chan { | ||
199 | struct dma_chan chan_common; | ||
200 | struct at_dma *device; | ||
201 | void __iomem *ch_regs; | ||
202 | u8 mask; | ||
203 | unsigned long error_status; | ||
204 | struct tasklet_struct tasklet; | ||
205 | |||
206 | spinlock_t lock; | ||
207 | |||
208 | /* these other elements are all protected by lock */ | ||
209 | dma_cookie_t completed_cookie; | ||
210 | struct list_head active_list; | ||
211 | struct list_head queue; | ||
212 | struct list_head free_list; | ||
213 | unsigned int descs_allocated; | ||
214 | }; | ||
215 | |||
216 | #define channel_readl(atchan, name) \ | ||
217 | __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) | ||
218 | |||
219 | #define channel_writel(atchan, name, val) \ | ||
220 | __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) | ||
221 | |||
222 | static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) | ||
223 | { | ||
224 | return container_of(dchan, struct at_dma_chan, chan_common); | ||
225 | } | ||
226 | |||
227 | |||
228 | /*-- Controller ------------------------------------------------------*/ | ||
229 | |||
230 | /** | ||
231 | * struct at_dma - internal representation of an Atmel HDMA Controller | ||
232 | * @chan_common: common dmaengine dma_device object members | ||
233 | * @ch_regs: memory mapped register base | ||
234 | * @clk: dma controller clock | ||
235 | * @all_chan_mask: all channels availlable in a mask | ||
236 | * @dma_desc_pool: base of DMA descriptor region (DMA address) | ||
237 | * @chan: channels table to store at_dma_chan structures | ||
238 | */ | ||
239 | struct at_dma { | ||
240 | struct dma_device dma_common; | ||
241 | void __iomem *regs; | ||
242 | struct clk *clk; | ||
243 | |||
244 | u8 all_chan_mask; | ||
245 | |||
246 | struct dma_pool *dma_desc_pool; | ||
247 | /* AT THE END channels table */ | ||
248 | struct at_dma_chan chan[0]; | ||
249 | }; | ||
250 | |||
251 | #define dma_readl(atdma, name) \ | ||
252 | __raw_readl((atdma)->regs + AT_DMA_##name) | ||
253 | #define dma_writel(atdma, name, val) \ | ||
254 | __raw_writel((val), (atdma)->regs + AT_DMA_##name) | ||
255 | |||
256 | static inline struct at_dma *to_at_dma(struct dma_device *ddev) | ||
257 | { | ||
258 | return container_of(ddev, struct at_dma, dma_common); | ||
259 | } | ||
260 | |||
261 | |||
262 | /*-- Helper functions ------------------------------------------------*/ | ||
263 | |||
264 | static struct device *chan2dev(struct dma_chan *chan) | ||
265 | { | ||
266 | return &chan->dev->device; | ||
267 | } | ||
268 | static struct device *chan2parent(struct dma_chan *chan) | ||
269 | { | ||
270 | return chan->dev->device.parent; | ||
271 | } | ||
272 | |||
273 | #if defined(VERBOSE_DEBUG) | ||
274 | static void vdbg_dump_regs(struct at_dma_chan *atchan) | ||
275 | { | ||
276 | struct at_dma *atdma = to_at_dma(atchan->chan_common.device); | ||
277 | |||
278 | dev_err(chan2dev(&atchan->chan_common), | ||
279 | " channel %d : imr = 0x%x, chsr = 0x%x\n", | ||
280 | atchan->chan_common.chan_id, | ||
281 | dma_readl(atdma, EBCIMR), | ||
282 | dma_readl(atdma, CHSR)); | ||
283 | |||
284 | dev_err(chan2dev(&atchan->chan_common), | ||
285 | " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", | ||
286 | channel_readl(atchan, SADDR), | ||
287 | channel_readl(atchan, DADDR), | ||
288 | channel_readl(atchan, CTRLA), | ||
289 | channel_readl(atchan, CTRLB), | ||
290 | channel_readl(atchan, CFG), | ||
291 | channel_readl(atchan, DSCR)); | ||
292 | } | ||
293 | #else | ||
294 | static void vdbg_dump_regs(struct at_dma_chan *atchan) {} | ||
295 | #endif | ||
296 | |||
297 | static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) | ||
298 | { | ||
299 | dev_printk(KERN_CRIT, chan2dev(&atchan->chan_common), | ||
300 | " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", | ||
301 | lli->saddr, lli->daddr, | ||
302 | lli->ctrla, lli->ctrlb, lli->dscr); | ||
303 | } | ||
304 | |||
305 | |||
306 | static void atc_setup_irq(struct at_dma_chan *atchan, int on) | ||
307 | { | ||
308 | struct at_dma *atdma = to_at_dma(atchan->chan_common.device); | ||
309 | u32 ebci; | ||
310 | |||
311 | /* enable interrupts on buffer chain completion & error */ | ||
312 | ebci = AT_DMA_CBTC(atchan->chan_common.chan_id) | ||
313 | | AT_DMA_ERR(atchan->chan_common.chan_id); | ||
314 | if (on) | ||
315 | dma_writel(atdma, EBCIER, ebci); | ||
316 | else | ||
317 | dma_writel(atdma, EBCIDR, ebci); | ||
318 | } | ||
319 | |||
320 | static inline void atc_enable_irq(struct at_dma_chan *atchan) | ||
321 | { | ||
322 | atc_setup_irq(atchan, 1); | ||
323 | } | ||
324 | |||
325 | static inline void atc_disable_irq(struct at_dma_chan *atchan) | ||
326 | { | ||
327 | atc_setup_irq(atchan, 0); | ||
328 | } | ||
329 | |||
330 | |||
331 | /** | ||
332 | * atc_chan_is_enabled - test if given channel is enabled | ||
333 | * @atchan: channel we want to test status | ||
334 | */ | ||
335 | static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) | ||
336 | { | ||
337 | struct at_dma *atdma = to_at_dma(atchan->chan_common.device); | ||
338 | |||
339 | return !!(dma_readl(atdma, CHSR) & atchan->mask); | ||
340 | } | ||
341 | |||
342 | |||
343 | /** | ||
344 | * set_desc_eol - set end-of-link to descriptor so it will end transfer | ||
345 | * @desc: descriptor, signle or at the end of a chain, to end chain on | ||
346 | */ | ||
347 | static void set_desc_eol(struct at_desc *desc) | ||
348 | { | ||
349 | desc->lli.ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; | ||
350 | desc->lli.dscr = 0; | ||
351 | } | ||
352 | |||
353 | #endif /* AT_HDMAC_REGS_H */ | ||
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index fb7da5141e96..d93017fc7872 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c | |||
@@ -38,6 +38,11 @@ module_param(max_channels, uint, S_IRUGO); | |||
38 | MODULE_PARM_DESC(max_channels, | 38 | MODULE_PARM_DESC(max_channels, |
39 | "Maximum number of channels to use (default: all)"); | 39 | "Maximum number of channels to use (default: all)"); |
40 | 40 | ||
41 | static unsigned int iterations; | ||
42 | module_param(iterations, uint, S_IRUGO); | ||
43 | MODULE_PARM_DESC(iterations, | ||
44 | "Iterations before stopping test (default: infinite)"); | ||
45 | |||
41 | static unsigned int xor_sources = 3; | 46 | static unsigned int xor_sources = 3; |
42 | module_param(xor_sources, uint, S_IRUGO); | 47 | module_param(xor_sources, uint, S_IRUGO); |
43 | MODULE_PARM_DESC(xor_sources, | 48 | MODULE_PARM_DESC(xor_sources, |
@@ -114,7 +119,7 @@ static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len) | |||
114 | buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); | 119 | buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); |
115 | for ( ; i < start + len; i++) | 120 | for ( ; i < start + len; i++) |
116 | buf[i] = PATTERN_SRC | PATTERN_COPY | 121 | buf[i] = PATTERN_SRC | PATTERN_COPY |
117 | | (~i & PATTERN_COUNT_MASK);; | 122 | | (~i & PATTERN_COUNT_MASK); |
118 | for ( ; i < test_buf_size; i++) | 123 | for ( ; i < test_buf_size; i++) |
119 | buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); | 124 | buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); |
120 | buf++; | 125 | buf++; |
@@ -270,7 +275,8 @@ static int dmatest_func(void *data) | |||
270 | 275 | ||
271 | flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT; | 276 | flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT; |
272 | 277 | ||
273 | while (!kthread_should_stop()) { | 278 | while (!kthread_should_stop() |
279 | && !(iterations && total_tests >= iterations)) { | ||
274 | struct dma_device *dev = chan->device; | 280 | struct dma_device *dev = chan->device; |
275 | struct dma_async_tx_descriptor *tx = NULL; | 281 | struct dma_async_tx_descriptor *tx = NULL; |
276 | dma_addr_t dma_srcs[src_cnt]; | 282 | dma_addr_t dma_srcs[src_cnt]; |
@@ -416,6 +422,13 @@ err_srcbuf: | |||
416 | err_srcs: | 422 | err_srcs: |
417 | pr_notice("%s: terminating after %u tests, %u failures (status %d)\n", | 423 | pr_notice("%s: terminating after %u tests, %u failures (status %d)\n", |
418 | thread_name, total_tests, failed_tests, ret); | 424 | thread_name, total_tests, failed_tests, ret); |
425 | |||
426 | if (iterations > 0) | ||
427 | while (!kthread_should_stop()) { | ||
428 | DECLARE_WAIT_QUEUE_HEAD(wait_dmatest_exit); | ||
429 | interruptible_sleep_on(&wait_dmatest_exit); | ||
430 | } | ||
431 | |||
419 | return ret; | 432 | return ret; |
420 | } | 433 | } |
421 | 434 | ||
@@ -495,11 +508,11 @@ static int dmatest_add_channel(struct dma_chan *chan) | |||
495 | 508 | ||
496 | if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { | 509 | if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { |
497 | cnt = dmatest_add_threads(dtc, DMA_MEMCPY); | 510 | cnt = dmatest_add_threads(dtc, DMA_MEMCPY); |
498 | thread_count += cnt > 0 ?: 0; | 511 | thread_count += cnt > 0 ? cnt : 0; |
499 | } | 512 | } |
500 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { | 513 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { |
501 | cnt = dmatest_add_threads(dtc, DMA_XOR); | 514 | cnt = dmatest_add_threads(dtc, DMA_XOR); |
502 | thread_count += cnt > 0 ?: 0; | 515 | thread_count += cnt > 0 ? cnt : 0; |
503 | } | 516 | } |
504 | 517 | ||
505 | pr_info("dmatest: Started %u threads using %s\n", | 518 | pr_info("dmatest: Started %u threads using %s\n", |
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index f18d1bde0439..ef87a8984145 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c | |||
@@ -12,6 +12,11 @@ | |||
12 | * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc. | 12 | * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc. |
13 | * The support for MPC8349 DMA contorller is also added. | 13 | * The support for MPC8349 DMA contorller is also added. |
14 | * | 14 | * |
15 | * This driver instructs the DMA controller to issue the PCI Read Multiple | ||
16 | * command for PCI read operations, instead of using the default PCI Read Line | ||
17 | * command. Please be aware that this setting may result in read pre-fetching | ||
18 | * on some platforms. | ||
19 | * | ||
15 | * This is free software; you can redistribute it and/or modify | 20 | * This is free software; you can redistribute it and/or modify |
16 | * it under the terms of the GNU General Public License as published by | 21 | * it under the terms of the GNU General Public License as published by |
17 | * the Free Software Foundation; either version 2 of the License, or | 22 | * the Free Software Foundation; either version 2 of the License, or |
@@ -49,9 +54,10 @@ static void dma_init(struct fsl_dma_chan *fsl_chan) | |||
49 | case FSL_DMA_IP_83XX: | 54 | case FSL_DMA_IP_83XX: |
50 | /* Set the channel to below modes: | 55 | /* Set the channel to below modes: |
51 | * EOTIE - End-of-transfer interrupt enable | 56 | * EOTIE - End-of-transfer interrupt enable |
57 | * PRC_RM - PCI read multiple | ||
52 | */ | 58 | */ |
53 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE, | 59 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE |
54 | 32); | 60 | | FSL_DMA_MR_PRC_RM, 32); |
55 | break; | 61 | break; |
56 | } | 62 | } |
57 | 63 | ||
@@ -136,15 +142,16 @@ static int dma_is_idle(struct fsl_dma_chan *fsl_chan) | |||
136 | 142 | ||
137 | static void dma_start(struct fsl_dma_chan *fsl_chan) | 143 | static void dma_start(struct fsl_dma_chan *fsl_chan) |
138 | { | 144 | { |
139 | u32 mr_set = 0;; | 145 | u32 mr_set = 0; |
140 | 146 | ||
141 | if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { | 147 | if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { |
142 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32); | 148 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32); |
143 | mr_set |= FSL_DMA_MR_EMP_EN; | 149 | mr_set |= FSL_DMA_MR_EMP_EN; |
144 | } else | 150 | } else if ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { |
145 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, | 151 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, |
146 | DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | 152 | DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
147 | & ~FSL_DMA_MR_EMP_EN, 32); | 153 | & ~FSL_DMA_MR_EMP_EN, 32); |
154 | } | ||
148 | 155 | ||
149 | if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT) | 156 | if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT) |
150 | mr_set |= FSL_DMA_MR_EMS_EN; | 157 | mr_set |= FSL_DMA_MR_EMS_EN; |
@@ -871,9 +878,9 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, | |||
871 | 878 | ||
872 | switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) { | 879 | switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) { |
873 | case FSL_DMA_IP_85XX: | 880 | case FSL_DMA_IP_85XX: |
874 | new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; | ||
875 | new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; | 881 | new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; |
876 | case FSL_DMA_IP_83XX: | 882 | case FSL_DMA_IP_83XX: |
883 | new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; | ||
877 | new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; | 884 | new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; |
878 | new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; | 885 | new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; |
879 | } | 886 | } |
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index 4f21a512d848..dc7f26865797 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h | |||
@@ -38,6 +38,7 @@ | |||
38 | 38 | ||
39 | /* Special MR definition for MPC8349 */ | 39 | /* Special MR definition for MPC8349 */ |
40 | #define FSL_DMA_MR_EOTIE 0x00000080 | 40 | #define FSL_DMA_MR_EOTIE 0x00000080 |
41 | #define FSL_DMA_MR_PRC_RM 0x00000800 | ||
41 | 42 | ||
42 | #define FSL_DMA_SR_CH 0x00000020 | 43 | #define FSL_DMA_SR_CH 0x00000020 |
43 | #define FSL_DMA_SR_PE 0x00000010 | 44 | #define FSL_DMA_SR_PE 0x00000010 |
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index ddab94f51224..3f23eabe09f2 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c | |||
@@ -1176,7 +1176,7 @@ static int __devinit mv_xor_probe(struct platform_device *pdev) | |||
1176 | if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) | 1176 | if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) |
1177 | dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset; | 1177 | dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset; |
1178 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { | 1178 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { |
1179 | dma_dev->max_xor = 8; ; | 1179 | dma_dev->max_xor = 8; |
1180 | dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; | 1180 | dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; |
1181 | } | 1181 | } |
1182 | 1182 | ||
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 24964c1d0af9..e2a10bcba7a1 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
@@ -868,6 +868,8 @@ static void amd64_read_dbam_reg(struct amd64_pvt *pvt) | |||
868 | goto err_reg; | 868 | goto err_reg; |
869 | } | 869 | } |
870 | 870 | ||
871 | return; | ||
872 | |||
871 | err_reg: | 873 | err_reg: |
872 | debugf0("Error reading F2x%03x.\n", reg); | 874 | debugf0("Error reading F2x%03x.\n", reg); |
873 | } | 875 | } |
@@ -2634,6 +2636,8 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) | |||
2634 | 2636 | ||
2635 | amd64_dump_misc_regs(pvt); | 2637 | amd64_dump_misc_regs(pvt); |
2636 | 2638 | ||
2639 | return; | ||
2640 | |||
2637 | err_reg: | 2641 | err_reg: |
2638 | debugf0("Reading an MC register failed\n"); | 2642 | debugf0("Reading an MC register failed\n"); |
2639 | 2643 | ||
@@ -2977,6 +2981,9 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) | |||
2977 | "ECC is enabled by BIOS, Proceeding " | 2981 | "ECC is enabled by BIOS, Proceeding " |
2978 | "with EDAC module initialization\n"); | 2982 | "with EDAC module initialization\n"); |
2979 | 2983 | ||
2984 | /* Signal good ECC status */ | ||
2985 | ret = 0; | ||
2986 | |||
2980 | /* CLEAR the override, since BIOS controlled it */ | 2987 | /* CLEAR the override, since BIOS controlled it */ |
2981 | ecc_enable_override = 0; | 2988 | ecc_enable_override = 0; |
2982 | } | 2989 | } |
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 8fab7890a363..33be210d6723 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c | |||
@@ -1461,7 +1461,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, | |||
1461 | goto out; | 1461 | goto out; |
1462 | } | 1462 | } |
1463 | 1463 | ||
1464 | if (crtc_req->count_connectors > 0 && !mode && !fb) { | 1464 | if (crtc_req->count_connectors > 0 && (!mode || !fb)) { |
1465 | DRM_DEBUG("Count connectors is %d but no mode or fb set\n", | 1465 | DRM_DEBUG("Count connectors is %d but no mode or fb set\n", |
1466 | crtc_req->count_connectors); | 1466 | crtc_req->count_connectors); |
1467 | ret = -EINVAL; | 1467 | ret = -EINVAL; |
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 3da9cfa31848..6aaa2cb23365 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c | |||
@@ -706,8 +706,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) | |||
706 | struct drm_encoder **save_encoders, *new_encoder; | 706 | struct drm_encoder **save_encoders, *new_encoder; |
707 | struct drm_framebuffer *old_fb = NULL; | 707 | struct drm_framebuffer *old_fb = NULL; |
708 | bool save_enabled; | 708 | bool save_enabled; |
709 | bool mode_changed = false; | 709 | bool mode_changed = false; /* if true do a full mode set */ |
710 | bool fb_changed = false; | 710 | bool fb_changed = false; /* if true and !mode_changed just do a flip */ |
711 | struct drm_connector *connector; | 711 | struct drm_connector *connector; |
712 | int count = 0, ro, fail = 0; | 712 | int count = 0, ro, fail = 0; |
713 | struct drm_crtc_helper_funcs *crtc_funcs; | 713 | struct drm_crtc_helper_funcs *crtc_funcs; |
@@ -758,6 +758,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) | |||
758 | if (set->crtc->fb == NULL) { | 758 | if (set->crtc->fb == NULL) { |
759 | DRM_DEBUG("crtc has no fb, full mode set\n"); | 759 | DRM_DEBUG("crtc has no fb, full mode set\n"); |
760 | mode_changed = true; | 760 | mode_changed = true; |
761 | } else if (set->fb == NULL) { | ||
762 | mode_changed = true; | ||
761 | } else if ((set->fb->bits_per_pixel != | 763 | } else if ((set->fb->bits_per_pixel != |
762 | set->crtc->fb->bits_per_pixel) || | 764 | set->crtc->fb->bits_per_pixel) || |
763 | set->fb->depth != set->crtc->fb->depth) | 765 | set->fb->depth != set->crtc->fb->depth) |
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 05a44896dffb..f1ba8ff41130 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c | |||
@@ -722,13 +722,14 @@ int r100_cs_packet_parse(struct radeon_cs_parser *p, | |||
722 | unsigned idx) | 722 | unsigned idx) |
723 | { | 723 | { |
724 | struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; | 724 | struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; |
725 | uint32_t header = ib_chunk->kdata[idx]; | 725 | uint32_t header; |
726 | 726 | ||
727 | if (idx >= ib_chunk->length_dw) { | 727 | if (idx >= ib_chunk->length_dw) { |
728 | DRM_ERROR("Can not parse packet at %d after CS end %d !\n", | 728 | DRM_ERROR("Can not parse packet at %d after CS end %d !\n", |
729 | idx, ib_chunk->length_dw); | 729 | idx, ib_chunk->length_dw); |
730 | return -EINVAL; | 730 | return -EINVAL; |
731 | } | 731 | } |
732 | header = ib_chunk->kdata[idx]; | ||
732 | pkt->idx = idx; | 733 | pkt->idx = idx; |
733 | pkt->type = CP_PACKET_GET_TYPE(header); | 734 | pkt->type = CP_PACKET_GET_TYPE(header); |
734 | pkt->count = CP_PACKET_GET_COUNT(header); | 735 | pkt->count = CP_PACKET_GET_COUNT(header); |
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 3cfcee17dc56..0bd5879a4957 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c | |||
@@ -318,6 +318,14 @@ static int __init radeon_init(void) | |||
318 | driver = &driver_old; | 318 | driver = &driver_old; |
319 | driver->num_ioctls = radeon_max_ioctl; | 319 | driver->num_ioctls = radeon_max_ioctl; |
320 | #if defined(CONFIG_DRM_RADEON_KMS) | 320 | #if defined(CONFIG_DRM_RADEON_KMS) |
321 | #ifdef CONFIG_VGA_CONSOLE | ||
322 | if (vgacon_text_force() && radeon_modeset == -1) { | ||
323 | DRM_INFO("VGACON disable radeon kernel modesetting.\n"); | ||
324 | driver = &driver_old; | ||
325 | driver->driver_features &= ~DRIVER_MODESET; | ||
326 | radeon_modeset = 0; | ||
327 | } | ||
328 | #endif | ||
321 | /* if enabled by default */ | 329 | /* if enabled by default */ |
322 | if (radeon_modeset == -1) { | 330 | if (radeon_modeset == -1) { |
323 | DRM_INFO("radeon default to kernel modesetting.\n"); | 331 | DRM_INFO("radeon default to kernel modesetting.\n"); |
@@ -329,17 +337,8 @@ static int __init radeon_init(void) | |||
329 | driver->driver_features |= DRIVER_MODESET; | 337 | driver->driver_features |= DRIVER_MODESET; |
330 | driver->num_ioctls = radeon_max_kms_ioctl; | 338 | driver->num_ioctls = radeon_max_kms_ioctl; |
331 | } | 339 | } |
332 | |||
333 | /* if the vga console setting is enabled still | 340 | /* if the vga console setting is enabled still |
334 | * let modprobe override it */ | 341 | * let modprobe override it */ |
335 | #ifdef CONFIG_VGA_CONSOLE | ||
336 | if (vgacon_text_force() && radeon_modeset == -1) { | ||
337 | DRM_INFO("VGACON disable radeon kernel modesetting.\n"); | ||
338 | driver = &driver_old; | ||
339 | driver->driver_features &= ~DRIVER_MODESET; | ||
340 | radeon_modeset = 0; | ||
341 | } | ||
342 | #endif | ||
343 | #endif | 342 | #endif |
344 | return drm_init(driver); | 343 | return drm_init(driver); |
345 | } | 344 | } |
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 937a2f1cdb46..3357110e30ce 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c | |||
@@ -58,6 +58,8 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) | |||
58 | if (r) { | 58 | if (r) { |
59 | DRM_ERROR("Failed to initialize radeon, disabling IOCTL\n"); | 59 | DRM_ERROR("Failed to initialize radeon, disabling IOCTL\n"); |
60 | radeon_device_fini(rdev); | 60 | radeon_device_fini(rdev); |
61 | kfree(rdev); | ||
62 | dev->dev_private = NULL; | ||
61 | return r; | 63 | return r; |
62 | } | 64 | } |
63 | return 0; | 65 | return 0; |
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 551e608702e4..fd8f3ca716ea 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c | |||
@@ -370,6 +370,7 @@ void rv515_vram_info(struct radeon_device *rdev) | |||
370 | 370 | ||
371 | rv515_vram_get_type(rdev); | 371 | rv515_vram_get_type(rdev); |
372 | 372 | ||
373 | r100_vram_init_sizes(rdev); | ||
373 | /* FIXME: we should enforce default clock in case GPU is not in | 374 | /* FIXME: we should enforce default clock in case GPU is not in |
374 | * default setup | 375 | * default setup |
375 | */ | 376 | */ |
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6538d4236989..c2b0d710d10f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c | |||
@@ -1182,13 +1182,14 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, | |||
1182 | 1182 | ||
1183 | int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) | 1183 | int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) |
1184 | { | 1184 | { |
1185 | struct ttm_mem_type_manager *man = &bdev->man[mem_type]; | 1185 | struct ttm_mem_type_manager *man; |
1186 | int ret = -EINVAL; | 1186 | int ret = -EINVAL; |
1187 | 1187 | ||
1188 | if (mem_type >= TTM_NUM_MEM_TYPES) { | 1188 | if (mem_type >= TTM_NUM_MEM_TYPES) { |
1189 | printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", mem_type); | 1189 | printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", mem_type); |
1190 | return ret; | 1190 | return ret; |
1191 | } | 1191 | } |
1192 | man = &bdev->man[mem_type]; | ||
1192 | 1193 | ||
1193 | if (!man->has_type) { | 1194 | if (!man->has_type) { |
1194 | printk(KERN_ERR TTM_PFX "Trying to take down uninitialized " | 1195 | printk(KERN_ERR TTM_PFX "Trying to take down uninitialized " |
@@ -1575,6 +1576,10 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, | |||
1575 | driver->sync_obj_unref(&sync_obj); | 1576 | driver->sync_obj_unref(&sync_obj); |
1576 | driver->sync_obj_unref(&tmp_obj); | 1577 | driver->sync_obj_unref(&tmp_obj); |
1577 | spin_lock(&bo->lock); | 1578 | spin_lock(&bo->lock); |
1579 | } else { | ||
1580 | spin_unlock(&bo->lock); | ||
1581 | driver->sync_obj_unref(&sync_obj); | ||
1582 | spin_lock(&bo->lock); | ||
1578 | } | 1583 | } |
1579 | } | 1584 | } |
1580 | return 0; | 1585 | return 0; |
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ce2e6f38ea01..ad4ada07c6cf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c | |||
@@ -150,7 +150,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, | |||
150 | #ifdef CONFIG_X86 | 150 | #ifdef CONFIG_X86 |
151 | dst = kmap_atomic_prot(d, KM_USER0, prot); | 151 | dst = kmap_atomic_prot(d, KM_USER0, prot); |
152 | #else | 152 | #else |
153 | if (prot != PAGE_KERNEL) | 153 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) |
154 | dst = vmap(&d, 1, 0, prot); | 154 | dst = vmap(&d, 1, 0, prot); |
155 | else | 155 | else |
156 | dst = kmap(d); | 156 | dst = kmap(d); |
@@ -163,7 +163,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, | |||
163 | #ifdef CONFIG_X86 | 163 | #ifdef CONFIG_X86 |
164 | kunmap_atomic(dst, KM_USER0); | 164 | kunmap_atomic(dst, KM_USER0); |
165 | #else | 165 | #else |
166 | if (prot != PAGE_KERNEL) | 166 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) |
167 | vunmap(dst); | 167 | vunmap(dst); |
168 | else | 168 | else |
169 | kunmap(d); | 169 | kunmap(d); |
@@ -186,7 +186,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, | |||
186 | #ifdef CONFIG_X86 | 186 | #ifdef CONFIG_X86 |
187 | src = kmap_atomic_prot(s, KM_USER0, prot); | 187 | src = kmap_atomic_prot(s, KM_USER0, prot); |
188 | #else | 188 | #else |
189 | if (prot != PAGE_KERNEL) | 189 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) |
190 | src = vmap(&s, 1, 0, prot); | 190 | src = vmap(&s, 1, 0, prot); |
191 | else | 191 | else |
192 | src = kmap(s); | 192 | src = kmap(s); |
@@ -199,7 +199,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, | |||
199 | #ifdef CONFIG_X86 | 199 | #ifdef CONFIG_X86 |
200 | kunmap_atomic(src, KM_USER0); | 200 | kunmap_atomic(src, KM_USER0); |
201 | #else | 201 | #else |
202 | if (prot != PAGE_KERNEL) | 202 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) |
203 | vunmap(src); | 203 | vunmap(src); |
204 | else | 204 | else |
205 | kunmap(s); | 205 | kunmap(s); |
diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index b587e2d576ac..820e51673b26 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c | |||
@@ -296,7 +296,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) | |||
296 | priv->tseq[3] = 0; | 296 | priv->tseq[3] = 0; |
297 | if (mlc->opacket & HIL_CTRL_APE) { | 297 | if (mlc->opacket & HIL_CTRL_APE) { |
298 | priv->tseq[3] |= HP_SDC_LPC_APE_IPF; | 298 | priv->tseq[3] |= HP_SDC_LPC_APE_IPF; |
299 | down_trylock(&mlc->csem); | 299 | BUG_ON(down_trylock(&mlc->csem)); |
300 | } | 300 | } |
301 | enqueue: | 301 | enqueue: |
302 | hp_sdc_enqueue_transaction(&priv->trans); | 302 | hp_sdc_enqueue_transaction(&priv->trans); |
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index c3b661a666cb..7e5f30dbc0a0 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c | |||
@@ -1480,7 +1480,7 @@ l1oip_init(void) | |||
1480 | return -ENOMEM; | 1480 | return -ENOMEM; |
1481 | 1481 | ||
1482 | l1oip_cnt = 0; | 1482 | l1oip_cnt = 0; |
1483 | while (type[l1oip_cnt] && l1oip_cnt < MAX_CARDS) { | 1483 | while (l1oip_cnt < MAX_CARDS && type[l1oip_cnt]) { |
1484 | switch (type[l1oip_cnt] & 0xff) { | 1484 | switch (type[l1oip_cnt] & 0xff) { |
1485 | case 1: | 1485 | case 1: |
1486 | pri = 0; | 1486 | pri = 0; |
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index a6974e9b8ebf..1e2cb846b3c9 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
@@ -1,6 +1,8 @@ | |||
1 | /*P:400 This contains run_guest() which actually calls into the Host<->Guest | 1 | /*P:400 |
2 | * This contains run_guest() which actually calls into the Host<->Guest | ||
2 | * Switcher and analyzes the return, such as determining if the Guest wants the | 3 | * Switcher and analyzes the return, such as determining if the Guest wants the |
3 | * Host to do something. This file also contains useful helper routines. :*/ | 4 | * Host to do something. This file also contains useful helper routines. |
5 | :*/ | ||
4 | #include <linux/module.h> | 6 | #include <linux/module.h> |
5 | #include <linux/stringify.h> | 7 | #include <linux/stringify.h> |
6 | #include <linux/stddef.h> | 8 | #include <linux/stddef.h> |
@@ -24,7 +26,8 @@ static struct page **switcher_page; | |||
24 | /* This One Big lock protects all inter-guest data structures. */ | 26 | /* This One Big lock protects all inter-guest data structures. */ |
25 | DEFINE_MUTEX(lguest_lock); | 27 | DEFINE_MUTEX(lguest_lock); |
26 | 28 | ||
27 | /*H:010 We need to set up the Switcher at a high virtual address. Remember the | 29 | /*H:010 |
30 | * We need to set up the Switcher at a high virtual address. Remember the | ||
28 | * Switcher is a few hundred bytes of assembler code which actually changes the | 31 | * Switcher is a few hundred bytes of assembler code which actually changes the |
29 | * CPU to run the Guest, and then changes back to the Host when a trap or | 32 | * CPU to run the Guest, and then changes back to the Host when a trap or |
30 | * interrupt happens. | 33 | * interrupt happens. |
@@ -33,7 +36,8 @@ DEFINE_MUTEX(lguest_lock); | |||
33 | * Host since it will be running as the switchover occurs. | 36 | * Host since it will be running as the switchover occurs. |
34 | * | 37 | * |
35 | * Trying to map memory at a particular address is an unusual thing to do, so | 38 | * Trying to map memory at a particular address is an unusual thing to do, so |
36 | * it's not a simple one-liner. */ | 39 | * it's not a simple one-liner. |
40 | */ | ||
37 | static __init int map_switcher(void) | 41 | static __init int map_switcher(void) |
38 | { | 42 | { |
39 | int i, err; | 43 | int i, err; |
@@ -47,8 +51,10 @@ static __init int map_switcher(void) | |||
47 | * easy. | 51 | * easy. |
48 | */ | 52 | */ |
49 | 53 | ||
50 | /* We allocate an array of struct page pointers. map_vm_area() wants | 54 | /* |
51 | * this, rather than just an array of pages. */ | 55 | * We allocate an array of struct page pointers. map_vm_area() wants |
56 | * this, rather than just an array of pages. | ||
57 | */ | ||
52 | switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, | 58 | switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, |
53 | GFP_KERNEL); | 59 | GFP_KERNEL); |
54 | if (!switcher_page) { | 60 | if (!switcher_page) { |
@@ -56,8 +62,10 @@ static __init int map_switcher(void) | |||
56 | goto out; | 62 | goto out; |
57 | } | 63 | } |
58 | 64 | ||
59 | /* Now we actually allocate the pages. The Guest will see these pages, | 65 | /* |
60 | * so we make sure they're zeroed. */ | 66 | * Now we actually allocate the pages. The Guest will see these pages, |
67 | * so we make sure they're zeroed. | ||
68 | */ | ||
61 | for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { | 69 | for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { |
62 | unsigned long addr = get_zeroed_page(GFP_KERNEL); | 70 | unsigned long addr = get_zeroed_page(GFP_KERNEL); |
63 | if (!addr) { | 71 | if (!addr) { |
@@ -67,19 +75,23 @@ static __init int map_switcher(void) | |||
67 | switcher_page[i] = virt_to_page(addr); | 75 | switcher_page[i] = virt_to_page(addr); |
68 | } | 76 | } |
69 | 77 | ||
70 | /* First we check that the Switcher won't overlap the fixmap area at | 78 | /* |
79 | * First we check that the Switcher won't overlap the fixmap area at | ||
71 | * the top of memory. It's currently nowhere near, but it could have | 80 | * the top of memory. It's currently nowhere near, but it could have |
72 | * very strange effects if it ever happened. */ | 81 | * very strange effects if it ever happened. |
82 | */ | ||
73 | if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ | 83 | if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ |
74 | err = -ENOMEM; | 84 | err = -ENOMEM; |
75 | printk("lguest: mapping switcher would thwack fixmap\n"); | 85 | printk("lguest: mapping switcher would thwack fixmap\n"); |
76 | goto free_pages; | 86 | goto free_pages; |
77 | } | 87 | } |
78 | 88 | ||
79 | /* Now we reserve the "virtual memory area" we want: 0xFFC00000 | 89 | /* |
90 | * Now we reserve the "virtual memory area" we want: 0xFFC00000 | ||
80 | * (SWITCHER_ADDR). We might not get it in theory, but in practice | 91 | * (SWITCHER_ADDR). We might not get it in theory, but in practice |
81 | * it's worked so far. The end address needs +1 because __get_vm_area | 92 | * it's worked so far. The end address needs +1 because __get_vm_area |
82 | * allocates an extra guard page, so we need space for that. */ | 93 | * allocates an extra guard page, so we need space for that. |
94 | */ | ||
83 | switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, | 95 | switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, |
84 | VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR | 96 | VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR |
85 | + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); | 97 | + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); |
@@ -89,11 +101,13 @@ static __init int map_switcher(void) | |||
89 | goto free_pages; | 101 | goto free_pages; |
90 | } | 102 | } |
91 | 103 | ||
92 | /* This code actually sets up the pages we've allocated to appear at | 104 | /* |
105 | * This code actually sets up the pages we've allocated to appear at | ||
93 | * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the | 106 | * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the |
94 | * kind of pages we're mapping (kernel pages), and a pointer to our | 107 | * kind of pages we're mapping (kernel pages), and a pointer to our |
95 | * array of struct pages. It increments that pointer, but we don't | 108 | * array of struct pages. It increments that pointer, but we don't |
96 | * care. */ | 109 | * care. |
110 | */ | ||
97 | pagep = switcher_page; | 111 | pagep = switcher_page; |
98 | err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); | 112 | err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); |
99 | if (err) { | 113 | if (err) { |
@@ -101,8 +115,10 @@ static __init int map_switcher(void) | |||
101 | goto free_vma; | 115 | goto free_vma; |
102 | } | 116 | } |
103 | 117 | ||
104 | /* Now the Switcher is mapped at the right address, we can't fail! | 118 | /* |
105 | * Copy in the compiled-in Switcher code (from <arch>_switcher.S). */ | 119 | * Now the Switcher is mapped at the right address, we can't fail! |
120 | * Copy in the compiled-in Switcher code (from <arch>_switcher.S). | ||
121 | */ | ||
106 | memcpy(switcher_vma->addr, start_switcher_text, | 122 | memcpy(switcher_vma->addr, start_switcher_text, |
107 | end_switcher_text - start_switcher_text); | 123 | end_switcher_text - start_switcher_text); |
108 | 124 | ||
@@ -124,8 +140,7 @@ out: | |||
124 | } | 140 | } |
125 | /*:*/ | 141 | /*:*/ |
126 | 142 | ||
127 | /* Cleaning up the mapping when the module is unloaded is almost... | 143 | /* Cleaning up the mapping when the module is unloaded is almost... too easy. */ |
128 | * too easy. */ | ||
129 | static void unmap_switcher(void) | 144 | static void unmap_switcher(void) |
130 | { | 145 | { |
131 | unsigned int i; | 146 | unsigned int i; |
@@ -151,16 +166,19 @@ static void unmap_switcher(void) | |||
151 | * But we can't trust the Guest: it might be trying to access the Launcher | 166 | * But we can't trust the Guest: it might be trying to access the Launcher |
152 | * code. We have to check that the range is below the pfn_limit the Launcher | 167 | * code. We have to check that the range is below the pfn_limit the Launcher |
153 | * gave us. We have to make sure that addr + len doesn't give us a false | 168 | * gave us. We have to make sure that addr + len doesn't give us a false |
154 | * positive by overflowing, too. */ | 169 | * positive by overflowing, too. |
170 | */ | ||
155 | bool lguest_address_ok(const struct lguest *lg, | 171 | bool lguest_address_ok(const struct lguest *lg, |
156 | unsigned long addr, unsigned long len) | 172 | unsigned long addr, unsigned long len) |
157 | { | 173 | { |
158 | return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); | 174 | return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); |
159 | } | 175 | } |
160 | 176 | ||
161 | /* This routine copies memory from the Guest. Here we can see how useful the | 177 | /* |
178 | * This routine copies memory from the Guest. Here we can see how useful the | ||
162 | * kill_lguest() routine we met in the Launcher can be: we return a random | 179 | * kill_lguest() routine we met in the Launcher can be: we return a random |
163 | * value (all zeroes) instead of needing to return an error. */ | 180 | * value (all zeroes) instead of needing to return an error. |
181 | */ | ||
164 | void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) | 182 | void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) |
165 | { | 183 | { |
166 | if (!lguest_address_ok(cpu->lg, addr, bytes) | 184 | if (!lguest_address_ok(cpu->lg, addr, bytes) |
@@ -181,9 +199,11 @@ void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, | |||
181 | } | 199 | } |
182 | /*:*/ | 200 | /*:*/ |
183 | 201 | ||
184 | /*H:030 Let's jump straight to the the main loop which runs the Guest. | 202 | /*H:030 |
203 | * Let's jump straight to the the main loop which runs the Guest. | ||
185 | * Remember, this is called by the Launcher reading /dev/lguest, and we keep | 204 | * Remember, this is called by the Launcher reading /dev/lguest, and we keep |
186 | * going around and around until something interesting happens. */ | 205 | * going around and around until something interesting happens. |
206 | */ | ||
187 | int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | 207 | int run_guest(struct lg_cpu *cpu, unsigned long __user *user) |
188 | { | 208 | { |
189 | /* We stop running once the Guest is dead. */ | 209 | /* We stop running once the Guest is dead. */ |
@@ -195,10 +215,17 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
195 | if (cpu->hcall) | 215 | if (cpu->hcall) |
196 | do_hypercalls(cpu); | 216 | do_hypercalls(cpu); |
197 | 217 | ||
198 | /* It's possible the Guest did a NOTIFY hypercall to the | 218 | /* |
199 | * Launcher, in which case we return from the read() now. */ | 219 | * It's possible the Guest did a NOTIFY hypercall to the |
220 | * Launcher. | ||
221 | */ | ||
200 | if (cpu->pending_notify) { | 222 | if (cpu->pending_notify) { |
223 | /* | ||
224 | * Does it just needs to write to a registered | ||
225 | * eventfd (ie. the appropriate virtqueue thread)? | ||
226 | */ | ||
201 | if (!send_notify_to_eventfd(cpu)) { | 227 | if (!send_notify_to_eventfd(cpu)) { |
228 | /* OK, we tell the main Laucher. */ | ||
202 | if (put_user(cpu->pending_notify, user)) | 229 | if (put_user(cpu->pending_notify, user)) |
203 | return -EFAULT; | 230 | return -EFAULT; |
204 | return sizeof(cpu->pending_notify); | 231 | return sizeof(cpu->pending_notify); |
@@ -209,29 +236,39 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
209 | if (signal_pending(current)) | 236 | if (signal_pending(current)) |
210 | return -ERESTARTSYS; | 237 | return -ERESTARTSYS; |
211 | 238 | ||
212 | /* Check if there are any interrupts which can be delivered now: | 239 | /* |
240 | * Check if there are any interrupts which can be delivered now: | ||
213 | * if so, this sets up the hander to be executed when we next | 241 | * if so, this sets up the hander to be executed when we next |
214 | * run the Guest. */ | 242 | * run the Guest. |
243 | */ | ||
215 | irq = interrupt_pending(cpu, &more); | 244 | irq = interrupt_pending(cpu, &more); |
216 | if (irq < LGUEST_IRQS) | 245 | if (irq < LGUEST_IRQS) |
217 | try_deliver_interrupt(cpu, irq, more); | 246 | try_deliver_interrupt(cpu, irq, more); |
218 | 247 | ||
219 | /* All long-lived kernel loops need to check with this horrible | 248 | /* |
249 | * All long-lived kernel loops need to check with this horrible | ||
220 | * thing called the freezer. If the Host is trying to suspend, | 250 | * thing called the freezer. If the Host is trying to suspend, |
221 | * it stops us. */ | 251 | * it stops us. |
252 | */ | ||
222 | try_to_freeze(); | 253 | try_to_freeze(); |
223 | 254 | ||
224 | /* Just make absolutely sure the Guest is still alive. One of | 255 | /* |
225 | * those hypercalls could have been fatal, for example. */ | 256 | * Just make absolutely sure the Guest is still alive. One of |
257 | * those hypercalls could have been fatal, for example. | ||
258 | */ | ||
226 | if (cpu->lg->dead) | 259 | if (cpu->lg->dead) |
227 | break; | 260 | break; |
228 | 261 | ||
229 | /* If the Guest asked to be stopped, we sleep. The Guest's | 262 | /* |
230 | * clock timer will wake us. */ | 263 | * If the Guest asked to be stopped, we sleep. The Guest's |
264 | * clock timer will wake us. | ||
265 | */ | ||
231 | if (cpu->halted) { | 266 | if (cpu->halted) { |
232 | set_current_state(TASK_INTERRUPTIBLE); | 267 | set_current_state(TASK_INTERRUPTIBLE); |
233 | /* Just before we sleep, make sure no interrupt snuck in | 268 | /* |
234 | * which we should be doing. */ | 269 | * Just before we sleep, make sure no interrupt snuck in |
270 | * which we should be doing. | ||
271 | */ | ||
235 | if (interrupt_pending(cpu, &more) < LGUEST_IRQS) | 272 | if (interrupt_pending(cpu, &more) < LGUEST_IRQS) |
236 | set_current_state(TASK_RUNNING); | 273 | set_current_state(TASK_RUNNING); |
237 | else | 274 | else |
@@ -239,8 +276,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) | |||
239 | continue; | 276 | continue; |
240 | } | 277 | } |
241 | 278 | ||
242 | /* OK, now we're ready to jump into the Guest. First we put up | 279 | /* |
243 | * the "Do Not Disturb" sign: */ | 280 | * OK, now we're ready to jump into the Guest. First we put up |
281 | * the "Do Not Disturb" sign: | ||
282 | */ | ||
244 | local_irq_disable(); | 283 | local_irq_disable(); |
245 | 284 | ||
246 | /* Actually run the Guest until something happens. */ | 285 | /* Actually run the Guest until something happens. */ |
@@ -327,8 +366,10 @@ static void __exit fini(void) | |||
327 | } | 366 | } |
328 | /*:*/ | 367 | /*:*/ |
329 | 368 | ||
330 | /* The Host side of lguest can be a module. This is a nice way for people to | 369 | /* |
331 | * play with it. */ | 370 | * The Host side of lguest can be a module. This is a nice way for people to |
371 | * play with it. | ||
372 | */ | ||
332 | module_init(init); | 373 | module_init(init); |
333 | module_exit(fini); | 374 | module_exit(fini); |
334 | MODULE_LICENSE("GPL"); | 375 | MODULE_LICENSE("GPL"); |
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index c29ffa19cb74..83511eb0923d 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c | |||
@@ -1,8 +1,10 @@ | |||
1 | /*P:500 Just as userspace programs request kernel operations through a system | 1 | /*P:500 |
2 | * Just as userspace programs request kernel operations through a system | ||
2 | * call, the Guest requests Host operations through a "hypercall". You might | 3 | * call, the Guest requests Host operations through a "hypercall". You might |
3 | * notice this nomenclature doesn't really follow any logic, but the name has | 4 | * notice this nomenclature doesn't really follow any logic, but the name has |
4 | * been around for long enough that we're stuck with it. As you'd expect, this | 5 | * been around for long enough that we're stuck with it. As you'd expect, this |
5 | * code is basically a one big switch statement. :*/ | 6 | * code is basically a one big switch statement. |
7 | :*/ | ||
6 | 8 | ||
7 | /* Copyright (C) 2006 Rusty Russell IBM Corporation | 9 | /* Copyright (C) 2006 Rusty Russell IBM Corporation |
8 | 10 | ||
@@ -28,30 +30,41 @@ | |||
28 | #include <asm/pgtable.h> | 30 | #include <asm/pgtable.h> |
29 | #include "lg.h" | 31 | #include "lg.h" |
30 | 32 | ||
31 | /*H:120 This is the core hypercall routine: where the Guest gets what it wants. | 33 | /*H:120 |
32 | * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. */ | 34 | * This is the core hypercall routine: where the Guest gets what it wants. |
35 | * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. | ||
36 | */ | ||
33 | static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | 37 | static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) |
34 | { | 38 | { |
35 | switch (args->arg0) { | 39 | switch (args->arg0) { |
36 | case LHCALL_FLUSH_ASYNC: | 40 | case LHCALL_FLUSH_ASYNC: |
37 | /* This call does nothing, except by breaking out of the Guest | 41 | /* |
38 | * it makes us process all the asynchronous hypercalls. */ | 42 | * This call does nothing, except by breaking out of the Guest |
43 | * it makes us process all the asynchronous hypercalls. | ||
44 | */ | ||
39 | break; | 45 | break; |
40 | case LHCALL_SEND_INTERRUPTS: | 46 | case LHCALL_SEND_INTERRUPTS: |
41 | /* This call does nothing too, but by breaking out of the Guest | 47 | /* |
42 | * it makes us process any pending interrupts. */ | 48 | * This call does nothing too, but by breaking out of the Guest |
49 | * it makes us process any pending interrupts. | ||
50 | */ | ||
43 | break; | 51 | break; |
44 | case LHCALL_LGUEST_INIT: | 52 | case LHCALL_LGUEST_INIT: |
45 | /* You can't get here unless you're already initialized. Don't | 53 | /* |
46 | * do that. */ | 54 | * You can't get here unless you're already initialized. Don't |
55 | * do that. | ||
56 | */ | ||
47 | kill_guest(cpu, "already have lguest_data"); | 57 | kill_guest(cpu, "already have lguest_data"); |
48 | break; | 58 | break; |
49 | case LHCALL_SHUTDOWN: { | 59 | case LHCALL_SHUTDOWN: { |
50 | /* Shutdown is such a trivial hypercall that we do it in four | ||
51 | * lines right here. */ | ||
52 | char msg[128]; | 60 | char msg[128]; |
53 | /* If the lgread fails, it will call kill_guest() itself; the | 61 | /* |
54 | * kill_guest() with the message will be ignored. */ | 62 | * Shutdown is such a trivial hypercall that we do it in five |
63 | * lines right here. | ||
64 | * | ||
65 | * If the lgread fails, it will call kill_guest() itself; the | ||
66 | * kill_guest() with the message will be ignored. | ||
67 | */ | ||
55 | __lgread(cpu, msg, args->arg1, sizeof(msg)); | 68 | __lgread(cpu, msg, args->arg1, sizeof(msg)); |
56 | msg[sizeof(msg)-1] = '\0'; | 69 | msg[sizeof(msg)-1] = '\0'; |
57 | kill_guest(cpu, "CRASH: %s", msg); | 70 | kill_guest(cpu, "CRASH: %s", msg); |
@@ -60,16 +73,17 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | |||
60 | break; | 73 | break; |
61 | } | 74 | } |
62 | case LHCALL_FLUSH_TLB: | 75 | case LHCALL_FLUSH_TLB: |
63 | /* FLUSH_TLB comes in two flavors, depending on the | 76 | /* FLUSH_TLB comes in two flavors, depending on the argument: */ |
64 | * argument: */ | ||
65 | if (args->arg1) | 77 | if (args->arg1) |
66 | guest_pagetable_clear_all(cpu); | 78 | guest_pagetable_clear_all(cpu); |
67 | else | 79 | else |
68 | guest_pagetable_flush_user(cpu); | 80 | guest_pagetable_flush_user(cpu); |
69 | break; | 81 | break; |
70 | 82 | ||
71 | /* All these calls simply pass the arguments through to the right | 83 | /* |
72 | * routines. */ | 84 | * All these calls simply pass the arguments through to the right |
85 | * routines. | ||
86 | */ | ||
73 | case LHCALL_NEW_PGTABLE: | 87 | case LHCALL_NEW_PGTABLE: |
74 | guest_new_pagetable(cpu, args->arg1); | 88 | guest_new_pagetable(cpu, args->arg1); |
75 | break; | 89 | break; |
@@ -112,15 +126,16 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) | |||
112 | kill_guest(cpu, "Bad hypercall %li\n", args->arg0); | 126 | kill_guest(cpu, "Bad hypercall %li\n", args->arg0); |
113 | } | 127 | } |
114 | } | 128 | } |
115 | /*:*/ | ||
116 | 129 | ||
117 | /*H:124 Asynchronous hypercalls are easy: we just look in the array in the | 130 | /*H:124 |
131 | * Asynchronous hypercalls are easy: we just look in the array in the | ||
118 | * Guest's "struct lguest_data" to see if any new ones are marked "ready". | 132 | * Guest's "struct lguest_data" to see if any new ones are marked "ready". |
119 | * | 133 | * |
120 | * We are careful to do these in order: obviously we respect the order the | 134 | * We are careful to do these in order: obviously we respect the order the |
121 | * Guest put them in the ring, but we also promise the Guest that they will | 135 | * Guest put them in the ring, but we also promise the Guest that they will |
122 | * happen before any normal hypercall (which is why we check this before | 136 | * happen before any normal hypercall (which is why we check this before |
123 | * checking for a normal hcall). */ | 137 | * checking for a normal hcall). |
138 | */ | ||
124 | static void do_async_hcalls(struct lg_cpu *cpu) | 139 | static void do_async_hcalls(struct lg_cpu *cpu) |
125 | { | 140 | { |
126 | unsigned int i; | 141 | unsigned int i; |
@@ -133,22 +148,28 @@ static void do_async_hcalls(struct lg_cpu *cpu) | |||
133 | /* We process "struct lguest_data"s hcalls[] ring once. */ | 148 | /* We process "struct lguest_data"s hcalls[] ring once. */ |
134 | for (i = 0; i < ARRAY_SIZE(st); i++) { | 149 | for (i = 0; i < ARRAY_SIZE(st); i++) { |
135 | struct hcall_args args; | 150 | struct hcall_args args; |
136 | /* We remember where we were up to from last time. This makes | 151 | /* |
152 | * We remember where we were up to from last time. This makes | ||
137 | * sure that the hypercalls are done in the order the Guest | 153 | * sure that the hypercalls are done in the order the Guest |
138 | * places them in the ring. */ | 154 | * places them in the ring. |
155 | */ | ||
139 | unsigned int n = cpu->next_hcall; | 156 | unsigned int n = cpu->next_hcall; |
140 | 157 | ||
141 | /* 0xFF means there's no call here (yet). */ | 158 | /* 0xFF means there's no call here (yet). */ |
142 | if (st[n] == 0xFF) | 159 | if (st[n] == 0xFF) |
143 | break; | 160 | break; |
144 | 161 | ||
145 | /* OK, we have hypercall. Increment the "next_hcall" cursor, | 162 | /* |
146 | * and wrap back to 0 if we reach the end. */ | 163 | * OK, we have hypercall. Increment the "next_hcall" cursor, |
164 | * and wrap back to 0 if we reach the end. | ||
165 | */ | ||
147 | if (++cpu->next_hcall == LHCALL_RING_SIZE) | 166 | if (++cpu->next_hcall == LHCALL_RING_SIZE) |
148 | cpu->next_hcall = 0; | 167 | cpu->next_hcall = 0; |
149 | 168 | ||
150 | /* Copy the hypercall arguments into a local copy of | 169 | /* |
151 | * the hcall_args struct. */ | 170 | * Copy the hypercall arguments into a local copy of the |
171 | * hcall_args struct. | ||
172 | */ | ||
152 | if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n], | 173 | if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n], |
153 | sizeof(struct hcall_args))) { | 174 | sizeof(struct hcall_args))) { |
154 | kill_guest(cpu, "Fetching async hypercalls"); | 175 | kill_guest(cpu, "Fetching async hypercalls"); |
@@ -164,19 +185,25 @@ static void do_async_hcalls(struct lg_cpu *cpu) | |||
164 | break; | 185 | break; |
165 | } | 186 | } |
166 | 187 | ||
167 | /* Stop doing hypercalls if they want to notify the Launcher: | 188 | /* |
168 | * it needs to service this first. */ | 189 | * Stop doing hypercalls if they want to notify the Launcher: |
190 | * it needs to service this first. | ||
191 | */ | ||
169 | if (cpu->pending_notify) | 192 | if (cpu->pending_notify) |
170 | break; | 193 | break; |
171 | } | 194 | } |
172 | } | 195 | } |
173 | 196 | ||
174 | /* Last of all, we look at what happens first of all. The very first time the | 197 | /* |
175 | * Guest makes a hypercall, we end up here to set things up: */ | 198 | * Last of all, we look at what happens first of all. The very first time the |
199 | * Guest makes a hypercall, we end up here to set things up: | ||
200 | */ | ||
176 | static void initialize(struct lg_cpu *cpu) | 201 | static void initialize(struct lg_cpu *cpu) |
177 | { | 202 | { |
178 | /* You can't do anything until you're initialized. The Guest knows the | 203 | /* |
179 | * rules, so we're unforgiving here. */ | 204 | * You can't do anything until you're initialized. The Guest knows the |
205 | * rules, so we're unforgiving here. | ||
206 | */ | ||
180 | if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { | 207 | if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { |
181 | kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0); | 208 | kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0); |
182 | return; | 209 | return; |
@@ -185,32 +212,44 @@ static void initialize(struct lg_cpu *cpu) | |||
185 | if (lguest_arch_init_hypercalls(cpu)) | 212 | if (lguest_arch_init_hypercalls(cpu)) |
186 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); | 213 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); |
187 | 214 | ||
188 | /* The Guest tells us where we're not to deliver interrupts by putting | 215 | /* |
189 | * the range of addresses into "struct lguest_data". */ | 216 | * The Guest tells us where we're not to deliver interrupts by putting |
217 | * the range of addresses into "struct lguest_data". | ||
218 | */ | ||
190 | if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) | 219 | if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) |
191 | || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) | 220 | || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) |
192 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); | 221 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); |
193 | 222 | ||
194 | /* We write the current time into the Guest's data page once so it can | 223 | /* |
195 | * set its clock. */ | 224 | * We write the current time into the Guest's data page once so it can |
225 | * set its clock. | ||
226 | */ | ||
196 | write_timestamp(cpu); | 227 | write_timestamp(cpu); |
197 | 228 | ||
198 | /* page_tables.c will also do some setup. */ | 229 | /* page_tables.c will also do some setup. */ |
199 | page_table_guest_data_init(cpu); | 230 | page_table_guest_data_init(cpu); |
200 | 231 | ||
201 | /* This is the one case where the above accesses might have been the | 232 | /* |
233 | * This is the one case where the above accesses might have been the | ||
202 | * first write to a Guest page. This may have caused a copy-on-write | 234 | * first write to a Guest page. This may have caused a copy-on-write |
203 | * fault, but the old page might be (read-only) in the Guest | 235 | * fault, but the old page might be (read-only) in the Guest |
204 | * pagetable. */ | 236 | * pagetable. |
237 | */ | ||
205 | guest_pagetable_clear_all(cpu); | 238 | guest_pagetable_clear_all(cpu); |
206 | } | 239 | } |
207 | /*:*/ | 240 | /*:*/ |
208 | 241 | ||
209 | /*M:013 If a Guest reads from a page (so creates a mapping) that it has never | 242 | /*M:013 |
243 | * If a Guest reads from a page (so creates a mapping) that it has never | ||
210 | * written to, and then the Launcher writes to it (ie. the output of a virtual | 244 | * written to, and then the Launcher writes to it (ie. the output of a virtual |
211 | * device), the Guest will still see the old page. In practice, this never | 245 | * device), the Guest will still see the old page. In practice, this never |
212 | * happens: why would the Guest read a page which it has never written to? But | 246 | * happens: why would the Guest read a page which it has never written to? But |
213 | * a similar scenario might one day bite us, so it's worth mentioning. :*/ | 247 | * a similar scenario might one day bite us, so it's worth mentioning. |
248 | * | ||
249 | * Note that if we used a shared anonymous mapping in the Launcher instead of | ||
250 | * mapping /dev/zero private, we wouldn't worry about cop-on-write. And we | ||
251 | * need that to switch the Launcher to processes (away from threads) anyway. | ||
252 | :*/ | ||
214 | 253 | ||
215 | /*H:100 | 254 | /*H:100 |
216 | * Hypercalls | 255 | * Hypercalls |
@@ -229,17 +268,22 @@ void do_hypercalls(struct lg_cpu *cpu) | |||
229 | return; | 268 | return; |
230 | } | 269 | } |
231 | 270 | ||
232 | /* The Guest has initialized. | 271 | /* |
272 | * The Guest has initialized. | ||
233 | * | 273 | * |
234 | * Look in the hypercall ring for the async hypercalls: */ | 274 | * Look in the hypercall ring for the async hypercalls: |
275 | */ | ||
235 | do_async_hcalls(cpu); | 276 | do_async_hcalls(cpu); |
236 | 277 | ||
237 | /* If we stopped reading the hypercall ring because the Guest did a | 278 | /* |
279 | * If we stopped reading the hypercall ring because the Guest did a | ||
238 | * NOTIFY to the Launcher, we want to return now. Otherwise we do | 280 | * NOTIFY to the Launcher, we want to return now. Otherwise we do |
239 | * the hypercall. */ | 281 | * the hypercall. |
282 | */ | ||
240 | if (!cpu->pending_notify) { | 283 | if (!cpu->pending_notify) { |
241 | do_hcall(cpu, cpu->hcall); | 284 | do_hcall(cpu, cpu->hcall); |
242 | /* Tricky point: we reset the hcall pointer to mark the | 285 | /* |
286 | * Tricky point: we reset the hcall pointer to mark the | ||
243 | * hypercall as "done". We use the hcall pointer rather than | 287 | * hypercall as "done". We use the hcall pointer rather than |
244 | * the trap number to indicate a hypercall is pending. | 288 | * the trap number to indicate a hypercall is pending. |
245 | * Normally it doesn't matter: the Guest will run again and | 289 | * Normally it doesn't matter: the Guest will run again and |
@@ -248,13 +292,16 @@ void do_hypercalls(struct lg_cpu *cpu) | |||
248 | * However, if we are signalled or the Guest sends I/O to the | 292 | * However, if we are signalled or the Guest sends I/O to the |
249 | * Launcher, the run_guest() loop will exit without running the | 293 | * Launcher, the run_guest() loop will exit without running the |
250 | * Guest. When it comes back it would try to re-run the | 294 | * Guest. When it comes back it would try to re-run the |
251 | * hypercall. Finding that bug sucked. */ | 295 | * hypercall. Finding that bug sucked. |
296 | */ | ||
252 | cpu->hcall = NULL; | 297 | cpu->hcall = NULL; |
253 | } | 298 | } |
254 | } | 299 | } |
255 | 300 | ||
256 | /* This routine supplies the Guest with time: it's used for wallclock time at | 301 | /* |
257 | * initial boot and as a rough time source if the TSC isn't available. */ | 302 | * This routine supplies the Guest with time: it's used for wallclock time at |
303 | * initial boot and as a rough time source if the TSC isn't available. | ||
304 | */ | ||
258 | void write_timestamp(struct lg_cpu *cpu) | 305 | void write_timestamp(struct lg_cpu *cpu) |
259 | { | 306 | { |
260 | struct timespec now; | 307 | struct timespec now; |
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 0e9067b0d507..18648180db02 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c | |||
@@ -1,4 +1,5 @@ | |||
1 | /*P:800 Interrupts (traps) are complicated enough to earn their own file. | 1 | /*P:800 |
2 | * Interrupts (traps) are complicated enough to earn their own file. | ||
2 | * There are three classes of interrupts: | 3 | * There are three classes of interrupts: |
3 | * | 4 | * |
4 | * 1) Real hardware interrupts which occur while we're running the Guest, | 5 | * 1) Real hardware interrupts which occur while we're running the Guest, |
@@ -10,7 +11,8 @@ | |||
10 | * just like real hardware would deliver them. Traps from the Guest can be set | 11 | * just like real hardware would deliver them. Traps from the Guest can be set |
11 | * up to go directly back into the Guest, but sometimes the Host wants to see | 12 | * up to go directly back into the Guest, but sometimes the Host wants to see |
12 | * them first, so we also have a way of "reflecting" them into the Guest as if | 13 | * them first, so we also have a way of "reflecting" them into the Guest as if |
13 | * they had been delivered to it directly. :*/ | 14 | * they had been delivered to it directly. |
15 | :*/ | ||
14 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
15 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
16 | #include <linux/module.h> | 18 | #include <linux/module.h> |
@@ -26,8 +28,10 @@ static unsigned long idt_address(u32 lo, u32 hi) | |||
26 | return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); | 28 | return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); |
27 | } | 29 | } |
28 | 30 | ||
29 | /* The "type" of the interrupt handler is a 4 bit field: we only support a | 31 | /* |
30 | * couple of types. */ | 32 | * The "type" of the interrupt handler is a 4 bit field: we only support a |
33 | * couple of types. | ||
34 | */ | ||
31 | static int idt_type(u32 lo, u32 hi) | 35 | static int idt_type(u32 lo, u32 hi) |
32 | { | 36 | { |
33 | return (hi >> 8) & 0xF; | 37 | return (hi >> 8) & 0xF; |
@@ -39,8 +43,10 @@ static bool idt_present(u32 lo, u32 hi) | |||
39 | return (hi & 0x8000); | 43 | return (hi & 0x8000); |
40 | } | 44 | } |
41 | 45 | ||
42 | /* We need a helper to "push" a value onto the Guest's stack, since that's a | 46 | /* |
43 | * big part of what delivering an interrupt does. */ | 47 | * We need a helper to "push" a value onto the Guest's stack, since that's a |
48 | * big part of what delivering an interrupt does. | ||
49 | */ | ||
44 | static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) | 50 | static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) |
45 | { | 51 | { |
46 | /* Stack grows upwards: move stack then write value. */ | 52 | /* Stack grows upwards: move stack then write value. */ |
@@ -48,7 +54,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) | |||
48 | lgwrite(cpu, *gstack, u32, val); | 54 | lgwrite(cpu, *gstack, u32, val); |
49 | } | 55 | } |
50 | 56 | ||
51 | /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or | 57 | /*H:210 |
58 | * The set_guest_interrupt() routine actually delivers the interrupt or | ||
52 | * trap. The mechanics of delivering traps and interrupts to the Guest are the | 59 | * trap. The mechanics of delivering traps and interrupts to the Guest are the |
53 | * same, except some traps have an "error code" which gets pushed onto the | 60 | * same, except some traps have an "error code" which gets pushed onto the |
54 | * stack as well: the caller tells us if this is one. | 61 | * stack as well: the caller tells us if this is one. |
@@ -59,7 +66,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) | |||
59 | * | 66 | * |
60 | * We set up the stack just like the CPU does for a real interrupt, so it's | 67 | * We set up the stack just like the CPU does for a real interrupt, so it's |
61 | * identical for the Guest (and the standard "iret" instruction will undo | 68 | * identical for the Guest (and the standard "iret" instruction will undo |
62 | * it). */ | 69 | * it). |
70 | */ | ||
63 | static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | 71 | static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, |
64 | bool has_err) | 72 | bool has_err) |
65 | { | 73 | { |
@@ -67,20 +75,26 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | |||
67 | u32 eflags, ss, irq_enable; | 75 | u32 eflags, ss, irq_enable; |
68 | unsigned long virtstack; | 76 | unsigned long virtstack; |
69 | 77 | ||
70 | /* There are two cases for interrupts: one where the Guest is already | 78 | /* |
79 | * There are two cases for interrupts: one where the Guest is already | ||
71 | * in the kernel, and a more complex one where the Guest is in | 80 | * in the kernel, and a more complex one where the Guest is in |
72 | * userspace. We check the privilege level to find out. */ | 81 | * userspace. We check the privilege level to find out. |
82 | */ | ||
73 | if ((cpu->regs->ss&0x3) != GUEST_PL) { | 83 | if ((cpu->regs->ss&0x3) != GUEST_PL) { |
74 | /* The Guest told us their kernel stack with the SET_STACK | 84 | /* |
75 | * hypercall: both the virtual address and the segment */ | 85 | * The Guest told us their kernel stack with the SET_STACK |
86 | * hypercall: both the virtual address and the segment. | ||
87 | */ | ||
76 | virtstack = cpu->esp1; | 88 | virtstack = cpu->esp1; |
77 | ss = cpu->ss1; | 89 | ss = cpu->ss1; |
78 | 90 | ||
79 | origstack = gstack = guest_pa(cpu, virtstack); | 91 | origstack = gstack = guest_pa(cpu, virtstack); |
80 | /* We push the old stack segment and pointer onto the new | 92 | /* |
93 | * We push the old stack segment and pointer onto the new | ||
81 | * stack: when the Guest does an "iret" back from the interrupt | 94 | * stack: when the Guest does an "iret" back from the interrupt |
82 | * handler the CPU will notice they're dropping privilege | 95 | * handler the CPU will notice they're dropping privilege |
83 | * levels and expect these here. */ | 96 | * levels and expect these here. |
97 | */ | ||
84 | push_guest_stack(cpu, &gstack, cpu->regs->ss); | 98 | push_guest_stack(cpu, &gstack, cpu->regs->ss); |
85 | push_guest_stack(cpu, &gstack, cpu->regs->esp); | 99 | push_guest_stack(cpu, &gstack, cpu->regs->esp); |
86 | } else { | 100 | } else { |
@@ -91,18 +105,22 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | |||
91 | origstack = gstack = guest_pa(cpu, virtstack); | 105 | origstack = gstack = guest_pa(cpu, virtstack); |
92 | } | 106 | } |
93 | 107 | ||
94 | /* Remember that we never let the Guest actually disable interrupts, so | 108 | /* |
109 | * Remember that we never let the Guest actually disable interrupts, so | ||
95 | * the "Interrupt Flag" bit is always set. We copy that bit from the | 110 | * the "Interrupt Flag" bit is always set. We copy that bit from the |
96 | * Guest's "irq_enabled" field into the eflags word: we saw the Guest | 111 | * Guest's "irq_enabled" field into the eflags word: we saw the Guest |
97 | * copy it back in "lguest_iret". */ | 112 | * copy it back in "lguest_iret". |
113 | */ | ||
98 | eflags = cpu->regs->eflags; | 114 | eflags = cpu->regs->eflags; |
99 | if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 | 115 | if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 |
100 | && !(irq_enable & X86_EFLAGS_IF)) | 116 | && !(irq_enable & X86_EFLAGS_IF)) |
101 | eflags &= ~X86_EFLAGS_IF; | 117 | eflags &= ~X86_EFLAGS_IF; |
102 | 118 | ||
103 | /* An interrupt is expected to push three things on the stack: the old | 119 | /* |
120 | * An interrupt is expected to push three things on the stack: the old | ||
104 | * "eflags" word, the old code segment, and the old instruction | 121 | * "eflags" word, the old code segment, and the old instruction |
105 | * pointer. */ | 122 | * pointer. |
123 | */ | ||
106 | push_guest_stack(cpu, &gstack, eflags); | 124 | push_guest_stack(cpu, &gstack, eflags); |
107 | push_guest_stack(cpu, &gstack, cpu->regs->cs); | 125 | push_guest_stack(cpu, &gstack, cpu->regs->cs); |
108 | push_guest_stack(cpu, &gstack, cpu->regs->eip); | 126 | push_guest_stack(cpu, &gstack, cpu->regs->eip); |
@@ -111,15 +129,19 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | |||
111 | if (has_err) | 129 | if (has_err) |
112 | push_guest_stack(cpu, &gstack, cpu->regs->errcode); | 130 | push_guest_stack(cpu, &gstack, cpu->regs->errcode); |
113 | 131 | ||
114 | /* Now we've pushed all the old state, we change the stack, the code | 132 | /* |
115 | * segment and the address to execute. */ | 133 | * Now we've pushed all the old state, we change the stack, the code |
134 | * segment and the address to execute. | ||
135 | */ | ||
116 | cpu->regs->ss = ss; | 136 | cpu->regs->ss = ss; |
117 | cpu->regs->esp = virtstack + (gstack - origstack); | 137 | cpu->regs->esp = virtstack + (gstack - origstack); |
118 | cpu->regs->cs = (__KERNEL_CS|GUEST_PL); | 138 | cpu->regs->cs = (__KERNEL_CS|GUEST_PL); |
119 | cpu->regs->eip = idt_address(lo, hi); | 139 | cpu->regs->eip = idt_address(lo, hi); |
120 | 140 | ||
121 | /* There are two kinds of interrupt handlers: 0xE is an "interrupt | 141 | /* |
122 | * gate" which expects interrupts to be disabled on entry. */ | 142 | * There are two kinds of interrupt handlers: 0xE is an "interrupt |
143 | * gate" which expects interrupts to be disabled on entry. | ||
144 | */ | ||
123 | if (idt_type(lo, hi) == 0xE) | 145 | if (idt_type(lo, hi) == 0xE) |
124 | if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) | 146 | if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) |
125 | kill_guest(cpu, "Disabling interrupts"); | 147 | kill_guest(cpu, "Disabling interrupts"); |
@@ -130,7 +152,8 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, | |||
130 | * | 152 | * |
131 | * interrupt_pending() returns the first pending interrupt which isn't blocked | 153 | * interrupt_pending() returns the first pending interrupt which isn't blocked |
132 | * by the Guest. It is called before every entry to the Guest, and just before | 154 | * by the Guest. It is called before every entry to the Guest, and just before |
133 | * we go to sleep when the Guest has halted itself. */ | 155 | * we go to sleep when the Guest has halted itself. |
156 | */ | ||
134 | unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) | 157 | unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) |
135 | { | 158 | { |
136 | unsigned int irq; | 159 | unsigned int irq; |
@@ -140,8 +163,10 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) | |||
140 | if (!cpu->lg->lguest_data) | 163 | if (!cpu->lg->lguest_data) |
141 | return LGUEST_IRQS; | 164 | return LGUEST_IRQS; |
142 | 165 | ||
143 | /* Take our "irqs_pending" array and remove any interrupts the Guest | 166 | /* |
144 | * wants blocked: the result ends up in "blk". */ | 167 | * Take our "irqs_pending" array and remove any interrupts the Guest |
168 | * wants blocked: the result ends up in "blk". | ||
169 | */ | ||
145 | if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, | 170 | if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, |
146 | sizeof(blk))) | 171 | sizeof(blk))) |
147 | return LGUEST_IRQS; | 172 | return LGUEST_IRQS; |
@@ -154,16 +179,20 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) | |||
154 | return irq; | 179 | return irq; |
155 | } | 180 | } |
156 | 181 | ||
157 | /* This actually diverts the Guest to running an interrupt handler, once an | 182 | /* |
158 | * interrupt has been identified by interrupt_pending(). */ | 183 | * This actually diverts the Guest to running an interrupt handler, once an |
184 | * interrupt has been identified by interrupt_pending(). | ||
185 | */ | ||
159 | void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) | 186 | void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) |
160 | { | 187 | { |
161 | struct desc_struct *idt; | 188 | struct desc_struct *idt; |
162 | 189 | ||
163 | BUG_ON(irq >= LGUEST_IRQS); | 190 | BUG_ON(irq >= LGUEST_IRQS); |
164 | 191 | ||
165 | /* They may be in the middle of an iret, where they asked us never to | 192 | /* |
166 | * deliver interrupts. */ | 193 | * They may be in the middle of an iret, where they asked us never to |
194 | * deliver interrupts. | ||
195 | */ | ||
167 | if (cpu->regs->eip >= cpu->lg->noirq_start && | 196 | if (cpu->regs->eip >= cpu->lg->noirq_start && |
168 | (cpu->regs->eip < cpu->lg->noirq_end)) | 197 | (cpu->regs->eip < cpu->lg->noirq_end)) |
169 | return; | 198 | return; |
@@ -187,29 +216,37 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) | |||
187 | } | 216 | } |
188 | } | 217 | } |
189 | 218 | ||
190 | /* Look at the IDT entry the Guest gave us for this interrupt. The | 219 | /* |
220 | * Look at the IDT entry the Guest gave us for this interrupt. The | ||
191 | * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip | 221 | * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip |
192 | * over them. */ | 222 | * over them. |
223 | */ | ||
193 | idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; | 224 | idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; |
194 | /* If they don't have a handler (yet?), we just ignore it */ | 225 | /* If they don't have a handler (yet?), we just ignore it */ |
195 | if (idt_present(idt->a, idt->b)) { | 226 | if (idt_present(idt->a, idt->b)) { |
196 | /* OK, mark it no longer pending and deliver it. */ | 227 | /* OK, mark it no longer pending and deliver it. */ |
197 | clear_bit(irq, cpu->irqs_pending); | 228 | clear_bit(irq, cpu->irqs_pending); |
198 | /* set_guest_interrupt() takes the interrupt descriptor and a | 229 | /* |
230 | * set_guest_interrupt() takes the interrupt descriptor and a | ||
199 | * flag to say whether this interrupt pushes an error code onto | 231 | * flag to say whether this interrupt pushes an error code onto |
200 | * the stack as well: virtual interrupts never do. */ | 232 | * the stack as well: virtual interrupts never do. |
233 | */ | ||
201 | set_guest_interrupt(cpu, idt->a, idt->b, false); | 234 | set_guest_interrupt(cpu, idt->a, idt->b, false); |
202 | } | 235 | } |
203 | 236 | ||
204 | /* Every time we deliver an interrupt, we update the timestamp in the | 237 | /* |
238 | * Every time we deliver an interrupt, we update the timestamp in the | ||
205 | * Guest's lguest_data struct. It would be better for the Guest if we | 239 | * Guest's lguest_data struct. It would be better for the Guest if we |
206 | * did this more often, but it can actually be quite slow: doing it | 240 | * did this more often, but it can actually be quite slow: doing it |
207 | * here is a compromise which means at least it gets updated every | 241 | * here is a compromise which means at least it gets updated every |
208 | * timer interrupt. */ | 242 | * timer interrupt. |
243 | */ | ||
209 | write_timestamp(cpu); | 244 | write_timestamp(cpu); |
210 | 245 | ||
211 | /* If there are no other interrupts we want to deliver, clear | 246 | /* |
212 | * the pending flag. */ | 247 | * If there are no other interrupts we want to deliver, clear |
248 | * the pending flag. | ||
249 | */ | ||
213 | if (!more) | 250 | if (!more) |
214 | put_user(0, &cpu->lg->lguest_data->irq_pending); | 251 | put_user(0, &cpu->lg->lguest_data->irq_pending); |
215 | } | 252 | } |
@@ -217,24 +254,29 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) | |||
217 | /* And this is the routine when we want to set an interrupt for the Guest. */ | 254 | /* And this is the routine when we want to set an interrupt for the Guest. */ |
218 | void set_interrupt(struct lg_cpu *cpu, unsigned int irq) | 255 | void set_interrupt(struct lg_cpu *cpu, unsigned int irq) |
219 | { | 256 | { |
220 | /* Next time the Guest runs, the core code will see if it can deliver | 257 | /* |
221 | * this interrupt. */ | 258 | * Next time the Guest runs, the core code will see if it can deliver |
259 | * this interrupt. | ||
260 | */ | ||
222 | set_bit(irq, cpu->irqs_pending); | 261 | set_bit(irq, cpu->irqs_pending); |
223 | 262 | ||
224 | /* Make sure it sees it; it might be asleep (eg. halted), or | 263 | /* |
225 | * running the Guest right now, in which case kick_process() | 264 | * Make sure it sees it; it might be asleep (eg. halted), or running |
226 | * will knock it out. */ | 265 | * the Guest right now, in which case kick_process() will knock it out. |
266 | */ | ||
227 | if (!wake_up_process(cpu->tsk)) | 267 | if (!wake_up_process(cpu->tsk)) |
228 | kick_process(cpu->tsk); | 268 | kick_process(cpu->tsk); |
229 | } | 269 | } |
230 | /*:*/ | 270 | /*:*/ |
231 | 271 | ||
232 | /* Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent | 272 | /* |
273 | * Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent | ||
233 | * me a patch, so we support that too. It'd be a big step for lguest if half | 274 | * me a patch, so we support that too. It'd be a big step for lguest if half |
234 | * the Plan 9 user base were to start using it. | 275 | * the Plan 9 user base were to start using it. |
235 | * | 276 | * |
236 | * Actually now I think of it, it's possible that Ron *is* half the Plan 9 | 277 | * Actually now I think of it, it's possible that Ron *is* half the Plan 9 |
237 | * userbase. Oh well. */ | 278 | * userbase. Oh well. |
279 | */ | ||
238 | static bool could_be_syscall(unsigned int num) | 280 | static bool could_be_syscall(unsigned int num) |
239 | { | 281 | { |
240 | /* Normal Linux SYSCALL_VECTOR or reserved vector? */ | 282 | /* Normal Linux SYSCALL_VECTOR or reserved vector? */ |
@@ -274,9 +316,11 @@ void free_interrupts(void) | |||
274 | clear_bit(syscall_vector, used_vectors); | 316 | clear_bit(syscall_vector, used_vectors); |
275 | } | 317 | } |
276 | 318 | ||
277 | /*H:220 Now we've got the routines to deliver interrupts, delivering traps like | 319 | /*H:220 |
320 | * Now we've got the routines to deliver interrupts, delivering traps like | ||
278 | * page fault is easy. The only trick is that Intel decided that some traps | 321 | * page fault is easy. The only trick is that Intel decided that some traps |
279 | * should have error codes: */ | 322 | * should have error codes: |
323 | */ | ||
280 | static bool has_err(unsigned int trap) | 324 | static bool has_err(unsigned int trap) |
281 | { | 325 | { |
282 | return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); | 326 | return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); |
@@ -285,13 +329,17 @@ static bool has_err(unsigned int trap) | |||
285 | /* deliver_trap() returns true if it could deliver the trap. */ | 329 | /* deliver_trap() returns true if it could deliver the trap. */ |
286 | bool deliver_trap(struct lg_cpu *cpu, unsigned int num) | 330 | bool deliver_trap(struct lg_cpu *cpu, unsigned int num) |
287 | { | 331 | { |
288 | /* Trap numbers are always 8 bit, but we set an impossible trap number | 332 | /* |
289 | * for traps inside the Switcher, so check that here. */ | 333 | * Trap numbers are always 8 bit, but we set an impossible trap number |
334 | * for traps inside the Switcher, so check that here. | ||
335 | */ | ||
290 | if (num >= ARRAY_SIZE(cpu->arch.idt)) | 336 | if (num >= ARRAY_SIZE(cpu->arch.idt)) |
291 | return false; | 337 | return false; |
292 | 338 | ||
293 | /* Early on the Guest hasn't set the IDT entries (or maybe it put a | 339 | /* |
294 | * bogus one in): if we fail here, the Guest will be killed. */ | 340 | * Early on the Guest hasn't set the IDT entries (or maybe it put a |
341 | * bogus one in): if we fail here, the Guest will be killed. | ||
342 | */ | ||
295 | if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) | 343 | if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) |
296 | return false; | 344 | return false; |
297 | set_guest_interrupt(cpu, cpu->arch.idt[num].a, | 345 | set_guest_interrupt(cpu, cpu->arch.idt[num].a, |
@@ -299,7 +347,8 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) | |||
299 | return true; | 347 | return true; |
300 | } | 348 | } |
301 | 349 | ||
302 | /*H:250 Here's the hard part: returning to the Host every time a trap happens | 350 | /*H:250 |
351 | * Here's the hard part: returning to the Host every time a trap happens | ||
303 | * and then calling deliver_trap() and re-entering the Guest is slow. | 352 | * and then calling deliver_trap() and re-entering the Guest is slow. |
304 | * Particularly because Guest userspace system calls are traps (usually trap | 353 | * Particularly because Guest userspace system calls are traps (usually trap |
305 | * 128). | 354 | * 128). |
@@ -311,69 +360,87 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) | |||
311 | * the other hypervisors would beat it up at lunchtime. | 360 | * the other hypervisors would beat it up at lunchtime. |
312 | * | 361 | * |
313 | * This routine indicates if a particular trap number could be delivered | 362 | * This routine indicates if a particular trap number could be delivered |
314 | * directly. */ | 363 | * directly. |
364 | */ | ||
315 | static bool direct_trap(unsigned int num) | 365 | static bool direct_trap(unsigned int num) |
316 | { | 366 | { |
317 | /* Hardware interrupts don't go to the Guest at all (except system | 367 | /* |
318 | * call). */ | 368 | * Hardware interrupts don't go to the Guest at all (except system |
369 | * call). | ||
370 | */ | ||
319 | if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num)) | 371 | if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num)) |
320 | return false; | 372 | return false; |
321 | 373 | ||
322 | /* The Host needs to see page faults (for shadow paging and to save the | 374 | /* |
375 | * The Host needs to see page faults (for shadow paging and to save the | ||
323 | * fault address), general protection faults (in/out emulation) and | 376 | * fault address), general protection faults (in/out emulation) and |
324 | * device not available (TS handling), invalid opcode fault (kvm hcall), | 377 | * device not available (TS handling), invalid opcode fault (kvm hcall), |
325 | * and of course, the hypercall trap. */ | 378 | * and of course, the hypercall trap. |
379 | */ | ||
326 | return num != 14 && num != 13 && num != 7 && | 380 | return num != 14 && num != 13 && num != 7 && |
327 | num != 6 && num != LGUEST_TRAP_ENTRY; | 381 | num != 6 && num != LGUEST_TRAP_ENTRY; |
328 | } | 382 | } |
329 | /*:*/ | 383 | /*:*/ |
330 | 384 | ||
331 | /*M:005 The Guest has the ability to turn its interrupt gates into trap gates, | 385 | /*M:005 |
386 | * The Guest has the ability to turn its interrupt gates into trap gates, | ||
332 | * if it is careful. The Host will let trap gates can go directly to the | 387 | * if it is careful. The Host will let trap gates can go directly to the |
333 | * Guest, but the Guest needs the interrupts atomically disabled for an | 388 | * Guest, but the Guest needs the interrupts atomically disabled for an |
334 | * interrupt gate. It can do this by pointing the trap gate at instructions | 389 | * interrupt gate. It can do this by pointing the trap gate at instructions |
335 | * within noirq_start and noirq_end, where it can safely disable interrupts. */ | 390 | * within noirq_start and noirq_end, where it can safely disable interrupts. |
391 | */ | ||
336 | 392 | ||
337 | /*M:006 The Guests do not use the sysenter (fast system call) instruction, | 393 | /*M:006 |
394 | * The Guests do not use the sysenter (fast system call) instruction, | ||
338 | * because it's hardcoded to enter privilege level 0 and so can't go direct. | 395 | * because it's hardcoded to enter privilege level 0 and so can't go direct. |
339 | * It's about twice as fast as the older "int 0x80" system call, so it might | 396 | * It's about twice as fast as the older "int 0x80" system call, so it might |
340 | * still be worthwhile to handle it in the Switcher and lcall down to the | 397 | * still be worthwhile to handle it in the Switcher and lcall down to the |
341 | * Guest. The sysenter semantics are hairy tho: search for that keyword in | 398 | * Guest. The sysenter semantics are hairy tho: search for that keyword in |
342 | * entry.S :*/ | 399 | * entry.S |
400 | :*/ | ||
343 | 401 | ||
344 | /*H:260 When we make traps go directly into the Guest, we need to make sure | 402 | /*H:260 |
403 | * When we make traps go directly into the Guest, we need to make sure | ||
345 | * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the | 404 | * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the |
346 | * CPU trying to deliver the trap will fault while trying to push the interrupt | 405 | * CPU trying to deliver the trap will fault while trying to push the interrupt |
347 | * words on the stack: this is called a double fault, and it forces us to kill | 406 | * words on the stack: this is called a double fault, and it forces us to kill |
348 | * the Guest. | 407 | * the Guest. |
349 | * | 408 | * |
350 | * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */ | 409 | * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. |
410 | */ | ||
351 | void pin_stack_pages(struct lg_cpu *cpu) | 411 | void pin_stack_pages(struct lg_cpu *cpu) |
352 | { | 412 | { |
353 | unsigned int i; | 413 | unsigned int i; |
354 | 414 | ||
355 | /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or | 415 | /* |
356 | * two pages of stack space. */ | 416 | * Depending on the CONFIG_4KSTACKS option, the Guest can have one or |
417 | * two pages of stack space. | ||
418 | */ | ||
357 | for (i = 0; i < cpu->lg->stack_pages; i++) | 419 | for (i = 0; i < cpu->lg->stack_pages; i++) |
358 | /* The stack grows *upwards*, so the address we're given is the | 420 | /* |
421 | * The stack grows *upwards*, so the address we're given is the | ||
359 | * start of the page after the kernel stack. Subtract one to | 422 | * start of the page after the kernel stack. Subtract one to |
360 | * get back onto the first stack page, and keep subtracting to | 423 | * get back onto the first stack page, and keep subtracting to |
361 | * get to the rest of the stack pages. */ | 424 | * get to the rest of the stack pages. |
425 | */ | ||
362 | pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); | 426 | pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); |
363 | } | 427 | } |
364 | 428 | ||
365 | /* Direct traps also mean that we need to know whenever the Guest wants to use | 429 | /* |
430 | * Direct traps also mean that we need to know whenever the Guest wants to use | ||
366 | * a different kernel stack, so we can change the IDT entries to use that | 431 | * a different kernel stack, so we can change the IDT entries to use that |
367 | * stack. The IDT entries expect a virtual address, so unlike most addresses | 432 | * stack. The IDT entries expect a virtual address, so unlike most addresses |
368 | * the Guest gives us, the "esp" (stack pointer) value here is virtual, not | 433 | * the Guest gives us, the "esp" (stack pointer) value here is virtual, not |
369 | * physical. | 434 | * physical. |
370 | * | 435 | * |
371 | * In Linux each process has its own kernel stack, so this happens a lot: we | 436 | * In Linux each process has its own kernel stack, so this happens a lot: we |
372 | * change stacks on each context switch. */ | 437 | * change stacks on each context switch. |
438 | */ | ||
373 | void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) | 439 | void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) |
374 | { | 440 | { |
375 | /* You are not allowed have a stack segment with privilege level 0: bad | 441 | /* |
376 | * Guest! */ | 442 | * You're not allowed a stack segment with privilege level 0: bad Guest! |
443 | */ | ||
377 | if ((seg & 0x3) != GUEST_PL) | 444 | if ((seg & 0x3) != GUEST_PL) |
378 | kill_guest(cpu, "bad stack segment %i", seg); | 445 | kill_guest(cpu, "bad stack segment %i", seg); |
379 | /* We only expect one or two stack pages. */ | 446 | /* We only expect one or two stack pages. */ |
@@ -387,11 +454,15 @@ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) | |||
387 | pin_stack_pages(cpu); | 454 | pin_stack_pages(cpu); |
388 | } | 455 | } |
389 | 456 | ||
390 | /* All this reference to mapping stacks leads us neatly into the other complex | 457 | /* |
391 | * part of the Host: page table handling. */ | 458 | * All this reference to mapping stacks leads us neatly into the other complex |
459 | * part of the Host: page table handling. | ||
460 | */ | ||
392 | 461 | ||
393 | /*H:235 This is the routine which actually checks the Guest's IDT entry and | 462 | /*H:235 |
394 | * transfers it into the entry in "struct lguest": */ | 463 | * This is the routine which actually checks the Guest's IDT entry and |
464 | * transfers it into the entry in "struct lguest": | ||
465 | */ | ||
395 | static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, | 466 | static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, |
396 | unsigned int num, u32 lo, u32 hi) | 467 | unsigned int num, u32 lo, u32 hi) |
397 | { | 468 | { |
@@ -407,30 +478,38 @@ static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, | |||
407 | if (type != 0xE && type != 0xF) | 478 | if (type != 0xE && type != 0xF) |
408 | kill_guest(cpu, "bad IDT type %i", type); | 479 | kill_guest(cpu, "bad IDT type %i", type); |
409 | 480 | ||
410 | /* We only copy the handler address, present bit, privilege level and | 481 | /* |
482 | * We only copy the handler address, present bit, privilege level and | ||
411 | * type. The privilege level controls where the trap can be triggered | 483 | * type. The privilege level controls where the trap can be triggered |
412 | * manually with an "int" instruction. This is usually GUEST_PL, | 484 | * manually with an "int" instruction. This is usually GUEST_PL, |
413 | * except for system calls which userspace can use. */ | 485 | * except for system calls which userspace can use. |
486 | */ | ||
414 | trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); | 487 | trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); |
415 | trap->b = (hi&0xFFFFEF00); | 488 | trap->b = (hi&0xFFFFEF00); |
416 | } | 489 | } |
417 | 490 | ||
418 | /*H:230 While we're here, dealing with delivering traps and interrupts to the | 491 | /*H:230 |
492 | * While we're here, dealing with delivering traps and interrupts to the | ||
419 | * Guest, we might as well complete the picture: how the Guest tells us where | 493 | * Guest, we might as well complete the picture: how the Guest tells us where |
420 | * it wants them to go. This would be simple, except making traps fast | 494 | * it wants them to go. This would be simple, except making traps fast |
421 | * requires some tricks. | 495 | * requires some tricks. |
422 | * | 496 | * |
423 | * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the | 497 | * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the |
424 | * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */ | 498 | * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. |
499 | */ | ||
425 | void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) | 500 | void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) |
426 | { | 501 | { |
427 | /* Guest never handles: NMI, doublefault, spurious interrupt or | 502 | /* |
428 | * hypercall. We ignore when it tries to set them. */ | 503 | * Guest never handles: NMI, doublefault, spurious interrupt or |
504 | * hypercall. We ignore when it tries to set them. | ||
505 | */ | ||
429 | if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) | 506 | if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) |
430 | return; | 507 | return; |
431 | 508 | ||
432 | /* Mark the IDT as changed: next time the Guest runs we'll know we have | 509 | /* |
433 | * to copy this again. */ | 510 | * Mark the IDT as changed: next time the Guest runs we'll know we have |
511 | * to copy this again. | ||
512 | */ | ||
434 | cpu->changed |= CHANGED_IDT; | 513 | cpu->changed |= CHANGED_IDT; |
435 | 514 | ||
436 | /* Check that the Guest doesn't try to step outside the bounds. */ | 515 | /* Check that the Guest doesn't try to step outside the bounds. */ |
@@ -440,9 +519,11 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) | |||
440 | set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); | 519 | set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); |
441 | } | 520 | } |
442 | 521 | ||
443 | /* The default entry for each interrupt points into the Switcher routines which | 522 | /* |
523 | * The default entry for each interrupt points into the Switcher routines which | ||
444 | * simply return to the Host. The run_guest() loop will then call | 524 | * simply return to the Host. The run_guest() loop will then call |
445 | * deliver_trap() to bounce it back into the Guest. */ | 525 | * deliver_trap() to bounce it back into the Guest. |
526 | */ | ||
446 | static void default_idt_entry(struct desc_struct *idt, | 527 | static void default_idt_entry(struct desc_struct *idt, |
447 | int trap, | 528 | int trap, |
448 | const unsigned long handler, | 529 | const unsigned long handler, |
@@ -451,13 +532,17 @@ static void default_idt_entry(struct desc_struct *idt, | |||
451 | /* A present interrupt gate. */ | 532 | /* A present interrupt gate. */ |
452 | u32 flags = 0x8e00; | 533 | u32 flags = 0x8e00; |
453 | 534 | ||
454 | /* Set the privilege level on the entry for the hypercall: this allows | 535 | /* |
455 | * the Guest to use the "int" instruction to trigger it. */ | 536 | * Set the privilege level on the entry for the hypercall: this allows |
537 | * the Guest to use the "int" instruction to trigger it. | ||
538 | */ | ||
456 | if (trap == LGUEST_TRAP_ENTRY) | 539 | if (trap == LGUEST_TRAP_ENTRY) |
457 | flags |= (GUEST_PL << 13); | 540 | flags |= (GUEST_PL << 13); |
458 | else if (base) | 541 | else if (base) |
459 | /* Copy priv. level from what Guest asked for. This allows | 542 | /* |
460 | * debug (int 3) traps from Guest userspace, for example. */ | 543 | * Copy privilege level from what Guest asked for. This allows |
544 | * debug (int 3) traps from Guest userspace, for example. | ||
545 | */ | ||
461 | flags |= (base->b & 0x6000); | 546 | flags |= (base->b & 0x6000); |
462 | 547 | ||
463 | /* Now pack it into the IDT entry in its weird format. */ | 548 | /* Now pack it into the IDT entry in its weird format. */ |
@@ -475,16 +560,20 @@ void setup_default_idt_entries(struct lguest_ro_state *state, | |||
475 | default_idt_entry(&state->guest_idt[i], i, def[i], NULL); | 560 | default_idt_entry(&state->guest_idt[i], i, def[i], NULL); |
476 | } | 561 | } |
477 | 562 | ||
478 | /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead | 563 | /*H:240 |
564 | * We don't use the IDT entries in the "struct lguest" directly, instead | ||
479 | * we copy them into the IDT which we've set up for Guests on this CPU, just | 565 | * we copy them into the IDT which we've set up for Guests on this CPU, just |
480 | * before we run the Guest. This routine does that copy. */ | 566 | * before we run the Guest. This routine does that copy. |
567 | */ | ||
481 | void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | 568 | void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, |
482 | const unsigned long *def) | 569 | const unsigned long *def) |
483 | { | 570 | { |
484 | unsigned int i; | 571 | unsigned int i; |
485 | 572 | ||
486 | /* We can simply copy the direct traps, otherwise we use the default | 573 | /* |
487 | * ones in the Switcher: they will return to the Host. */ | 574 | * We can simply copy the direct traps, otherwise we use the default |
575 | * ones in the Switcher: they will return to the Host. | ||
576 | */ | ||
488 | for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { | 577 | for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { |
489 | const struct desc_struct *gidt = &cpu->arch.idt[i]; | 578 | const struct desc_struct *gidt = &cpu->arch.idt[i]; |
490 | 579 | ||
@@ -492,14 +581,16 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | |||
492 | if (!direct_trap(i)) | 581 | if (!direct_trap(i)) |
493 | continue; | 582 | continue; |
494 | 583 | ||
495 | /* Only trap gates (type 15) can go direct to the Guest. | 584 | /* |
585 | * Only trap gates (type 15) can go direct to the Guest. | ||
496 | * Interrupt gates (type 14) disable interrupts as they are | 586 | * Interrupt gates (type 14) disable interrupts as they are |
497 | * entered, which we never let the Guest do. Not present | 587 | * entered, which we never let the Guest do. Not present |
498 | * entries (type 0x0) also can't go direct, of course. | 588 | * entries (type 0x0) also can't go direct, of course. |
499 | * | 589 | * |
500 | * If it can't go direct, we still need to copy the priv. level: | 590 | * If it can't go direct, we still need to copy the priv. level: |
501 | * they might want to give userspace access to a software | 591 | * they might want to give userspace access to a software |
502 | * interrupt. */ | 592 | * interrupt. |
593 | */ | ||
503 | if (idt_type(gidt->a, gidt->b) == 0xF) | 594 | if (idt_type(gidt->a, gidt->b) == 0xF) |
504 | idt[i] = *gidt; | 595 | idt[i] = *gidt; |
505 | else | 596 | else |
@@ -518,7 +609,8 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, | |||
518 | * the next timer interrupt (in nanoseconds). We use the high-resolution timer | 609 | * the next timer interrupt (in nanoseconds). We use the high-resolution timer |
519 | * infrastructure to set a callback at that time. | 610 | * infrastructure to set a callback at that time. |
520 | * | 611 | * |
521 | * 0 means "turn off the clock". */ | 612 | * 0 means "turn off the clock". |
613 | */ | ||
522 | void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) | 614 | void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) |
523 | { | 615 | { |
524 | ktime_t expires; | 616 | ktime_t expires; |
@@ -529,9 +621,11 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) | |||
529 | return; | 621 | return; |
530 | } | 622 | } |
531 | 623 | ||
532 | /* We use wallclock time here, so the Guest might not be running for | 624 | /* |
625 | * We use wallclock time here, so the Guest might not be running for | ||
533 | * all the time between now and the timer interrupt it asked for. This | 626 | * all the time between now and the timer interrupt it asked for. This |
534 | * is almost always the right thing to do. */ | 627 | * is almost always the right thing to do. |
628 | */ | ||
535 | expires = ktime_add_ns(ktime_get_real(), delta); | 629 | expires = ktime_add_ns(ktime_get_real(), delta); |
536 | hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); | 630 | hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); |
537 | } | 631 | } |
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 01c591923793..bc28745d05af 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h | |||
@@ -16,15 +16,13 @@ | |||
16 | void free_pagetables(void); | 16 | void free_pagetables(void); |
17 | int init_pagetables(struct page **switcher_page, unsigned int pages); | 17 | int init_pagetables(struct page **switcher_page, unsigned int pages); |
18 | 18 | ||
19 | struct pgdir | 19 | struct pgdir { |
20 | { | ||
21 | unsigned long gpgdir; | 20 | unsigned long gpgdir; |
22 | pgd_t *pgdir; | 21 | pgd_t *pgdir; |
23 | }; | 22 | }; |
24 | 23 | ||
25 | /* We have two pages shared with guests, per cpu. */ | 24 | /* We have two pages shared with guests, per cpu. */ |
26 | struct lguest_pages | 25 | struct lguest_pages { |
27 | { | ||
28 | /* This is the stack page mapped rw in guest */ | 26 | /* This is the stack page mapped rw in guest */ |
29 | char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; | 27 | char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; |
30 | struct lguest_regs regs; | 28 | struct lguest_regs regs; |
@@ -54,13 +52,13 @@ struct lg_cpu { | |||
54 | 52 | ||
55 | unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ | 53 | unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ |
56 | 54 | ||
57 | /* At end of a page shared mapped over lguest_pages in guest. */ | 55 | /* At end of a page shared mapped over lguest_pages in guest. */ |
58 | unsigned long regs_page; | 56 | unsigned long regs_page; |
59 | struct lguest_regs *regs; | 57 | struct lguest_regs *regs; |
60 | 58 | ||
61 | struct lguest_pages *last_pages; | 59 | struct lguest_pages *last_pages; |
62 | 60 | ||
63 | int cpu_pgd; /* which pgd this cpu is currently using */ | 61 | int cpu_pgd; /* Which pgd this cpu is currently using */ |
64 | 62 | ||
65 | /* If a hypercall was asked for, this points to the arguments. */ | 63 | /* If a hypercall was asked for, this points to the arguments. */ |
66 | struct hcall_args *hcall; | 64 | struct hcall_args *hcall; |
@@ -89,15 +87,17 @@ struct lg_eventfd_map { | |||
89 | }; | 87 | }; |
90 | 88 | ||
91 | /* The private info the thread maintains about the guest. */ | 89 | /* The private info the thread maintains about the guest. */ |
92 | struct lguest | 90 | struct lguest { |
93 | { | ||
94 | struct lguest_data __user *lguest_data; | 91 | struct lguest_data __user *lguest_data; |
95 | struct lg_cpu cpus[NR_CPUS]; | 92 | struct lg_cpu cpus[NR_CPUS]; |
96 | unsigned int nr_cpus; | 93 | unsigned int nr_cpus; |
97 | 94 | ||
98 | u32 pfn_limit; | 95 | u32 pfn_limit; |
99 | /* This provides the offset to the base of guest-physical | 96 | |
100 | * memory in the Launcher. */ | 97 | /* |
98 | * This provides the offset to the base of guest-physical memory in the | ||
99 | * Launcher. | ||
100 | */ | ||
101 | void __user *mem_base; | 101 | void __user *mem_base; |
102 | unsigned long kernel_address; | 102 | unsigned long kernel_address; |
103 | 103 | ||
@@ -122,11 +122,13 @@ bool lguest_address_ok(const struct lguest *lg, | |||
122 | void __lgread(struct lg_cpu *, void *, unsigned long, unsigned); | 122 | void __lgread(struct lg_cpu *, void *, unsigned long, unsigned); |
123 | void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); | 123 | void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); |
124 | 124 | ||
125 | /*H:035 Using memory-copy operations like that is usually inconvient, so we | 125 | /*H:035 |
126 | * Using memory-copy operations like that is usually inconvient, so we | ||
126 | * have the following helper macros which read and write a specific type (often | 127 | * have the following helper macros which read and write a specific type (often |
127 | * an unsigned long). | 128 | * an unsigned long). |
128 | * | 129 | * |
129 | * This reads into a variable of the given type then returns that. */ | 130 | * This reads into a variable of the given type then returns that. |
131 | */ | ||
130 | #define lgread(cpu, addr, type) \ | 132 | #define lgread(cpu, addr, type) \ |
131 | ({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; }) | 133 | ({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; }) |
132 | 134 | ||
@@ -140,9 +142,11 @@ void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); | |||
140 | 142 | ||
141 | int run_guest(struct lg_cpu *cpu, unsigned long __user *user); | 143 | int run_guest(struct lg_cpu *cpu, unsigned long __user *user); |
142 | 144 | ||
143 | /* Helper macros to obtain the first 12 or the last 20 bits, this is only the | 145 | /* |
146 | * Helper macros to obtain the first 12 or the last 20 bits, this is only the | ||
144 | * first step in the migration to the kernel types. pte_pfn is already defined | 147 | * first step in the migration to the kernel types. pte_pfn is already defined |
145 | * in the kernel. */ | 148 | * in the kernel. |
149 | */ | ||
146 | #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) | 150 | #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) |
147 | #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) | 151 | #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) |
148 | #define pmd_flags(x) (pmd_val(x) & ~PAGE_MASK) | 152 | #define pmd_flags(x) (pmd_val(x) & ~PAGE_MASK) |
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index e082cdac88b4..b6200bc39b58 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c | |||
@@ -1,10 +1,12 @@ | |||
1 | /*P:050 Lguest guests use a very simple method to describe devices. It's a | 1 | /*P:050 |
2 | * Lguest guests use a very simple method to describe devices. It's a | ||
2 | * series of device descriptors contained just above the top of normal Guest | 3 | * series of device descriptors contained just above the top of normal Guest |
3 | * memory. | 4 | * memory. |
4 | * | 5 | * |
5 | * We use the standard "virtio" device infrastructure, which provides us with a | 6 | * We use the standard "virtio" device infrastructure, which provides us with a |
6 | * console, a network and a block driver. Each one expects some configuration | 7 | * console, a network and a block driver. Each one expects some configuration |
7 | * information and a "virtqueue" or two to send and receive data. :*/ | 8 | * information and a "virtqueue" or two to send and receive data. |
9 | :*/ | ||
8 | #include <linux/init.h> | 10 | #include <linux/init.h> |
9 | #include <linux/bootmem.h> | 11 | #include <linux/bootmem.h> |
10 | #include <linux/lguest_launcher.h> | 12 | #include <linux/lguest_launcher.h> |
@@ -20,8 +22,10 @@ | |||
20 | /* The pointer to our (page) of device descriptions. */ | 22 | /* The pointer to our (page) of device descriptions. */ |
21 | static void *lguest_devices; | 23 | static void *lguest_devices; |
22 | 24 | ||
23 | /* For Guests, device memory can be used as normal memory, so we cast away the | 25 | /* |
24 | * __iomem to quieten sparse. */ | 26 | * For Guests, device memory can be used as normal memory, so we cast away the |
27 | * __iomem to quieten sparse. | ||
28 | */ | ||
25 | static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) | 29 | static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) |
26 | { | 30 | { |
27 | return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages); | 31 | return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages); |
@@ -32,8 +36,10 @@ static inline void lguest_unmap(void *addr) | |||
32 | iounmap((__force void __iomem *)addr); | 36 | iounmap((__force void __iomem *)addr); |
33 | } | 37 | } |
34 | 38 | ||
35 | /*D:100 Each lguest device is just a virtio device plus a pointer to its entry | 39 | /*D:100 |
36 | * in the lguest_devices page. */ | 40 | * Each lguest device is just a virtio device plus a pointer to its entry |
41 | * in the lguest_devices page. | ||
42 | */ | ||
37 | struct lguest_device { | 43 | struct lguest_device { |
38 | struct virtio_device vdev; | 44 | struct virtio_device vdev; |
39 | 45 | ||
@@ -41,9 +47,11 @@ struct lguest_device { | |||
41 | struct lguest_device_desc *desc; | 47 | struct lguest_device_desc *desc; |
42 | }; | 48 | }; |
43 | 49 | ||
44 | /* Since the virtio infrastructure hands us a pointer to the virtio_device all | 50 | /* |
51 | * Since the virtio infrastructure hands us a pointer to the virtio_device all | ||
45 | * the time, it helps to have a curt macro to get a pointer to the struct | 52 | * the time, it helps to have a curt macro to get a pointer to the struct |
46 | * lguest_device it's enclosed in. */ | 53 | * lguest_device it's enclosed in. |
54 | */ | ||
47 | #define to_lgdev(vd) container_of(vd, struct lguest_device, vdev) | 55 | #define to_lgdev(vd) container_of(vd, struct lguest_device, vdev) |
48 | 56 | ||
49 | /*D:130 | 57 | /*D:130 |
@@ -55,7 +63,8 @@ struct lguest_device { | |||
55 | * the driver will look at them during setup. | 63 | * the driver will look at them during setup. |
56 | * | 64 | * |
57 | * A convenient routine to return the device's virtqueue config array: | 65 | * A convenient routine to return the device's virtqueue config array: |
58 | * immediately after the descriptor. */ | 66 | * immediately after the descriptor. |
67 | */ | ||
59 | static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) | 68 | static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) |
60 | { | 69 | { |
61 | return (void *)(desc + 1); | 70 | return (void *)(desc + 1); |
@@ -98,10 +107,12 @@ static u32 lg_get_features(struct virtio_device *vdev) | |||
98 | return features; | 107 | return features; |
99 | } | 108 | } |
100 | 109 | ||
101 | /* The virtio core takes the features the Host offers, and copies the | 110 | /* |
102 | * ones supported by the driver into the vdev->features array. Once | 111 | * The virtio core takes the features the Host offers, and copies the ones |
103 | * that's all sorted out, this routine is called so we can tell the | 112 | * supported by the driver into the vdev->features array. Once that's all |
104 | * Host which features we understand and accept. */ | 113 | * sorted out, this routine is called so we can tell the Host which features we |
114 | * understand and accept. | ||
115 | */ | ||
105 | static void lg_finalize_features(struct virtio_device *vdev) | 116 | static void lg_finalize_features(struct virtio_device *vdev) |
106 | { | 117 | { |
107 | unsigned int i, bits; | 118 | unsigned int i, bits; |
@@ -112,10 +123,11 @@ static void lg_finalize_features(struct virtio_device *vdev) | |||
112 | /* Give virtio_ring a chance to accept features. */ | 123 | /* Give virtio_ring a chance to accept features. */ |
113 | vring_transport_features(vdev); | 124 | vring_transport_features(vdev); |
114 | 125 | ||
115 | /* The vdev->feature array is a Linux bitmask: this isn't the | 126 | /* |
116 | * same as a the simple array of bits used by lguest devices | 127 | * The vdev->feature array is a Linux bitmask: this isn't the same as a |
117 | * for features. So we do this slow, manual conversion which is | 128 | * the simple array of bits used by lguest devices for features. So we |
118 | * completely general. */ | 129 | * do this slow, manual conversion which is completely general. |
130 | */ | ||
119 | memset(out_features, 0, desc->feature_len); | 131 | memset(out_features, 0, desc->feature_len); |
120 | bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; | 132 | bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; |
121 | for (i = 0; i < bits; i++) { | 133 | for (i = 0; i < bits; i++) { |
@@ -146,15 +158,19 @@ static void lg_set(struct virtio_device *vdev, unsigned int offset, | |||
146 | memcpy(lg_config(desc) + offset, buf, len); | 158 | memcpy(lg_config(desc) + offset, buf, len); |
147 | } | 159 | } |
148 | 160 | ||
149 | /* The operations to get and set the status word just access the status field | 161 | /* |
150 | * of the device descriptor. */ | 162 | * The operations to get and set the status word just access the status field |
163 | * of the device descriptor. | ||
164 | */ | ||
151 | static u8 lg_get_status(struct virtio_device *vdev) | 165 | static u8 lg_get_status(struct virtio_device *vdev) |
152 | { | 166 | { |
153 | return to_lgdev(vdev)->desc->status; | 167 | return to_lgdev(vdev)->desc->status; |
154 | } | 168 | } |
155 | 169 | ||
156 | /* To notify on status updates, we (ab)use the NOTIFY hypercall, with the | 170 | /* |
157 | * descriptor address of the device. A zero status means "reset". */ | 171 | * To notify on status updates, we (ab)use the NOTIFY hypercall, with the |
172 | * descriptor address of the device. A zero status means "reset". | ||
173 | */ | ||
158 | static void set_status(struct virtio_device *vdev, u8 status) | 174 | static void set_status(struct virtio_device *vdev, u8 status) |
159 | { | 175 | { |
160 | unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; | 176 | unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; |
@@ -191,8 +207,7 @@ static void lg_reset(struct virtio_device *vdev) | |||
191 | */ | 207 | */ |
192 | 208 | ||
193 | /*D:140 This is the information we remember about each virtqueue. */ | 209 | /*D:140 This is the information we remember about each virtqueue. */ |
194 | struct lguest_vq_info | 210 | struct lguest_vq_info { |
195 | { | ||
196 | /* A copy of the information contained in the device config. */ | 211 | /* A copy of the information contained in the device config. */ |
197 | struct lguest_vqconfig config; | 212 | struct lguest_vqconfig config; |
198 | 213 | ||
@@ -200,13 +215,17 @@ struct lguest_vq_info | |||
200 | void *pages; | 215 | void *pages; |
201 | }; | 216 | }; |
202 | 217 | ||
203 | /* When the virtio_ring code wants to prod the Host, it calls us here and we | 218 | /* |
219 | * When the virtio_ring code wants to prod the Host, it calls us here and we | ||
204 | * make a hypercall. We hand the physical address of the virtqueue so the Host | 220 | * make a hypercall. We hand the physical address of the virtqueue so the Host |
205 | * knows which virtqueue we're talking about. */ | 221 | * knows which virtqueue we're talking about. |
222 | */ | ||
206 | static void lg_notify(struct virtqueue *vq) | 223 | static void lg_notify(struct virtqueue *vq) |
207 | { | 224 | { |
208 | /* We store our virtqueue information in the "priv" pointer of the | 225 | /* |
209 | * virtqueue structure. */ | 226 | * We store our virtqueue information in the "priv" pointer of the |
227 | * virtqueue structure. | ||
228 | */ | ||
210 | struct lguest_vq_info *lvq = vq->priv; | 229 | struct lguest_vq_info *lvq = vq->priv; |
211 | 230 | ||
212 | kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT); | 231 | kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT); |
@@ -215,7 +234,8 @@ static void lg_notify(struct virtqueue *vq) | |||
215 | /* An extern declaration inside a C file is bad form. Don't do it. */ | 234 | /* An extern declaration inside a C file is bad form. Don't do it. */ |
216 | extern void lguest_setup_irq(unsigned int irq); | 235 | extern void lguest_setup_irq(unsigned int irq); |
217 | 236 | ||
218 | /* This routine finds the first virtqueue described in the configuration of | 237 | /* |
238 | * This routine finds the Nth virtqueue described in the configuration of | ||
219 | * this device and sets it up. | 239 | * this device and sets it up. |
220 | * | 240 | * |
221 | * This is kind of an ugly duckling. It'd be nicer to have a standard | 241 | * This is kind of an ugly duckling. It'd be nicer to have a standard |
@@ -223,9 +243,7 @@ extern void lguest_setup_irq(unsigned int irq); | |||
223 | * everyone wants to do it differently. The KVM coders want the Guest to | 243 | * everyone wants to do it differently. The KVM coders want the Guest to |
224 | * allocate its own pages and tell the Host where they are, but for lguest it's | 244 | * allocate its own pages and tell the Host where they are, but for lguest it's |
225 | * simpler for the Host to simply tell us where the pages are. | 245 | * simpler for the Host to simply tell us where the pages are. |
226 | * | 246 | */ |
227 | * So we provide drivers with a "find the Nth virtqueue and set it up" | ||
228 | * function. */ | ||
229 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | 247 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, |
230 | unsigned index, | 248 | unsigned index, |
231 | void (*callback)(struct virtqueue *vq), | 249 | void (*callback)(struct virtqueue *vq), |
@@ -244,9 +262,11 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
244 | if (!lvq) | 262 | if (!lvq) |
245 | return ERR_PTR(-ENOMEM); | 263 | return ERR_PTR(-ENOMEM); |
246 | 264 | ||
247 | /* Make a copy of the "struct lguest_vqconfig" entry, which sits after | 265 | /* |
266 | * Make a copy of the "struct lguest_vqconfig" entry, which sits after | ||
248 | * the descriptor. We need a copy because the config space might not | 267 | * the descriptor. We need a copy because the config space might not |
249 | * be aligned correctly. */ | 268 | * be aligned correctly. |
269 | */ | ||
250 | memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); | 270 | memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); |
251 | 271 | ||
252 | printk("Mapping virtqueue %i addr %lx\n", index, | 272 | printk("Mapping virtqueue %i addr %lx\n", index, |
@@ -261,8 +281,10 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
261 | goto free_lvq; | 281 | goto free_lvq; |
262 | } | 282 | } |
263 | 283 | ||
264 | /* OK, tell virtio_ring.c to set up a virtqueue now we know its size | 284 | /* |
265 | * and we've got a pointer to its pages. */ | 285 | * OK, tell virtio_ring.c to set up a virtqueue now we know its size |
286 | * and we've got a pointer to its pages. | ||
287 | */ | ||
266 | vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, | 288 | vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, |
267 | vdev, lvq->pages, lg_notify, callback, name); | 289 | vdev, lvq->pages, lg_notify, callback, name); |
268 | if (!vq) { | 290 | if (!vq) { |
@@ -273,18 +295,23 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
273 | /* Make sure the interrupt is allocated. */ | 295 | /* Make sure the interrupt is allocated. */ |
274 | lguest_setup_irq(lvq->config.irq); | 296 | lguest_setup_irq(lvq->config.irq); |
275 | 297 | ||
276 | /* Tell the interrupt for this virtqueue to go to the virtio_ring | 298 | /* |
277 | * interrupt handler. */ | 299 | * Tell the interrupt for this virtqueue to go to the virtio_ring |
278 | /* FIXME: We used to have a flag for the Host to tell us we could use | 300 | * interrupt handler. |
301 | * | ||
302 | * FIXME: We used to have a flag for the Host to tell us we could use | ||
279 | * the interrupt as a source of randomness: it'd be nice to have that | 303 | * the interrupt as a source of randomness: it'd be nice to have that |
280 | * back.. */ | 304 | * back. |
305 | */ | ||
281 | err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, | 306 | err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, |
282 | dev_name(&vdev->dev), vq); | 307 | dev_name(&vdev->dev), vq); |
283 | if (err) | 308 | if (err) |
284 | goto destroy_vring; | 309 | goto destroy_vring; |
285 | 310 | ||
286 | /* Last of all we hook up our 'struct lguest_vq_info" to the | 311 | /* |
287 | * virtqueue's priv pointer. */ | 312 | * Last of all we hook up our 'struct lguest_vq_info" to the |
313 | * virtqueue's priv pointer. | ||
314 | */ | ||
288 | vq->priv = lvq; | 315 | vq->priv = lvq; |
289 | return vq; | 316 | return vq; |
290 | 317 | ||
@@ -358,11 +385,14 @@ static struct virtio_config_ops lguest_config_ops = { | |||
358 | .del_vqs = lg_del_vqs, | 385 | .del_vqs = lg_del_vqs, |
359 | }; | 386 | }; |
360 | 387 | ||
361 | /* The root device for the lguest virtio devices. This makes them appear as | 388 | /* |
362 | * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. */ | 389 | * The root device for the lguest virtio devices. This makes them appear as |
390 | * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. | ||
391 | */ | ||
363 | static struct device *lguest_root; | 392 | static struct device *lguest_root; |
364 | 393 | ||
365 | /*D:120 This is the core of the lguest bus: actually adding a new device. | 394 | /*D:120 |
395 | * This is the core of the lguest bus: actually adding a new device. | ||
366 | * It's a separate function because it's neater that way, and because an | 396 | * It's a separate function because it's neater that way, and because an |
367 | * earlier version of the code supported hotplug and unplug. They were removed | 397 | * earlier version of the code supported hotplug and unplug. They were removed |
368 | * early on because they were never used. | 398 | * early on because they were never used. |
@@ -371,14 +401,14 @@ static struct device *lguest_root; | |||
371 | * | 401 | * |
372 | * It's worth reading this carefully: we start with a pointer to the new device | 402 | * It's worth reading this carefully: we start with a pointer to the new device |
373 | * descriptor in the "lguest_devices" page, and the offset into the device | 403 | * descriptor in the "lguest_devices" page, and the offset into the device |
374 | * descriptor page so we can uniquely identify it if things go badly wrong. */ | 404 | * descriptor page so we can uniquely identify it if things go badly wrong. |
405 | */ | ||
375 | static void add_lguest_device(struct lguest_device_desc *d, | 406 | static void add_lguest_device(struct lguest_device_desc *d, |
376 | unsigned int offset) | 407 | unsigned int offset) |
377 | { | 408 | { |
378 | struct lguest_device *ldev; | 409 | struct lguest_device *ldev; |
379 | 410 | ||
380 | /* Start with zeroed memory; Linux's device layer seems to count on | 411 | /* Start with zeroed memory; Linux's device layer counts on it. */ |
381 | * it. */ | ||
382 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); | 412 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); |
383 | if (!ldev) { | 413 | if (!ldev) { |
384 | printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n", | 414 | printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n", |
@@ -388,17 +418,25 @@ static void add_lguest_device(struct lguest_device_desc *d, | |||
388 | 418 | ||
389 | /* This devices' parent is the lguest/ dir. */ | 419 | /* This devices' parent is the lguest/ dir. */ |
390 | ldev->vdev.dev.parent = lguest_root; | 420 | ldev->vdev.dev.parent = lguest_root; |
391 | /* We have a unique device index thanks to the dev_index counter. */ | 421 | /* |
422 | * The device type comes straight from the descriptor. There's also a | ||
423 | * device vendor field in the virtio_device struct, which we leave as | ||
424 | * 0. | ||
425 | */ | ||
392 | ldev->vdev.id.device = d->type; | 426 | ldev->vdev.id.device = d->type; |
393 | /* We have a simple set of routines for querying the device's | 427 | /* |
394 | * configuration information and setting its status. */ | 428 | * We have a simple set of routines for querying the device's |
429 | * configuration information and setting its status. | ||
430 | */ | ||
395 | ldev->vdev.config = &lguest_config_ops; | 431 | ldev->vdev.config = &lguest_config_ops; |
396 | /* And we remember the device's descriptor for lguest_config_ops. */ | 432 | /* And we remember the device's descriptor for lguest_config_ops. */ |
397 | ldev->desc = d; | 433 | ldev->desc = d; |
398 | 434 | ||
399 | /* register_virtio_device() sets up the generic fields for the struct | 435 | /* |
436 | * register_virtio_device() sets up the generic fields for the struct | ||
400 | * virtio_device and calls device_register(). This makes the bus | 437 | * virtio_device and calls device_register(). This makes the bus |
401 | * infrastructure look for a matching driver. */ | 438 | * infrastructure look for a matching driver. |
439 | */ | ||
402 | if (register_virtio_device(&ldev->vdev) != 0) { | 440 | if (register_virtio_device(&ldev->vdev) != 0) { |
403 | printk(KERN_ERR "Failed to register lguest dev %u type %u\n", | 441 | printk(KERN_ERR "Failed to register lguest dev %u type %u\n", |
404 | offset, d->type); | 442 | offset, d->type); |
@@ -406,8 +444,10 @@ static void add_lguest_device(struct lguest_device_desc *d, | |||
406 | } | 444 | } |
407 | } | 445 | } |
408 | 446 | ||
409 | /*D:110 scan_devices() simply iterates through the device page. The type 0 is | 447 | /*D:110 |
410 | * reserved to mean "end of devices". */ | 448 | * scan_devices() simply iterates through the device page. The type 0 is |
449 | * reserved to mean "end of devices". | ||
450 | */ | ||
411 | static void scan_devices(void) | 451 | static void scan_devices(void) |
412 | { | 452 | { |
413 | unsigned int i; | 453 | unsigned int i; |
@@ -426,7 +466,8 @@ static void scan_devices(void) | |||
426 | } | 466 | } |
427 | } | 467 | } |
428 | 468 | ||
429 | /*D:105 Fairly early in boot, lguest_devices_init() is called to set up the | 469 | /*D:105 |
470 | * Fairly early in boot, lguest_devices_init() is called to set up the | ||
430 | * lguest device infrastructure. We check that we are a Guest by checking | 471 | * lguest device infrastructure. We check that we are a Guest by checking |
431 | * pv_info.name: there are other ways of checking, but this seems most | 472 | * pv_info.name: there are other ways of checking, but this seems most |
432 | * obvious to me. | 473 | * obvious to me. |
@@ -437,7 +478,8 @@ static void scan_devices(void) | |||
437 | * correct sysfs incantation). | 478 | * correct sysfs incantation). |
438 | * | 479 | * |
439 | * Finally we call scan_devices() which adds all the devices found in the | 480 | * Finally we call scan_devices() which adds all the devices found in the |
440 | * lguest_devices page. */ | 481 | * lguest_devices page. |
482 | */ | ||
441 | static int __init lguest_devices_init(void) | 483 | static int __init lguest_devices_init(void) |
442 | { | 484 | { |
443 | if (strcmp(pv_info.name, "lguest") != 0) | 485 | if (strcmp(pv_info.name, "lguest") != 0) |
@@ -456,11 +498,13 @@ static int __init lguest_devices_init(void) | |||
456 | /* We do this after core stuff, but before the drivers. */ | 498 | /* We do this after core stuff, but before the drivers. */ |
457 | postcore_initcall(lguest_devices_init); | 499 | postcore_initcall(lguest_devices_init); |
458 | 500 | ||
459 | /*D:150 At this point in the journey we used to now wade through the lguest | 501 | /*D:150 |
502 | * At this point in the journey we used to now wade through the lguest | ||
460 | * devices themselves: net, block and console. Since they're all now virtio | 503 | * devices themselves: net, block and console. Since they're all now virtio |
461 | * devices rather than lguest-specific, I've decided to ignore them. Mostly, | 504 | * devices rather than lguest-specific, I've decided to ignore them. Mostly, |
462 | * they're kind of boring. But this does mean you'll never experience the | 505 | * they're kind of boring. But this does mean you'll never experience the |
463 | * thrill of reading the forbidden love scene buried deep in the block driver. | 506 | * thrill of reading the forbidden love scene buried deep in the block driver. |
464 | * | 507 | * |
465 | * "make Launcher" beckons, where we answer questions like "Where do Guests | 508 | * "make Launcher" beckons, where we answer questions like "Where do Guests |
466 | * come from?", and "What do you do when someone asks for optimization?". */ | 509 | * come from?", and "What do you do when someone asks for optimization?". |
510 | */ | ||
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 9f9a2953b383..b4d3f7ca554f 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c | |||
@@ -1,8 +1,9 @@ | |||
1 | /*P:200 This contains all the /dev/lguest code, whereby the userspace launcher | 1 | /*P:200 This contains all the /dev/lguest code, whereby the userspace launcher |
2 | * controls and communicates with the Guest. For example, the first write will | 2 | * controls and communicates with the Guest. For example, the first write will |
3 | * tell us the Guest's memory layout, pagetable, entry point and kernel address | 3 | * tell us the Guest's memory layout and entry point. A read will run the |
4 | * offset. A read will run the Guest until something happens, such as a signal | 4 | * Guest until something happens, such as a signal or the Guest doing a NOTIFY |
5 | * or the Guest doing a NOTIFY out to the Launcher. :*/ | 5 | * out to the Launcher. |
6 | :*/ | ||
6 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
7 | #include <linux/miscdevice.h> | 8 | #include <linux/miscdevice.h> |
8 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
@@ -11,14 +12,41 @@ | |||
11 | #include <linux/file.h> | 12 | #include <linux/file.h> |
12 | #include "lg.h" | 13 | #include "lg.h" |
13 | 14 | ||
15 | /*L:056 | ||
16 | * Before we move on, let's jump ahead and look at what the kernel does when | ||
17 | * it needs to look up the eventfds. That will complete our picture of how we | ||
18 | * use RCU. | ||
19 | * | ||
20 | * The notification value is in cpu->pending_notify: we return true if it went | ||
21 | * to an eventfd. | ||
22 | */ | ||
14 | bool send_notify_to_eventfd(struct lg_cpu *cpu) | 23 | bool send_notify_to_eventfd(struct lg_cpu *cpu) |
15 | { | 24 | { |
16 | unsigned int i; | 25 | unsigned int i; |
17 | struct lg_eventfd_map *map; | 26 | struct lg_eventfd_map *map; |
18 | 27 | ||
19 | /* lg->eventfds is RCU-protected */ | 28 | /* |
29 | * This "rcu_read_lock()" helps track when someone is still looking at | ||
30 | * the (RCU-using) eventfds array. It's not actually a lock at all; | ||
31 | * indeed it's a noop in many configurations. (You didn't expect me to | ||
32 | * explain all the RCU secrets here, did you?) | ||
33 | */ | ||
20 | rcu_read_lock(); | 34 | rcu_read_lock(); |
35 | /* | ||
36 | * rcu_dereference is the counter-side of rcu_assign_pointer(); it | ||
37 | * makes sure we don't access the memory pointed to by | ||
38 | * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, | ||
39 | * but Alpha allows this! Paul McKenney points out that a really | ||
40 | * aggressive compiler could have the same effect: | ||
41 | * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html | ||
42 | * | ||
43 | * So play safe, use rcu_dereference to get the rcu-protected pointer: | ||
44 | */ | ||
21 | map = rcu_dereference(cpu->lg->eventfds); | 45 | map = rcu_dereference(cpu->lg->eventfds); |
46 | /* | ||
47 | * Simple array search: even if they add an eventfd while we do this, | ||
48 | * we'll continue to use the old array and just won't see the new one. | ||
49 | */ | ||
22 | for (i = 0; i < map->num; i++) { | 50 | for (i = 0; i < map->num; i++) { |
23 | if (map->map[i].addr == cpu->pending_notify) { | 51 | if (map->map[i].addr == cpu->pending_notify) { |
24 | eventfd_signal(map->map[i].event, 1); | 52 | eventfd_signal(map->map[i].event, 1); |
@@ -26,19 +54,50 @@ bool send_notify_to_eventfd(struct lg_cpu *cpu) | |||
26 | break; | 54 | break; |
27 | } | 55 | } |
28 | } | 56 | } |
57 | /* We're done with the rcu-protected variable cpu->lg->eventfds. */ | ||
29 | rcu_read_unlock(); | 58 | rcu_read_unlock(); |
59 | |||
60 | /* If we cleared the notification, it's because we found a match. */ | ||
30 | return cpu->pending_notify == 0; | 61 | return cpu->pending_notify == 0; |
31 | } | 62 | } |
32 | 63 | ||
64 | /*L:055 | ||
65 | * One of the more tricksy tricks in the Linux Kernel is a technique called | ||
66 | * Read Copy Update. Since one point of lguest is to teach lguest journeyers | ||
67 | * about kernel coding, I use it here. (In case you're curious, other purposes | ||
68 | * include learning about virtualization and instilling a deep appreciation for | ||
69 | * simplicity and puppies). | ||
70 | * | ||
71 | * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we | ||
72 | * add new eventfds without ever blocking readers from accessing the array. | ||
73 | * The current Launcher only does this during boot, so that never happens. But | ||
74 | * Read Copy Update is cool, and adding a lock risks damaging even more puppies | ||
75 | * than this code does. | ||
76 | * | ||
77 | * We allocate a brand new one-larger array, copy the old one and add our new | ||
78 | * element. Then we make the lg eventfd pointer point to the new array. | ||
79 | * That's the easy part: now we need to free the old one, but we need to make | ||
80 | * sure no slow CPU somewhere is still looking at it. That's what | ||
81 | * synchronize_rcu does for us: waits until every CPU has indicated that it has | ||
82 | * moved on to know it's no longer using the old one. | ||
83 | * | ||
84 | * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. | ||
85 | */ | ||
33 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | 86 | static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) |
34 | { | 87 | { |
35 | struct lg_eventfd_map *new, *old = lg->eventfds; | 88 | struct lg_eventfd_map *new, *old = lg->eventfds; |
36 | 89 | ||
90 | /* | ||
91 | * We don't allow notifications on value 0 anyway (pending_notify of | ||
92 | * 0 means "nothing pending"). | ||
93 | */ | ||
37 | if (!addr) | 94 | if (!addr) |
38 | return -EINVAL; | 95 | return -EINVAL; |
39 | 96 | ||
40 | /* Replace the old array with the new one, carefully: others can | 97 | /* |
41 | * be accessing it at the same time */ | 98 | * Replace the old array with the new one, carefully: others can |
99 | * be accessing it at the same time. | ||
100 | */ | ||
42 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), | 101 | new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), |
43 | GFP_KERNEL); | 102 | GFP_KERNEL); |
44 | if (!new) | 103 | if (!new) |
@@ -52,22 +111,41 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) | |||
52 | new->map[new->num].addr = addr; | 111 | new->map[new->num].addr = addr; |
53 | new->map[new->num].event = eventfd_ctx_fdget(fd); | 112 | new->map[new->num].event = eventfd_ctx_fdget(fd); |
54 | if (IS_ERR(new->map[new->num].event)) { | 113 | if (IS_ERR(new->map[new->num].event)) { |
114 | int err = PTR_ERR(new->map[new->num].event); | ||
55 | kfree(new); | 115 | kfree(new); |
56 | return PTR_ERR(new->map[new->num].event); | 116 | return err; |
57 | } | 117 | } |
58 | new->num++; | 118 | new->num++; |
59 | 119 | ||
60 | /* Now put new one in place. */ | 120 | /* |
121 | * Now put new one in place: rcu_assign_pointer() is a fancy way of | ||
122 | * doing "lg->eventfds = new", but it uses memory barriers to make | ||
123 | * absolutely sure that the contents of "new" written above is nailed | ||
124 | * down before we actually do the assignment. | ||
125 | * | ||
126 | * We have to think about these kinds of things when we're operating on | ||
127 | * live data without locks. | ||
128 | */ | ||
61 | rcu_assign_pointer(lg->eventfds, new); | 129 | rcu_assign_pointer(lg->eventfds, new); |
62 | 130 | ||
63 | /* We're not in a big hurry. Wait until noone's looking at old | 131 | /* |
64 | * version, then delete it. */ | 132 | * We're not in a big hurry. Wait until noone's looking at old |
133 | * version, then free it. | ||
134 | */ | ||
65 | synchronize_rcu(); | 135 | synchronize_rcu(); |
66 | kfree(old); | 136 | kfree(old); |
67 | 137 | ||
68 | return 0; | 138 | return 0; |
69 | } | 139 | } |
70 | 140 | ||
141 | /*L:052 | ||
142 | * Receiving notifications from the Guest is usually done by attaching a | ||
143 | * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will | ||
144 | * become readable when the Guest does an LHCALL_NOTIFY with that value. | ||
145 | * | ||
146 | * This is really convenient for processing each virtqueue in a separate | ||
147 | * thread. | ||
148 | */ | ||
71 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | 149 | static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) |
72 | { | 150 | { |
73 | unsigned long addr, fd; | 151 | unsigned long addr, fd; |
@@ -79,15 +157,22 @@ static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) | |||
79 | if (get_user(fd, input) != 0) | 157 | if (get_user(fd, input) != 0) |
80 | return -EFAULT; | 158 | return -EFAULT; |
81 | 159 | ||
160 | /* | ||
161 | * Just make sure two callers don't add eventfds at once. We really | ||
162 | * only need to lock against callers adding to the same Guest, so using | ||
163 | * the Big Lguest Lock is overkill. But this is setup, not a fast path. | ||
164 | */ | ||
82 | mutex_lock(&lguest_lock); | 165 | mutex_lock(&lguest_lock); |
83 | err = add_eventfd(lg, addr, fd); | 166 | err = add_eventfd(lg, addr, fd); |
84 | mutex_unlock(&lguest_lock); | 167 | mutex_unlock(&lguest_lock); |
85 | 168 | ||
86 | return 0; | 169 | return err; |
87 | } | 170 | } |
88 | 171 | ||
89 | /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt | 172 | /*L:050 |
90 | * number to /dev/lguest. */ | 173 | * Sending an interrupt is done by writing LHREQ_IRQ and an interrupt |
174 | * number to /dev/lguest. | ||
175 | */ | ||
91 | static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) | 176 | static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) |
92 | { | 177 | { |
93 | unsigned long irq; | 178 | unsigned long irq; |
@@ -97,12 +182,18 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) | |||
97 | if (irq >= LGUEST_IRQS) | 182 | if (irq >= LGUEST_IRQS) |
98 | return -EINVAL; | 183 | return -EINVAL; |
99 | 184 | ||
185 | /* | ||
186 | * Next time the Guest runs, the core code will see if it can deliver | ||
187 | * this interrupt. | ||
188 | */ | ||
100 | set_interrupt(cpu, irq); | 189 | set_interrupt(cpu, irq); |
101 | return 0; | 190 | return 0; |
102 | } | 191 | } |
103 | 192 | ||
104 | /*L:040 Once our Guest is initialized, the Launcher makes it run by reading | 193 | /*L:040 |
105 | * from /dev/lguest. */ | 194 | * Once our Guest is initialized, the Launcher makes it run by reading |
195 | * from /dev/lguest. | ||
196 | */ | ||
106 | static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | 197 | static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) |
107 | { | 198 | { |
108 | struct lguest *lg = file->private_data; | 199 | struct lguest *lg = file->private_data; |
@@ -138,8 +229,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
138 | return len; | 229 | return len; |
139 | } | 230 | } |
140 | 231 | ||
141 | /* If we returned from read() last time because the Guest sent I/O, | 232 | /* |
142 | * clear the flag. */ | 233 | * If we returned from read() last time because the Guest sent I/O, |
234 | * clear the flag. | ||
235 | */ | ||
143 | if (cpu->pending_notify) | 236 | if (cpu->pending_notify) |
144 | cpu->pending_notify = 0; | 237 | cpu->pending_notify = 0; |
145 | 238 | ||
@@ -147,8 +240,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) | |||
147 | return run_guest(cpu, (unsigned long __user *)user); | 240 | return run_guest(cpu, (unsigned long __user *)user); |
148 | } | 241 | } |
149 | 242 | ||
150 | /*L:025 This actually initializes a CPU. For the moment, a Guest is only | 243 | /*L:025 |
151 | * uniprocessor, so "id" is always 0. */ | 244 | * This actually initializes a CPU. For the moment, a Guest is only |
245 | * uniprocessor, so "id" is always 0. | ||
246 | */ | ||
152 | static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | 247 | static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) |
153 | { | 248 | { |
154 | /* We have a limited number the number of CPUs in the lguest struct. */ | 249 | /* We have a limited number the number of CPUs in the lguest struct. */ |
@@ -163,8 +258,10 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | |||
163 | /* Each CPU has a timer it can set. */ | 258 | /* Each CPU has a timer it can set. */ |
164 | init_clockdev(cpu); | 259 | init_clockdev(cpu); |
165 | 260 | ||
166 | /* We need a complete page for the Guest registers: they are accessible | 261 | /* |
167 | * to the Guest and we can only grant it access to whole pages. */ | 262 | * We need a complete page for the Guest registers: they are accessible |
263 | * to the Guest and we can only grant it access to whole pages. | ||
264 | */ | ||
168 | cpu->regs_page = get_zeroed_page(GFP_KERNEL); | 265 | cpu->regs_page = get_zeroed_page(GFP_KERNEL); |
169 | if (!cpu->regs_page) | 266 | if (!cpu->regs_page) |
170 | return -ENOMEM; | 267 | return -ENOMEM; |
@@ -172,29 +269,38 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | |||
172 | /* We actually put the registers at the bottom of the page. */ | 269 | /* We actually put the registers at the bottom of the page. */ |
173 | cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); | 270 | cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); |
174 | 271 | ||
175 | /* Now we initialize the Guest's registers, handing it the start | 272 | /* |
176 | * address. */ | 273 | * Now we initialize the Guest's registers, handing it the start |
274 | * address. | ||
275 | */ | ||
177 | lguest_arch_setup_regs(cpu, start_ip); | 276 | lguest_arch_setup_regs(cpu, start_ip); |
178 | 277 | ||
179 | /* We keep a pointer to the Launcher task (ie. current task) for when | 278 | /* |
180 | * other Guests want to wake this one (eg. console input). */ | 279 | * We keep a pointer to the Launcher task (ie. current task) for when |
280 | * other Guests want to wake this one (eg. console input). | ||
281 | */ | ||
181 | cpu->tsk = current; | 282 | cpu->tsk = current; |
182 | 283 | ||
183 | /* We need to keep a pointer to the Launcher's memory map, because if | 284 | /* |
285 | * We need to keep a pointer to the Launcher's memory map, because if | ||
184 | * the Launcher dies we need to clean it up. If we don't keep a | 286 | * the Launcher dies we need to clean it up. If we don't keep a |
185 | * reference, it is destroyed before close() is called. */ | 287 | * reference, it is destroyed before close() is called. |
288 | */ | ||
186 | cpu->mm = get_task_mm(cpu->tsk); | 289 | cpu->mm = get_task_mm(cpu->tsk); |
187 | 290 | ||
188 | /* We remember which CPU's pages this Guest used last, for optimization | 291 | /* |
189 | * when the same Guest runs on the same CPU twice. */ | 292 | * We remember which CPU's pages this Guest used last, for optimization |
293 | * when the same Guest runs on the same CPU twice. | ||
294 | */ | ||
190 | cpu->last_pages = NULL; | 295 | cpu->last_pages = NULL; |
191 | 296 | ||
192 | /* No error == success. */ | 297 | /* No error == success. */ |
193 | return 0; | 298 | return 0; |
194 | } | 299 | } |
195 | 300 | ||
196 | /*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit) | 301 | /*L:020 |
197 | * values (in addition to the LHREQ_INITIALIZE value). These are: | 302 | * The initialization write supplies 3 pointer sized (32 or 64 bit) values (in |
303 | * addition to the LHREQ_INITIALIZE value). These are: | ||
198 | * | 304 | * |
199 | * base: The start of the Guest-physical memory inside the Launcher memory. | 305 | * base: The start of the Guest-physical memory inside the Launcher memory. |
200 | * | 306 | * |
@@ -206,14 +312,15 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) | |||
206 | */ | 312 | */ |
207 | static int initialize(struct file *file, const unsigned long __user *input) | 313 | static int initialize(struct file *file, const unsigned long __user *input) |
208 | { | 314 | { |
209 | /* "struct lguest" contains everything we (the Host) know about a | 315 | /* "struct lguest" contains all we (the Host) know about a Guest. */ |
210 | * Guest. */ | ||
211 | struct lguest *lg; | 316 | struct lguest *lg; |
212 | int err; | 317 | int err; |
213 | unsigned long args[3]; | 318 | unsigned long args[3]; |
214 | 319 | ||
215 | /* We grab the Big Lguest lock, which protects against multiple | 320 | /* |
216 | * simultaneous initializations. */ | 321 | * We grab the Big Lguest lock, which protects against multiple |
322 | * simultaneous initializations. | ||
323 | */ | ||
217 | mutex_lock(&lguest_lock); | 324 | mutex_lock(&lguest_lock); |
218 | /* You can't initialize twice! Close the device and start again... */ | 325 | /* You can't initialize twice! Close the device and start again... */ |
219 | if (file->private_data) { | 326 | if (file->private_data) { |
@@ -248,8 +355,10 @@ static int initialize(struct file *file, const unsigned long __user *input) | |||
248 | if (err) | 355 | if (err) |
249 | goto free_eventfds; | 356 | goto free_eventfds; |
250 | 357 | ||
251 | /* Initialize the Guest's shadow page tables, using the toplevel | 358 | /* |
252 | * address the Launcher gave us. This allocates memory, so can fail. */ | 359 | * Initialize the Guest's shadow page tables, using the toplevel |
360 | * address the Launcher gave us. This allocates memory, so can fail. | ||
361 | */ | ||
253 | err = init_guest_pagetable(lg); | 362 | err = init_guest_pagetable(lg); |
254 | if (err) | 363 | if (err) |
255 | goto free_regs; | 364 | goto free_regs; |
@@ -274,20 +383,24 @@ unlock: | |||
274 | return err; | 383 | return err; |
275 | } | 384 | } |
276 | 385 | ||
277 | /*L:010 The first operation the Launcher does must be a write. All writes | 386 | /*L:010 |
387 | * The first operation the Launcher does must be a write. All writes | ||
278 | * start with an unsigned long number: for the first write this must be | 388 | * start with an unsigned long number: for the first write this must be |
279 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use | 389 | * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use |
280 | * writes of other values to send interrupts. | 390 | * writes of other values to send interrupts or set up receipt of notifications. |
281 | * | 391 | * |
282 | * Note that we overload the "offset" in the /dev/lguest file to indicate what | 392 | * Note that we overload the "offset" in the /dev/lguest file to indicate what |
283 | * CPU number we're dealing with. Currently this is always 0, since we only | 393 | * CPU number we're dealing with. Currently this is always 0 since we only |
284 | * support uniprocessor Guests, but you can see the beginnings of SMP support | 394 | * support uniprocessor Guests, but you can see the beginnings of SMP support |
285 | * here. */ | 395 | * here. |
396 | */ | ||
286 | static ssize_t write(struct file *file, const char __user *in, | 397 | static ssize_t write(struct file *file, const char __user *in, |
287 | size_t size, loff_t *off) | 398 | size_t size, loff_t *off) |
288 | { | 399 | { |
289 | /* Once the Guest is initialized, we hold the "struct lguest" in the | 400 | /* |
290 | * file private data. */ | 401 | * Once the Guest is initialized, we hold the "struct lguest" in the |
402 | * file private data. | ||
403 | */ | ||
291 | struct lguest *lg = file->private_data; | 404 | struct lguest *lg = file->private_data; |
292 | const unsigned long __user *input = (const unsigned long __user *)in; | 405 | const unsigned long __user *input = (const unsigned long __user *)in; |
293 | unsigned long req; | 406 | unsigned long req; |
@@ -322,13 +435,15 @@ static ssize_t write(struct file *file, const char __user *in, | |||
322 | } | 435 | } |
323 | } | 436 | } |
324 | 437 | ||
325 | /*L:060 The final piece of interface code is the close() routine. It reverses | 438 | /*L:060 |
439 | * The final piece of interface code is the close() routine. It reverses | ||
326 | * everything done in initialize(). This is usually called because the | 440 | * everything done in initialize(). This is usually called because the |
327 | * Launcher exited. | 441 | * Launcher exited. |
328 | * | 442 | * |
329 | * Note that the close routine returns 0 or a negative error number: it can't | 443 | * Note that the close routine returns 0 or a negative error number: it can't |
330 | * really fail, but it can whine. I blame Sun for this wart, and K&R C for | 444 | * really fail, but it can whine. I blame Sun for this wart, and K&R C for |
331 | * letting them do it. :*/ | 445 | * letting them do it. |
446 | :*/ | ||
332 | static int close(struct inode *inode, struct file *file) | 447 | static int close(struct inode *inode, struct file *file) |
333 | { | 448 | { |
334 | struct lguest *lg = file->private_data; | 449 | struct lguest *lg = file->private_data; |
@@ -338,8 +453,10 @@ static int close(struct inode *inode, struct file *file) | |||
338 | if (!lg) | 453 | if (!lg) |
339 | return 0; | 454 | return 0; |
340 | 455 | ||
341 | /* We need the big lock, to protect from inter-guest I/O and other | 456 | /* |
342 | * Launchers initializing guests. */ | 457 | * We need the big lock, to protect from inter-guest I/O and other |
458 | * Launchers initializing guests. | ||
459 | */ | ||
343 | mutex_lock(&lguest_lock); | 460 | mutex_lock(&lguest_lock); |
344 | 461 | ||
345 | /* Free up the shadow page tables for the Guest. */ | 462 | /* Free up the shadow page tables for the Guest. */ |
@@ -350,8 +467,10 @@ static int close(struct inode *inode, struct file *file) | |||
350 | hrtimer_cancel(&lg->cpus[i].hrt); | 467 | hrtimer_cancel(&lg->cpus[i].hrt); |
351 | /* We can free up the register page we allocated. */ | 468 | /* We can free up the register page we allocated. */ |
352 | free_page(lg->cpus[i].regs_page); | 469 | free_page(lg->cpus[i].regs_page); |
353 | /* Now all the memory cleanups are done, it's safe to release | 470 | /* |
354 | * the Launcher's memory management structure. */ | 471 | * Now all the memory cleanups are done, it's safe to release |
472 | * the Launcher's memory management structure. | ||
473 | */ | ||
355 | mmput(lg->cpus[i].mm); | 474 | mmput(lg->cpus[i].mm); |
356 | } | 475 | } |
357 | 476 | ||
@@ -360,8 +479,10 @@ static int close(struct inode *inode, struct file *file) | |||
360 | eventfd_ctx_put(lg->eventfds->map[i].event); | 479 | eventfd_ctx_put(lg->eventfds->map[i].event); |
361 | kfree(lg->eventfds); | 480 | kfree(lg->eventfds); |
362 | 481 | ||
363 | /* If lg->dead doesn't contain an error code it will be NULL or a | 482 | /* |
364 | * kmalloc()ed string, either of which is ok to hand to kfree(). */ | 483 | * If lg->dead doesn't contain an error code it will be NULL or a |
484 | * kmalloc()ed string, either of which is ok to hand to kfree(). | ||
485 | */ | ||
365 | if (!IS_ERR(lg->dead)) | 486 | if (!IS_ERR(lg->dead)) |
366 | kfree(lg->dead); | 487 | kfree(lg->dead); |
367 | /* Free the memory allocated to the lguest_struct */ | 488 | /* Free the memory allocated to the lguest_struct */ |
@@ -385,7 +506,8 @@ static int close(struct inode *inode, struct file *file) | |||
385 | * | 506 | * |
386 | * We begin our understanding with the Host kernel interface which the Launcher | 507 | * We begin our understanding with the Host kernel interface which the Launcher |
387 | * uses: reading and writing a character device called /dev/lguest. All the | 508 | * uses: reading and writing a character device called /dev/lguest. All the |
388 | * work happens in the read(), write() and close() routines: */ | 509 | * work happens in the read(), write() and close() routines: |
510 | */ | ||
389 | static struct file_operations lguest_fops = { | 511 | static struct file_operations lguest_fops = { |
390 | .owner = THIS_MODULE, | 512 | .owner = THIS_MODULE, |
391 | .release = close, | 513 | .release = close, |
@@ -393,8 +515,10 @@ static struct file_operations lguest_fops = { | |||
393 | .read = read, | 515 | .read = read, |
394 | }; | 516 | }; |
395 | 517 | ||
396 | /* This is a textbook example of a "misc" character device. Populate a "struct | 518 | /* |
397 | * miscdevice" and register it with misc_register(). */ | 519 | * This is a textbook example of a "misc" character device. Populate a "struct |
520 | * miscdevice" and register it with misc_register(). | ||
521 | */ | ||
398 | static struct miscdevice lguest_dev = { | 522 | static struct miscdevice lguest_dev = { |
399 | .minor = MISC_DYNAMIC_MINOR, | 523 | .minor = MISC_DYNAMIC_MINOR, |
400 | .name = "lguest", | 524 | .name = "lguest", |
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index a6fe1abda240..a8d0aee3bc0e 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c | |||
@@ -1,9 +1,11 @@ | |||
1 | /*P:700 The pagetable code, on the other hand, still shows the scars of | 1 | /*P:700 |
2 | * The pagetable code, on the other hand, still shows the scars of | ||
2 | * previous encounters. It's functional, and as neat as it can be in the | 3 | * previous encounters. It's functional, and as neat as it can be in the |
3 | * circumstances, but be wary, for these things are subtle and break easily. | 4 | * circumstances, but be wary, for these things are subtle and break easily. |
4 | * The Guest provides a virtual to physical mapping, but we can neither trust | 5 | * The Guest provides a virtual to physical mapping, but we can neither trust |
5 | * it nor use it: we verify and convert it here then point the CPU to the | 6 | * it nor use it: we verify and convert it here then point the CPU to the |
6 | * converted Guest pages when running the Guest. :*/ | 7 | * converted Guest pages when running the Guest. |
8 | :*/ | ||
7 | 9 | ||
8 | /* Copyright (C) Rusty Russell IBM Corporation 2006. | 10 | /* Copyright (C) Rusty Russell IBM Corporation 2006. |
9 | * GPL v2 and any later version */ | 11 | * GPL v2 and any later version */ |
@@ -17,18 +19,20 @@ | |||
17 | #include <asm/bootparam.h> | 19 | #include <asm/bootparam.h> |
18 | #include "lg.h" | 20 | #include "lg.h" |
19 | 21 | ||
20 | /*M:008 We hold reference to pages, which prevents them from being swapped. | 22 | /*M:008 |
23 | * We hold reference to pages, which prevents them from being swapped. | ||
21 | * It'd be nice to have a callback in the "struct mm_struct" when Linux wants | 24 | * It'd be nice to have a callback in the "struct mm_struct" when Linux wants |
22 | * to swap out. If we had this, and a shrinker callback to trim PTE pages, we | 25 | * to swap out. If we had this, and a shrinker callback to trim PTE pages, we |
23 | * could probably consider launching Guests as non-root. :*/ | 26 | * could probably consider launching Guests as non-root. |
27 | :*/ | ||
24 | 28 | ||
25 | /*H:300 | 29 | /*H:300 |
26 | * The Page Table Code | 30 | * The Page Table Code |
27 | * | 31 | * |
28 | * We use two-level page tables for the Guest. If you're not entirely | 32 | * We use two-level page tables for the Guest, or three-level with PAE. If |
29 | * comfortable with virtual addresses, physical addresses and page tables then | 33 | * you're not entirely comfortable with virtual addresses, physical addresses |
30 | * I recommend you review arch/x86/lguest/boot.c's "Page Table Handling" (with | 34 | * and page tables then I recommend you review arch/x86/lguest/boot.c's "Page |
31 | * diagrams!). | 35 | * Table Handling" (with diagrams!). |
32 | * | 36 | * |
33 | * The Guest keeps page tables, but we maintain the actual ones here: these are | 37 | * The Guest keeps page tables, but we maintain the actual ones here: these are |
34 | * called "shadow" page tables. Which is a very Guest-centric name: these are | 38 | * called "shadow" page tables. Which is a very Guest-centric name: these are |
@@ -45,16 +49,18 @@ | |||
45 | * (v) Flushing (throwing away) page tables, | 49 | * (v) Flushing (throwing away) page tables, |
46 | * (vi) Mapping the Switcher when the Guest is about to run, | 50 | * (vi) Mapping the Switcher when the Guest is about to run, |
47 | * (vii) Setting up the page tables initially. | 51 | * (vii) Setting up the page tables initially. |
48 | :*/ | 52 | :*/ |
49 | 53 | ||
50 | 54 | /* | |
51 | /* 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is | 55 | * The Switcher uses the complete top PTE page. That's 1024 PTE entries (4MB) |
52 | * conveniently placed at the top 4MB, so it uses a separate, complete PTE | 56 | * or 512 PTE entries with PAE (2MB). |
53 | * page. */ | 57 | */ |
54 | #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1) | 58 | #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1) |
55 | 59 | ||
56 | /* For PAE we need the PMD index as well. We use the last 2MB, so we | 60 | /* |
57 | * will need the last pmd entry of the last pmd page. */ | 61 | * For PAE we need the PMD index as well. We use the last 2MB, so we |
62 | * will need the last pmd entry of the last pmd page. | ||
63 | */ | ||
58 | #ifdef CONFIG_X86_PAE | 64 | #ifdef CONFIG_X86_PAE |
59 | #define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) | 65 | #define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) |
60 | #define RESERVE_MEM 2U | 66 | #define RESERVE_MEM 2U |
@@ -64,14 +70,18 @@ | |||
64 | #define CHECK_GPGD_MASK _PAGE_TABLE | 70 | #define CHECK_GPGD_MASK _PAGE_TABLE |
65 | #endif | 71 | #endif |
66 | 72 | ||
67 | /* We actually need a separate PTE page for each CPU. Remember that after the | 73 | /* |
74 | * We actually need a separate PTE page for each CPU. Remember that after the | ||
68 | * Switcher code itself comes two pages for each CPU, and we don't want this | 75 | * Switcher code itself comes two pages for each CPU, and we don't want this |
69 | * CPU's guest to see the pages of any other CPU. */ | 76 | * CPU's guest to see the pages of any other CPU. |
77 | */ | ||
70 | static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); | 78 | static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); |
71 | #define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) | 79 | #define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) |
72 | 80 | ||
73 | /*H:320 The page table code is curly enough to need helper functions to keep it | 81 | /*H:320 |
74 | * clear and clean. | 82 | * The page table code is curly enough to need helper functions to keep it |
83 | * clear and clean. The kernel itself provides many of them; one advantage | ||
84 | * of insisting that the Guest and Host use the same CONFIG_PAE setting. | ||
75 | * | 85 | * |
76 | * There are two functions which return pointers to the shadow (aka "real") | 86 | * There are two functions which return pointers to the shadow (aka "real") |
77 | * page tables. | 87 | * page tables. |
@@ -79,7 +89,8 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); | |||
79 | * spgd_addr() takes the virtual address and returns a pointer to the top-level | 89 | * spgd_addr() takes the virtual address and returns a pointer to the top-level |
80 | * page directory entry (PGD) for that address. Since we keep track of several | 90 | * page directory entry (PGD) for that address. Since we keep track of several |
81 | * page tables, the "i" argument tells us which one we're interested in (it's | 91 | * page tables, the "i" argument tells us which one we're interested in (it's |
82 | * usually the current one). */ | 92 | * usually the current one). |
93 | */ | ||
83 | static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) | 94 | static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) |
84 | { | 95 | { |
85 | unsigned int index = pgd_index(vaddr); | 96 | unsigned int index = pgd_index(vaddr); |
@@ -96,9 +107,11 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) | |||
96 | } | 107 | } |
97 | 108 | ||
98 | #ifdef CONFIG_X86_PAE | 109 | #ifdef CONFIG_X86_PAE |
99 | /* This routine then takes the PGD entry given above, which contains the | 110 | /* |
111 | * This routine then takes the PGD entry given above, which contains the | ||
100 | * address of the PMD page. It then returns a pointer to the PMD entry for the | 112 | * address of the PMD page. It then returns a pointer to the PMD entry for the |
101 | * given address. */ | 113 | * given address. |
114 | */ | ||
102 | static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) | 115 | static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) |
103 | { | 116 | { |
104 | unsigned int index = pmd_index(vaddr); | 117 | unsigned int index = pmd_index(vaddr); |
@@ -119,9 +132,11 @@ static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) | |||
119 | } | 132 | } |
120 | #endif | 133 | #endif |
121 | 134 | ||
122 | /* This routine then takes the page directory entry returned above, which | 135 | /* |
136 | * This routine then takes the page directory entry returned above, which | ||
123 | * contains the address of the page table entry (PTE) page. It then returns a | 137 | * contains the address of the page table entry (PTE) page. It then returns a |
124 | * pointer to the PTE entry for the given address. */ | 138 | * pointer to the PTE entry for the given address. |
139 | */ | ||
125 | static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) | 140 | static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) |
126 | { | 141 | { |
127 | #ifdef CONFIG_X86_PAE | 142 | #ifdef CONFIG_X86_PAE |
@@ -139,8 +154,10 @@ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) | |||
139 | return &page[pte_index(vaddr)]; | 154 | return &page[pte_index(vaddr)]; |
140 | } | 155 | } |
141 | 156 | ||
142 | /* These two functions just like the above two, except they access the Guest | 157 | /* |
143 | * page tables. Hence they return a Guest address. */ | 158 | * These functions are just like the above two, except they access the Guest |
159 | * page tables. Hence they return a Guest address. | ||
160 | */ | ||
144 | static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) | 161 | static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) |
145 | { | 162 | { |
146 | unsigned int index = vaddr >> (PGDIR_SHIFT); | 163 | unsigned int index = vaddr >> (PGDIR_SHIFT); |
@@ -148,6 +165,7 @@ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) | |||
148 | } | 165 | } |
149 | 166 | ||
150 | #ifdef CONFIG_X86_PAE | 167 | #ifdef CONFIG_X86_PAE |
168 | /* Follow the PGD to the PMD. */ | ||
151 | static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr) | 169 | static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr) |
152 | { | 170 | { |
153 | unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; | 171 | unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; |
@@ -155,6 +173,7 @@ static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr) | |||
155 | return gpage + pmd_index(vaddr) * sizeof(pmd_t); | 173 | return gpage + pmd_index(vaddr) * sizeof(pmd_t); |
156 | } | 174 | } |
157 | 175 | ||
176 | /* Follow the PMD to the PTE. */ | ||
158 | static unsigned long gpte_addr(struct lg_cpu *cpu, | 177 | static unsigned long gpte_addr(struct lg_cpu *cpu, |
159 | pmd_t gpmd, unsigned long vaddr) | 178 | pmd_t gpmd, unsigned long vaddr) |
160 | { | 179 | { |
@@ -164,6 +183,7 @@ static unsigned long gpte_addr(struct lg_cpu *cpu, | |||
164 | return gpage + pte_index(vaddr) * sizeof(pte_t); | 183 | return gpage + pte_index(vaddr) * sizeof(pte_t); |
165 | } | 184 | } |
166 | #else | 185 | #else |
186 | /* Follow the PGD to the PTE (no mid-level for !PAE). */ | ||
167 | static unsigned long gpte_addr(struct lg_cpu *cpu, | 187 | static unsigned long gpte_addr(struct lg_cpu *cpu, |
168 | pgd_t gpgd, unsigned long vaddr) | 188 | pgd_t gpgd, unsigned long vaddr) |
169 | { | 189 | { |
@@ -175,17 +195,21 @@ static unsigned long gpte_addr(struct lg_cpu *cpu, | |||
175 | #endif | 195 | #endif |
176 | /*:*/ | 196 | /*:*/ |
177 | 197 | ||
178 | /*M:014 get_pfn is slow: we could probably try to grab batches of pages here as | 198 | /*M:014 |
179 | * an optimization (ie. pre-faulting). :*/ | 199 | * get_pfn is slow: we could probably try to grab batches of pages here as |
200 | * an optimization (ie. pre-faulting). | ||
201 | :*/ | ||
180 | 202 | ||
181 | /*H:350 This routine takes a page number given by the Guest and converts it to | 203 | /*H:350 |
204 | * This routine takes a page number given by the Guest and converts it to | ||
182 | * an actual, physical page number. It can fail for several reasons: the | 205 | * an actual, physical page number. It can fail for several reasons: the |
183 | * virtual address might not be mapped by the Launcher, the write flag is set | 206 | * virtual address might not be mapped by the Launcher, the write flag is set |
184 | * and the page is read-only, or the write flag was set and the page was | 207 | * and the page is read-only, or the write flag was set and the page was |
185 | * shared so had to be copied, but we ran out of memory. | 208 | * shared so had to be copied, but we ran out of memory. |
186 | * | 209 | * |
187 | * This holds a reference to the page, so release_pte() is careful to put that | 210 | * This holds a reference to the page, so release_pte() is careful to put that |
188 | * back. */ | 211 | * back. |
212 | */ | ||
189 | static unsigned long get_pfn(unsigned long virtpfn, int write) | 213 | static unsigned long get_pfn(unsigned long virtpfn, int write) |
190 | { | 214 | { |
191 | struct page *page; | 215 | struct page *page; |
@@ -198,33 +222,41 @@ static unsigned long get_pfn(unsigned long virtpfn, int write) | |||
198 | return -1UL; | 222 | return -1UL; |
199 | } | 223 | } |
200 | 224 | ||
201 | /*H:340 Converting a Guest page table entry to a shadow (ie. real) page table | 225 | /*H:340 |
226 | * Converting a Guest page table entry to a shadow (ie. real) page table | ||
202 | * entry can be a little tricky. The flags are (almost) the same, but the | 227 | * entry can be a little tricky. The flags are (almost) the same, but the |
203 | * Guest PTE contains a virtual page number: the CPU needs the real page | 228 | * Guest PTE contains a virtual page number: the CPU needs the real page |
204 | * number. */ | 229 | * number. |
230 | */ | ||
205 | static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) | 231 | static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) |
206 | { | 232 | { |
207 | unsigned long pfn, base, flags; | 233 | unsigned long pfn, base, flags; |
208 | 234 | ||
209 | /* The Guest sets the global flag, because it thinks that it is using | 235 | /* |
236 | * The Guest sets the global flag, because it thinks that it is using | ||
210 | * PGE. We only told it to use PGE so it would tell us whether it was | 237 | * PGE. We only told it to use PGE so it would tell us whether it was |
211 | * flushing a kernel mapping or a userspace mapping. We don't actually | 238 | * flushing a kernel mapping or a userspace mapping. We don't actually |
212 | * use the global bit, so throw it away. */ | 239 | * use the global bit, so throw it away. |
240 | */ | ||
213 | flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); | 241 | flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); |
214 | 242 | ||
215 | /* The Guest's pages are offset inside the Launcher. */ | 243 | /* The Guest's pages are offset inside the Launcher. */ |
216 | base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; | 244 | base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; |
217 | 245 | ||
218 | /* We need a temporary "unsigned long" variable to hold the answer from | 246 | /* |
247 | * We need a temporary "unsigned long" variable to hold the answer from | ||
219 | * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't | 248 | * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't |
220 | * fit in spte.pfn. get_pfn() finds the real physical number of the | 249 | * fit in spte.pfn. get_pfn() finds the real physical number of the |
221 | * page, given the virtual number. */ | 250 | * page, given the virtual number. |
251 | */ | ||
222 | pfn = get_pfn(base + pte_pfn(gpte), write); | 252 | pfn = get_pfn(base + pte_pfn(gpte), write); |
223 | if (pfn == -1UL) { | 253 | if (pfn == -1UL) { |
224 | kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); | 254 | kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); |
225 | /* When we destroy the Guest, we'll go through the shadow page | 255 | /* |
256 | * When we destroy the Guest, we'll go through the shadow page | ||
226 | * tables and release_pte() them. Make sure we don't think | 257 | * tables and release_pte() them. Make sure we don't think |
227 | * this one is valid! */ | 258 | * this one is valid! |
259 | */ | ||
228 | flags = 0; | 260 | flags = 0; |
229 | } | 261 | } |
230 | /* Now we assemble our shadow PTE from the page number and flags. */ | 262 | /* Now we assemble our shadow PTE from the page number and flags. */ |
@@ -234,8 +266,10 @@ static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) | |||
234 | /*H:460 And to complete the chain, release_pte() looks like this: */ | 266 | /*H:460 And to complete the chain, release_pte() looks like this: */ |
235 | static void release_pte(pte_t pte) | 267 | static void release_pte(pte_t pte) |
236 | { | 268 | { |
237 | /* Remember that get_user_pages_fast() took a reference to the page, in | 269 | /* |
238 | * get_pfn()? We have to put it back now. */ | 270 | * Remember that get_user_pages_fast() took a reference to the page, in |
271 | * get_pfn()? We have to put it back now. | ||
272 | */ | ||
239 | if (pte_flags(pte) & _PAGE_PRESENT) | 273 | if (pte_flags(pte) & _PAGE_PRESENT) |
240 | put_page(pte_page(pte)); | 274 | put_page(pte_page(pte)); |
241 | } | 275 | } |
@@ -273,7 +307,8 @@ static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd) | |||
273 | * and return to the Guest without it knowing. | 307 | * and return to the Guest without it knowing. |
274 | * | 308 | * |
275 | * If we fixed up the fault (ie. we mapped the address), this routine returns | 309 | * If we fixed up the fault (ie. we mapped the address), this routine returns |
276 | * true. Otherwise, it was a real fault and we need to tell the Guest. */ | 310 | * true. Otherwise, it was a real fault and we need to tell the Guest. |
311 | */ | ||
277 | bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | 312 | bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) |
278 | { | 313 | { |
279 | pgd_t gpgd; | 314 | pgd_t gpgd; |
@@ -282,6 +317,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
282 | pte_t gpte; | 317 | pte_t gpte; |
283 | pte_t *spte; | 318 | pte_t *spte; |
284 | 319 | ||
320 | /* Mid level for PAE. */ | ||
285 | #ifdef CONFIG_X86_PAE | 321 | #ifdef CONFIG_X86_PAE |
286 | pmd_t *spmd; | 322 | pmd_t *spmd; |
287 | pmd_t gpmd; | 323 | pmd_t gpmd; |
@@ -298,22 +334,26 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
298 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { | 334 | if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { |
299 | /* No shadow entry: allocate a new shadow PTE page. */ | 335 | /* No shadow entry: allocate a new shadow PTE page. */ |
300 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); | 336 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); |
301 | /* This is not really the Guest's fault, but killing it is | 337 | /* |
302 | * simple for this corner case. */ | 338 | * This is not really the Guest's fault, but killing it is |
339 | * simple for this corner case. | ||
340 | */ | ||
303 | if (!ptepage) { | 341 | if (!ptepage) { |
304 | kill_guest(cpu, "out of memory allocating pte page"); | 342 | kill_guest(cpu, "out of memory allocating pte page"); |
305 | return false; | 343 | return false; |
306 | } | 344 | } |
307 | /* We check that the Guest pgd is OK. */ | 345 | /* We check that the Guest pgd is OK. */ |
308 | check_gpgd(cpu, gpgd); | 346 | check_gpgd(cpu, gpgd); |
309 | /* And we copy the flags to the shadow PGD entry. The page | 347 | /* |
310 | * number in the shadow PGD is the page we just allocated. */ | 348 | * And we copy the flags to the shadow PGD entry. The page |
349 | * number in the shadow PGD is the page we just allocated. | ||
350 | */ | ||
311 | set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd))); | 351 | set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd))); |
312 | } | 352 | } |
313 | 353 | ||
314 | #ifdef CONFIG_X86_PAE | 354 | #ifdef CONFIG_X86_PAE |
315 | gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t); | 355 | gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t); |
316 | /* middle level not present? We can't map it in. */ | 356 | /* Middle level not present? We can't map it in. */ |
317 | if (!(pmd_flags(gpmd) & _PAGE_PRESENT)) | 357 | if (!(pmd_flags(gpmd) & _PAGE_PRESENT)) |
318 | return false; | 358 | return false; |
319 | 359 | ||
@@ -324,8 +364,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
324 | /* No shadow entry: allocate a new shadow PTE page. */ | 364 | /* No shadow entry: allocate a new shadow PTE page. */ |
325 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); | 365 | unsigned long ptepage = get_zeroed_page(GFP_KERNEL); |
326 | 366 | ||
327 | /* This is not really the Guest's fault, but killing it is | 367 | /* |
328 | * simple for this corner case. */ | 368 | * This is not really the Guest's fault, but killing it is |
369 | * simple for this corner case. | ||
370 | */ | ||
329 | if (!ptepage) { | 371 | if (!ptepage) { |
330 | kill_guest(cpu, "out of memory allocating pte page"); | 372 | kill_guest(cpu, "out of memory allocating pte page"); |
331 | return false; | 373 | return false; |
@@ -334,27 +376,37 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
334 | /* We check that the Guest pmd is OK. */ | 376 | /* We check that the Guest pmd is OK. */ |
335 | check_gpmd(cpu, gpmd); | 377 | check_gpmd(cpu, gpmd); |
336 | 378 | ||
337 | /* And we copy the flags to the shadow PMD entry. The page | 379 | /* |
338 | * number in the shadow PMD is the page we just allocated. */ | 380 | * And we copy the flags to the shadow PMD entry. The page |
381 | * number in the shadow PMD is the page we just allocated. | ||
382 | */ | ||
339 | native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); | 383 | native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); |
340 | } | 384 | } |
341 | 385 | ||
342 | /* OK, now we look at the lower level in the Guest page table: keep its | 386 | /* |
343 | * address, because we might update it later. */ | 387 | * OK, now we look at the lower level in the Guest page table: keep its |
388 | * address, because we might update it later. | ||
389 | */ | ||
344 | gpte_ptr = gpte_addr(cpu, gpmd, vaddr); | 390 | gpte_ptr = gpte_addr(cpu, gpmd, vaddr); |
345 | #else | 391 | #else |
346 | /* OK, now we look at the lower level in the Guest page table: keep its | 392 | /* |
347 | * address, because we might update it later. */ | 393 | * OK, now we look at the lower level in the Guest page table: keep its |
394 | * address, because we might update it later. | ||
395 | */ | ||
348 | gpte_ptr = gpte_addr(cpu, gpgd, vaddr); | 396 | gpte_ptr = gpte_addr(cpu, gpgd, vaddr); |
349 | #endif | 397 | #endif |
398 | |||
399 | /* Read the actual PTE value. */ | ||
350 | gpte = lgread(cpu, gpte_ptr, pte_t); | 400 | gpte = lgread(cpu, gpte_ptr, pte_t); |
351 | 401 | ||
352 | /* If this page isn't in the Guest page tables, we can't page it in. */ | 402 | /* If this page isn't in the Guest page tables, we can't page it in. */ |
353 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) | 403 | if (!(pte_flags(gpte) & _PAGE_PRESENT)) |
354 | return false; | 404 | return false; |
355 | 405 | ||
356 | /* Check they're not trying to write to a page the Guest wants | 406 | /* |
357 | * read-only (bit 2 of errcode == write). */ | 407 | * Check they're not trying to write to a page the Guest wants |
408 | * read-only (bit 2 of errcode == write). | ||
409 | */ | ||
358 | if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW)) | 410 | if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW)) |
359 | return false; | 411 | return false; |
360 | 412 | ||
@@ -362,8 +414,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
362 | if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) | 414 | if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) |
363 | return false; | 415 | return false; |
364 | 416 | ||
365 | /* Check that the Guest PTE flags are OK, and the page number is below | 417 | /* |
366 | * the pfn_limit (ie. not mapping the Launcher binary). */ | 418 | * Check that the Guest PTE flags are OK, and the page number is below |
419 | * the pfn_limit (ie. not mapping the Launcher binary). | ||
420 | */ | ||
367 | check_gpte(cpu, gpte); | 421 | check_gpte(cpu, gpte); |
368 | 422 | ||
369 | /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ | 423 | /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ |
@@ -373,29 +427,40 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
373 | 427 | ||
374 | /* Get the pointer to the shadow PTE entry we're going to set. */ | 428 | /* Get the pointer to the shadow PTE entry we're going to set. */ |
375 | spte = spte_addr(cpu, *spgd, vaddr); | 429 | spte = spte_addr(cpu, *spgd, vaddr); |
376 | /* If there was a valid shadow PTE entry here before, we release it. | 430 | |
377 | * This can happen with a write to a previously read-only entry. */ | 431 | /* |
432 | * If there was a valid shadow PTE entry here before, we release it. | ||
433 | * This can happen with a write to a previously read-only entry. | ||
434 | */ | ||
378 | release_pte(*spte); | 435 | release_pte(*spte); |
379 | 436 | ||
380 | /* If this is a write, we insist that the Guest page is writable (the | 437 | /* |
381 | * final arg to gpte_to_spte()). */ | 438 | * If this is a write, we insist that the Guest page is writable (the |
439 | * final arg to gpte_to_spte()). | ||
440 | */ | ||
382 | if (pte_dirty(gpte)) | 441 | if (pte_dirty(gpte)) |
383 | *spte = gpte_to_spte(cpu, gpte, 1); | 442 | *spte = gpte_to_spte(cpu, gpte, 1); |
384 | else | 443 | else |
385 | /* If this is a read, don't set the "writable" bit in the page | 444 | /* |
445 | * If this is a read, don't set the "writable" bit in the page | ||
386 | * table entry, even if the Guest says it's writable. That way | 446 | * table entry, even if the Guest says it's writable. That way |
387 | * we will come back here when a write does actually occur, so | 447 | * we will come back here when a write does actually occur, so |
388 | * we can update the Guest's _PAGE_DIRTY flag. */ | 448 | * we can update the Guest's _PAGE_DIRTY flag. |
449 | */ | ||
389 | native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0)); | 450 | native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0)); |
390 | 451 | ||
391 | /* Finally, we write the Guest PTE entry back: we've set the | 452 | /* |
392 | * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ | 453 | * Finally, we write the Guest PTE entry back: we've set the |
454 | * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. | ||
455 | */ | ||
393 | lgwrite(cpu, gpte_ptr, pte_t, gpte); | 456 | lgwrite(cpu, gpte_ptr, pte_t, gpte); |
394 | 457 | ||
395 | /* The fault is fixed, the page table is populated, the mapping | 458 | /* |
459 | * The fault is fixed, the page table is populated, the mapping | ||
396 | * manipulated, the result returned and the code complete. A small | 460 | * manipulated, the result returned and the code complete. A small |
397 | * delay and a trace of alliteration are the only indications the Guest | 461 | * delay and a trace of alliteration are the only indications the Guest |
398 | * has that a page fault occurred at all. */ | 462 | * has that a page fault occurred at all. |
463 | */ | ||
399 | return true; | 464 | return true; |
400 | } | 465 | } |
401 | 466 | ||
@@ -408,7 +473,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) | |||
408 | * mapped, so it's overkill. | 473 | * mapped, so it's overkill. |
409 | * | 474 | * |
410 | * This is a quick version which answers the question: is this virtual address | 475 | * This is a quick version which answers the question: is this virtual address |
411 | * mapped by the shadow page tables, and is it writable? */ | 476 | * mapped by the shadow page tables, and is it writable? |
477 | */ | ||
412 | static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) | 478 | static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) |
413 | { | 479 | { |
414 | pgd_t *spgd; | 480 | pgd_t *spgd; |
@@ -428,21 +494,26 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) | |||
428 | return false; | 494 | return false; |
429 | #endif | 495 | #endif |
430 | 496 | ||
431 | /* Check the flags on the pte entry itself: it must be present and | 497 | /* |
432 | * writable. */ | 498 | * Check the flags on the pte entry itself: it must be present and |
499 | * writable. | ||
500 | */ | ||
433 | flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr))); | 501 | flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr))); |
434 | 502 | ||
435 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); | 503 | return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); |
436 | } | 504 | } |
437 | 505 | ||
438 | /* So, when pin_stack_pages() asks us to pin a page, we check if it's already | 506 | /* |
507 | * So, when pin_stack_pages() asks us to pin a page, we check if it's already | ||
439 | * in the page tables, and if not, we call demand_page() with error code 2 | 508 | * in the page tables, and if not, we call demand_page() with error code 2 |
440 | * (meaning "write"). */ | 509 | * (meaning "write"). |
510 | */ | ||
441 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) | 511 | void pin_page(struct lg_cpu *cpu, unsigned long vaddr) |
442 | { | 512 | { |
443 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) | 513 | if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) |
444 | kill_guest(cpu, "bad stack page %#lx", vaddr); | 514 | kill_guest(cpu, "bad stack page %#lx", vaddr); |
445 | } | 515 | } |
516 | /*:*/ | ||
446 | 517 | ||
447 | #ifdef CONFIG_X86_PAE | 518 | #ifdef CONFIG_X86_PAE |
448 | static void release_pmd(pmd_t *spmd) | 519 | static void release_pmd(pmd_t *spmd) |
@@ -479,15 +550,21 @@ static void release_pgd(pgd_t *spgd) | |||
479 | } | 550 | } |
480 | 551 | ||
481 | #else /* !CONFIG_X86_PAE */ | 552 | #else /* !CONFIG_X86_PAE */ |
482 | /*H:450 If we chase down the release_pgd() code, it looks like this: */ | 553 | /*H:450 |
554 | * If we chase down the release_pgd() code, the non-PAE version looks like | ||
555 | * this. The PAE version is almost identical, but instead of calling | ||
556 | * release_pte it calls release_pmd(), which looks much like this. | ||
557 | */ | ||
483 | static void release_pgd(pgd_t *spgd) | 558 | static void release_pgd(pgd_t *spgd) |
484 | { | 559 | { |
485 | /* If the entry's not present, there's nothing to release. */ | 560 | /* If the entry's not present, there's nothing to release. */ |
486 | if (pgd_flags(*spgd) & _PAGE_PRESENT) { | 561 | if (pgd_flags(*spgd) & _PAGE_PRESENT) { |
487 | unsigned int i; | 562 | unsigned int i; |
488 | /* Converting the pfn to find the actual PTE page is easy: turn | 563 | /* |
564 | * Converting the pfn to find the actual PTE page is easy: turn | ||
489 | * the page number into a physical address, then convert to a | 565 | * the page number into a physical address, then convert to a |
490 | * virtual address (easy for kernel pages like this one). */ | 566 | * virtual address (easy for kernel pages like this one). |
567 | */ | ||
491 | pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); | 568 | pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); |
492 | /* For each entry in the page, we might need to release it. */ | 569 | /* For each entry in the page, we might need to release it. */ |
493 | for (i = 0; i < PTRS_PER_PTE; i++) | 570 | for (i = 0; i < PTRS_PER_PTE; i++) |
@@ -499,9 +576,12 @@ static void release_pgd(pgd_t *spgd) | |||
499 | } | 576 | } |
500 | } | 577 | } |
501 | #endif | 578 | #endif |
502 | /*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings() | 579 | |
580 | /*H:445 | ||
581 | * We saw flush_user_mappings() twice: once from the flush_user_mappings() | ||
503 | * hypercall and once in new_pgdir() when we re-used a top-level pgdir page. | 582 | * hypercall and once in new_pgdir() when we re-used a top-level pgdir page. |
504 | * It simply releases every PTE page from 0 up to the Guest's kernel address. */ | 583 | * It simply releases every PTE page from 0 up to the Guest's kernel address. |
584 | */ | ||
505 | static void flush_user_mappings(struct lguest *lg, int idx) | 585 | static void flush_user_mappings(struct lguest *lg, int idx) |
506 | { | 586 | { |
507 | unsigned int i; | 587 | unsigned int i; |
@@ -510,10 +590,12 @@ static void flush_user_mappings(struct lguest *lg, int idx) | |||
510 | release_pgd(lg->pgdirs[idx].pgdir + i); | 590 | release_pgd(lg->pgdirs[idx].pgdir + i); |
511 | } | 591 | } |
512 | 592 | ||
513 | /*H:440 (v) Flushing (throwing away) page tables, | 593 | /*H:440 |
594 | * (v) Flushing (throwing away) page tables, | ||
514 | * | 595 | * |
515 | * The Guest has a hypercall to throw away the page tables: it's used when a | 596 | * The Guest has a hypercall to throw away the page tables: it's used when a |
516 | * large number of mappings have been changed. */ | 597 | * large number of mappings have been changed. |
598 | */ | ||
517 | void guest_pagetable_flush_user(struct lg_cpu *cpu) | 599 | void guest_pagetable_flush_user(struct lg_cpu *cpu) |
518 | { | 600 | { |
519 | /* Drop the userspace part of the current page table. */ | 601 | /* Drop the userspace part of the current page table. */ |
@@ -551,9 +633,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) | |||
551 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); | 633 | return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); |
552 | } | 634 | } |
553 | 635 | ||
554 | /* We keep several page tables. This is a simple routine to find the page | 636 | /* |
637 | * We keep several page tables. This is a simple routine to find the page | ||
555 | * table (if any) corresponding to this top-level address the Guest has given | 638 | * table (if any) corresponding to this top-level address the Guest has given |
556 | * us. */ | 639 | * us. |
640 | */ | ||
557 | static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) | 641 | static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) |
558 | { | 642 | { |
559 | unsigned int i; | 643 | unsigned int i; |
@@ -563,9 +647,11 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) | |||
563 | return i; | 647 | return i; |
564 | } | 648 | } |
565 | 649 | ||
566 | /*H:435 And this is us, creating the new page directory. If we really do | 650 | /*H:435 |
651 | * And this is us, creating the new page directory. If we really do | ||
567 | * allocate a new one (and so the kernel parts are not there), we set | 652 | * allocate a new one (and so the kernel parts are not there), we set |
568 | * blank_pgdir. */ | 653 | * blank_pgdir. |
654 | */ | ||
569 | static unsigned int new_pgdir(struct lg_cpu *cpu, | 655 | static unsigned int new_pgdir(struct lg_cpu *cpu, |
570 | unsigned long gpgdir, | 656 | unsigned long gpgdir, |
571 | int *blank_pgdir) | 657 | int *blank_pgdir) |
@@ -575,8 +661,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, | |||
575 | pmd_t *pmd_table; | 661 | pmd_t *pmd_table; |
576 | #endif | 662 | #endif |
577 | 663 | ||
578 | /* We pick one entry at random to throw out. Choosing the Least | 664 | /* |
579 | * Recently Used might be better, but this is easy. */ | 665 | * We pick one entry at random to throw out. Choosing the Least |
666 | * Recently Used might be better, but this is easy. | ||
667 | */ | ||
580 | next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); | 668 | next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); |
581 | /* If it's never been allocated at all before, try now. */ | 669 | /* If it's never been allocated at all before, try now. */ |
582 | if (!cpu->lg->pgdirs[next].pgdir) { | 670 | if (!cpu->lg->pgdirs[next].pgdir) { |
@@ -587,8 +675,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, | |||
587 | next = cpu->cpu_pgd; | 675 | next = cpu->cpu_pgd; |
588 | else { | 676 | else { |
589 | #ifdef CONFIG_X86_PAE | 677 | #ifdef CONFIG_X86_PAE |
590 | /* In PAE mode, allocate a pmd page and populate the | 678 | /* |
591 | * last pgd entry. */ | 679 | * In PAE mode, allocate a pmd page and populate the |
680 | * last pgd entry. | ||
681 | */ | ||
592 | pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL); | 682 | pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL); |
593 | if (!pmd_table) { | 683 | if (!pmd_table) { |
594 | free_page((long)cpu->lg->pgdirs[next].pgdir); | 684 | free_page((long)cpu->lg->pgdirs[next].pgdir); |
@@ -598,8 +688,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, | |||
598 | set_pgd(cpu->lg->pgdirs[next].pgdir + | 688 | set_pgd(cpu->lg->pgdirs[next].pgdir + |
599 | SWITCHER_PGD_INDEX, | 689 | SWITCHER_PGD_INDEX, |
600 | __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 690 | __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
601 | /* This is a blank page, so there are no kernel | 691 | /* |
602 | * mappings: caller must map the stack! */ | 692 | * This is a blank page, so there are no kernel |
693 | * mappings: caller must map the stack! | ||
694 | */ | ||
603 | *blank_pgdir = 1; | 695 | *blank_pgdir = 1; |
604 | } | 696 | } |
605 | #else | 697 | #else |
@@ -615,19 +707,23 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, | |||
615 | return next; | 707 | return next; |
616 | } | 708 | } |
617 | 709 | ||
618 | /*H:430 (iv) Switching page tables | 710 | /*H:430 |
711 | * (iv) Switching page tables | ||
619 | * | 712 | * |
620 | * Now we've seen all the page table setting and manipulation, let's see | 713 | * Now we've seen all the page table setting and manipulation, let's see |
621 | * what happens when the Guest changes page tables (ie. changes the top-level | 714 | * what happens when the Guest changes page tables (ie. changes the top-level |
622 | * pgdir). This occurs on almost every context switch. */ | 715 | * pgdir). This occurs on almost every context switch. |
716 | */ | ||
623 | void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) | 717 | void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) |
624 | { | 718 | { |
625 | int newpgdir, repin = 0; | 719 | int newpgdir, repin = 0; |
626 | 720 | ||
627 | /* Look to see if we have this one already. */ | 721 | /* Look to see if we have this one already. */ |
628 | newpgdir = find_pgdir(cpu->lg, pgtable); | 722 | newpgdir = find_pgdir(cpu->lg, pgtable); |
629 | /* If not, we allocate or mug an existing one: if it's a fresh one, | 723 | /* |
630 | * repin gets set to 1. */ | 724 | * If not, we allocate or mug an existing one: if it's a fresh one, |
725 | * repin gets set to 1. | ||
726 | */ | ||
631 | if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) | 727 | if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) |
632 | newpgdir = new_pgdir(cpu, pgtable, &repin); | 728 | newpgdir = new_pgdir(cpu, pgtable, &repin); |
633 | /* Change the current pgd index to the new one. */ | 729 | /* Change the current pgd index to the new one. */ |
@@ -637,9 +733,11 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) | |||
637 | pin_stack_pages(cpu); | 733 | pin_stack_pages(cpu); |
638 | } | 734 | } |
639 | 735 | ||
640 | /*H:470 Finally, a routine which throws away everything: all PGD entries in all | 736 | /*H:470 |
737 | * Finally, a routine which throws away everything: all PGD entries in all | ||
641 | * the shadow page tables, including the Guest's kernel mappings. This is used | 738 | * the shadow page tables, including the Guest's kernel mappings. This is used |
642 | * when we destroy the Guest. */ | 739 | * when we destroy the Guest. |
740 | */ | ||
643 | static void release_all_pagetables(struct lguest *lg) | 741 | static void release_all_pagetables(struct lguest *lg) |
644 | { | 742 | { |
645 | unsigned int i, j; | 743 | unsigned int i, j; |
@@ -656,8 +754,10 @@ static void release_all_pagetables(struct lguest *lg) | |||
656 | spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX; | 754 | spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX; |
657 | pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); | 755 | pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); |
658 | 756 | ||
659 | /* And release the pmd entries of that pmd page, | 757 | /* |
660 | * except for the switcher pmd. */ | 758 | * And release the pmd entries of that pmd page, |
759 | * except for the switcher pmd. | ||
760 | */ | ||
661 | for (k = 0; k < SWITCHER_PMD_INDEX; k++) | 761 | for (k = 0; k < SWITCHER_PMD_INDEX; k++) |
662 | release_pmd(&pmdpage[k]); | 762 | release_pmd(&pmdpage[k]); |
663 | #endif | 763 | #endif |
@@ -667,10 +767,12 @@ static void release_all_pagetables(struct lguest *lg) | |||
667 | } | 767 | } |
668 | } | 768 | } |
669 | 769 | ||
670 | /* We also throw away everything when a Guest tells us it's changed a kernel | 770 | /* |
771 | * We also throw away everything when a Guest tells us it's changed a kernel | ||
671 | * mapping. Since kernel mappings are in every page table, it's easiest to | 772 | * mapping. Since kernel mappings are in every page table, it's easiest to |
672 | * throw them all away. This traps the Guest in amber for a while as | 773 | * throw them all away. This traps the Guest in amber for a while as |
673 | * everything faults back in, but it's rare. */ | 774 | * everything faults back in, but it's rare. |
775 | */ | ||
674 | void guest_pagetable_clear_all(struct lg_cpu *cpu) | 776 | void guest_pagetable_clear_all(struct lg_cpu *cpu) |
675 | { | 777 | { |
676 | release_all_pagetables(cpu->lg); | 778 | release_all_pagetables(cpu->lg); |
@@ -678,15 +780,19 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu) | |||
678 | pin_stack_pages(cpu); | 780 | pin_stack_pages(cpu); |
679 | } | 781 | } |
680 | /*:*/ | 782 | /*:*/ |
681 | /*M:009 Since we throw away all mappings when a kernel mapping changes, our | 783 | |
784 | /*M:009 | ||
785 | * Since we throw away all mappings when a kernel mapping changes, our | ||
682 | * performance sucks for guests using highmem. In fact, a guest with | 786 | * performance sucks for guests using highmem. In fact, a guest with |
683 | * PAGE_OFFSET 0xc0000000 (the default) and more than about 700MB of RAM is | 787 | * PAGE_OFFSET 0xc0000000 (the default) and more than about 700MB of RAM is |
684 | * usually slower than a Guest with less memory. | 788 | * usually slower than a Guest with less memory. |
685 | * | 789 | * |
686 | * This, of course, cannot be fixed. It would take some kind of... well, I | 790 | * This, of course, cannot be fixed. It would take some kind of... well, I |
687 | * don't know, but the term "puissant code-fu" comes to mind. :*/ | 791 | * don't know, but the term "puissant code-fu" comes to mind. |
792 | :*/ | ||
688 | 793 | ||
689 | /*H:420 This is the routine which actually sets the page table entry for then | 794 | /*H:420 |
795 | * This is the routine which actually sets the page table entry for then | ||
690 | * "idx"'th shadow page table. | 796 | * "idx"'th shadow page table. |
691 | * | 797 | * |
692 | * Normally, we can just throw out the old entry and replace it with 0: if they | 798 | * Normally, we can just throw out the old entry and replace it with 0: if they |
@@ -715,31 +821,36 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, | |||
715 | spmd = spmd_addr(cpu, *spgd, vaddr); | 821 | spmd = spmd_addr(cpu, *spgd, vaddr); |
716 | if (pmd_flags(*spmd) & _PAGE_PRESENT) { | 822 | if (pmd_flags(*spmd) & _PAGE_PRESENT) { |
717 | #endif | 823 | #endif |
718 | /* Otherwise, we start by releasing | 824 | /* Otherwise, start by releasing the existing entry. */ |
719 | * the existing entry. */ | ||
720 | pte_t *spte = spte_addr(cpu, *spgd, vaddr); | 825 | pte_t *spte = spte_addr(cpu, *spgd, vaddr); |
721 | release_pte(*spte); | 826 | release_pte(*spte); |
722 | 827 | ||
723 | /* If they're setting this entry as dirty or accessed, | 828 | /* |
724 | * we might as well put that entry they've given us | 829 | * If they're setting this entry as dirty or accessed, |
725 | * in now. This shaves 10% off a | 830 | * we might as well put that entry they've given us in |
726 | * copy-on-write micro-benchmark. */ | 831 | * now. This shaves 10% off a copy-on-write |
832 | * micro-benchmark. | ||
833 | */ | ||
727 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { | 834 | if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { |
728 | check_gpte(cpu, gpte); | 835 | check_gpte(cpu, gpte); |
729 | native_set_pte(spte, | 836 | native_set_pte(spte, |
730 | gpte_to_spte(cpu, gpte, | 837 | gpte_to_spte(cpu, gpte, |
731 | pte_flags(gpte) & _PAGE_DIRTY)); | 838 | pte_flags(gpte) & _PAGE_DIRTY)); |
732 | } else | 839 | } else { |
733 | /* Otherwise kill it and we can demand_page() | 840 | /* |
734 | * it in later. */ | 841 | * Otherwise kill it and we can demand_page() |
842 | * it in later. | ||
843 | */ | ||
735 | native_set_pte(spte, __pte(0)); | 844 | native_set_pte(spte, __pte(0)); |
845 | } | ||
736 | #ifdef CONFIG_X86_PAE | 846 | #ifdef CONFIG_X86_PAE |
737 | } | 847 | } |
738 | #endif | 848 | #endif |
739 | } | 849 | } |
740 | } | 850 | } |
741 | 851 | ||
742 | /*H:410 Updating a PTE entry is a little trickier. | 852 | /*H:410 |
853 | * Updating a PTE entry is a little trickier. | ||
743 | * | 854 | * |
744 | * We keep track of several different page tables (the Guest uses one for each | 855 | * We keep track of several different page tables (the Guest uses one for each |
745 | * process, so it makes sense to cache at least a few). Each of these have | 856 | * process, so it makes sense to cache at least a few). Each of these have |
@@ -748,12 +859,15 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, | |||
748 | * all the page tables, not just the current one. This is rare. | 859 | * all the page tables, not just the current one. This is rare. |
749 | * | 860 | * |
750 | * The benefit is that when we have to track a new page table, we can keep all | 861 | * The benefit is that when we have to track a new page table, we can keep all |
751 | * the kernel mappings. This speeds up context switch immensely. */ | 862 | * the kernel mappings. This speeds up context switch immensely. |
863 | */ | ||
752 | void guest_set_pte(struct lg_cpu *cpu, | 864 | void guest_set_pte(struct lg_cpu *cpu, |
753 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) | 865 | unsigned long gpgdir, unsigned long vaddr, pte_t gpte) |
754 | { | 866 | { |
755 | /* Kernel mappings must be changed on all top levels. Slow, but doesn't | 867 | /* |
756 | * happen often. */ | 868 | * Kernel mappings must be changed on all top levels. Slow, but doesn't |
869 | * happen often. | ||
870 | */ | ||
757 | if (vaddr >= cpu->lg->kernel_address) { | 871 | if (vaddr >= cpu->lg->kernel_address) { |
758 | unsigned int i; | 872 | unsigned int i; |
759 | for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) | 873 | for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) |
@@ -795,19 +909,25 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx) | |||
795 | /* ... throw it away. */ | 909 | /* ... throw it away. */ |
796 | release_pgd(lg->pgdirs[pgdir].pgdir + idx); | 910 | release_pgd(lg->pgdirs[pgdir].pgdir + idx); |
797 | } | 911 | } |
912 | |||
798 | #ifdef CONFIG_X86_PAE | 913 | #ifdef CONFIG_X86_PAE |
914 | /* For setting a mid-level, we just throw everything away. It's easy. */ | ||
799 | void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) | 915 | void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) |
800 | { | 916 | { |
801 | guest_pagetable_clear_all(&lg->cpus[0]); | 917 | guest_pagetable_clear_all(&lg->cpus[0]); |
802 | } | 918 | } |
803 | #endif | 919 | #endif |
804 | 920 | ||
805 | /* Once we know how much memory we have we can construct simple identity | 921 | /*H:505 |
806 | * (which set virtual == physical) and linear mappings | 922 | * To get through boot, we construct simple identity page mappings (which |
807 | * which will get the Guest far enough into the boot to create its own. | 923 | * set virtual == physical) and linear mappings which will get the Guest far |
924 | * enough into the boot to create its own. The linear mapping means we | ||
925 | * simplify the Guest boot, but it makes assumptions about their PAGE_OFFSET, | ||
926 | * as you'll see. | ||
808 | * | 927 | * |
809 | * We lay them out of the way, just below the initrd (which is why we need to | 928 | * We lay them out of the way, just below the initrd (which is why we need to |
810 | * know its size here). */ | 929 | * know its size here). |
930 | */ | ||
811 | static unsigned long setup_pagetables(struct lguest *lg, | 931 | static unsigned long setup_pagetables(struct lguest *lg, |
812 | unsigned long mem, | 932 | unsigned long mem, |
813 | unsigned long initrd_size) | 933 | unsigned long initrd_size) |
@@ -825,8 +945,10 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
825 | unsigned int phys_linear; | 945 | unsigned int phys_linear; |
826 | #endif | 946 | #endif |
827 | 947 | ||
828 | /* We have mapped_pages frames to map, so we need | 948 | /* |
829 | * linear_pages page tables to map them. */ | 949 | * We have mapped_pages frames to map, so we need linear_pages page |
950 | * tables to map them. | ||
951 | */ | ||
830 | mapped_pages = mem / PAGE_SIZE; | 952 | mapped_pages = mem / PAGE_SIZE; |
831 | linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; | 953 | linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; |
832 | 954 | ||
@@ -837,10 +959,16 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
837 | linear = (void *)pgdir - linear_pages * PAGE_SIZE; | 959 | linear = (void *)pgdir - linear_pages * PAGE_SIZE; |
838 | 960 | ||
839 | #ifdef CONFIG_X86_PAE | 961 | #ifdef CONFIG_X86_PAE |
962 | /* | ||
963 | * And the single mid page goes below that. We only use one, but | ||
964 | * that's enough to map 1G, which definitely gets us through boot. | ||
965 | */ | ||
840 | pmds = (void *)linear - PAGE_SIZE; | 966 | pmds = (void *)linear - PAGE_SIZE; |
841 | #endif | 967 | #endif |
842 | /* Linear mapping is easy: put every page's address into the | 968 | /* |
843 | * mapping in order. */ | 969 | * Linear mapping is easy: put every page's address into the |
970 | * mapping in order. | ||
971 | */ | ||
844 | for (i = 0; i < mapped_pages; i++) { | 972 | for (i = 0; i < mapped_pages; i++) { |
845 | pte_t pte; | 973 | pte_t pte; |
846 | pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); | 974 | pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); |
@@ -848,11 +976,14 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
848 | return -EFAULT; | 976 | return -EFAULT; |
849 | } | 977 | } |
850 | 978 | ||
851 | /* The top level points to the linear page table pages above. | ||
852 | * We setup the identity and linear mappings here. */ | ||
853 | #ifdef CONFIG_X86_PAE | 979 | #ifdef CONFIG_X86_PAE |
980 | /* | ||
981 | * Make the Guest PMD entries point to the corresponding place in the | ||
982 | * linear mapping (up to one page worth of PMD). | ||
983 | */ | ||
854 | for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD; | 984 | for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD; |
855 | i += PTRS_PER_PTE, j++) { | 985 | i += PTRS_PER_PTE, j++) { |
986 | /* FIXME: native_set_pmd is overkill here. */ | ||
856 | native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i) | 987 | native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i) |
857 | - mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); | 988 | - mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); |
858 | 989 | ||
@@ -860,18 +991,36 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
860 | return -EFAULT; | 991 | return -EFAULT; |
861 | } | 992 | } |
862 | 993 | ||
994 | /* One PGD entry, pointing to that PMD page. */ | ||
863 | set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT)); | 995 | set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT)); |
996 | /* Copy it in as the first PGD entry (ie. addresses 0-1G). */ | ||
864 | if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0) | 997 | if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0) |
865 | return -EFAULT; | 998 | return -EFAULT; |
999 | /* | ||
1000 | * And the third PGD entry (ie. addresses 3G-4G). | ||
1001 | * | ||
1002 | * FIXME: This assumes that PAGE_OFFSET for the Guest is 0xC0000000. | ||
1003 | */ | ||
866 | if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0) | 1004 | if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0) |
867 | return -EFAULT; | 1005 | return -EFAULT; |
868 | #else | 1006 | #else |
1007 | /* | ||
1008 | * The top level points to the linear page table pages above. | ||
1009 | * We setup the identity and linear mappings here. | ||
1010 | */ | ||
869 | phys_linear = (unsigned long)linear - mem_base; | 1011 | phys_linear = (unsigned long)linear - mem_base; |
870 | for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { | 1012 | for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { |
871 | pgd_t pgd; | 1013 | pgd_t pgd; |
1014 | /* | ||
1015 | * Create a PGD entry which points to the right part of the | ||
1016 | * linear PTE pages. | ||
1017 | */ | ||
872 | pgd = __pgd((phys_linear + i * sizeof(pte_t)) | | 1018 | pgd = __pgd((phys_linear + i * sizeof(pte_t)) | |
873 | (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); | 1019 | (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); |
874 | 1020 | ||
1021 | /* | ||
1022 | * Copy it into the PGD page at 0 and PAGE_OFFSET. | ||
1023 | */ | ||
875 | if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd)) | 1024 | if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd)) |
876 | || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET) | 1025 | || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET) |
877 | + i / PTRS_PER_PTE], | 1026 | + i / PTRS_PER_PTE], |
@@ -880,15 +1029,19 @@ static unsigned long setup_pagetables(struct lguest *lg, | |||
880 | } | 1029 | } |
881 | #endif | 1030 | #endif |
882 | 1031 | ||
883 | /* We return the top level (guest-physical) address: remember where | 1032 | /* |
884 | * this is. */ | 1033 | * We return the top level (guest-physical) address: we remember where |
1034 | * this is to write it into lguest_data when the Guest initializes. | ||
1035 | */ | ||
885 | return (unsigned long)pgdir - mem_base; | 1036 | return (unsigned long)pgdir - mem_base; |
886 | } | 1037 | } |
887 | 1038 | ||
888 | /*H:500 (vii) Setting up the page tables initially. | 1039 | /*H:500 |
1040 | * (vii) Setting up the page tables initially. | ||
889 | * | 1041 | * |
890 | * When a Guest is first created, the Launcher tells us where the toplevel of | 1042 | * When a Guest is first created, the Launcher tells us where the toplevel of |
891 | * its first page table is. We set some things up here: */ | 1043 | * its first page table is. We set some things up here: |
1044 | */ | ||
892 | int init_guest_pagetable(struct lguest *lg) | 1045 | int init_guest_pagetable(struct lguest *lg) |
893 | { | 1046 | { |
894 | u64 mem; | 1047 | u64 mem; |
@@ -898,21 +1051,27 @@ int init_guest_pagetable(struct lguest *lg) | |||
898 | pgd_t *pgd; | 1051 | pgd_t *pgd; |
899 | pmd_t *pmd_table; | 1052 | pmd_t *pmd_table; |
900 | #endif | 1053 | #endif |
901 | /* Get the Guest memory size and the ramdisk size from the boot header | 1054 | /* |
902 | * located at lg->mem_base (Guest address 0). */ | 1055 | * Get the Guest memory size and the ramdisk size from the boot header |
1056 | * located at lg->mem_base (Guest address 0). | ||
1057 | */ | ||
903 | if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) | 1058 | if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) |
904 | || get_user(initrd_size, &boot->hdr.ramdisk_size)) | 1059 | || get_user(initrd_size, &boot->hdr.ramdisk_size)) |
905 | return -EFAULT; | 1060 | return -EFAULT; |
906 | 1061 | ||
907 | /* We start on the first shadow page table, and give it a blank PGD | 1062 | /* |
908 | * page. */ | 1063 | * We start on the first shadow page table, and give it a blank PGD |
1064 | * page. | ||
1065 | */ | ||
909 | lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); | 1066 | lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); |
910 | if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) | 1067 | if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) |
911 | return lg->pgdirs[0].gpgdir; | 1068 | return lg->pgdirs[0].gpgdir; |
912 | lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); | 1069 | lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); |
913 | if (!lg->pgdirs[0].pgdir) | 1070 | if (!lg->pgdirs[0].pgdir) |
914 | return -ENOMEM; | 1071 | return -ENOMEM; |
1072 | |||
915 | #ifdef CONFIG_X86_PAE | 1073 | #ifdef CONFIG_X86_PAE |
1074 | /* For PAE, we also create the initial mid-level. */ | ||
916 | pgd = lg->pgdirs[0].pgdir; | 1075 | pgd = lg->pgdirs[0].pgdir; |
917 | pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL); | 1076 | pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL); |
918 | if (!pmd_table) | 1077 | if (!pmd_table) |
@@ -921,27 +1080,33 @@ int init_guest_pagetable(struct lguest *lg) | |||
921 | set_pgd(pgd + SWITCHER_PGD_INDEX, | 1080 | set_pgd(pgd + SWITCHER_PGD_INDEX, |
922 | __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 1081 | __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
923 | #endif | 1082 | #endif |
1083 | |||
1084 | /* This is the current page table. */ | ||
924 | lg->cpus[0].cpu_pgd = 0; | 1085 | lg->cpus[0].cpu_pgd = 0; |
925 | return 0; | 1086 | return 0; |
926 | } | 1087 | } |
927 | 1088 | ||
928 | /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ | 1089 | /*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ |
929 | void page_table_guest_data_init(struct lg_cpu *cpu) | 1090 | void page_table_guest_data_init(struct lg_cpu *cpu) |
930 | { | 1091 | { |
931 | /* We get the kernel address: above this is all kernel memory. */ | 1092 | /* We get the kernel address: above this is all kernel memory. */ |
932 | if (get_user(cpu->lg->kernel_address, | 1093 | if (get_user(cpu->lg->kernel_address, |
933 | &cpu->lg->lguest_data->kernel_address) | 1094 | &cpu->lg->lguest_data->kernel_address) |
934 | /* We tell the Guest that it can't use the top 2 or 4 MB | 1095 | /* |
935 | * of virtual addresses used by the Switcher. */ | 1096 | * We tell the Guest that it can't use the top 2 or 4 MB |
1097 | * of virtual addresses used by the Switcher. | ||
1098 | */ | ||
936 | || put_user(RESERVE_MEM * 1024 * 1024, | 1099 | || put_user(RESERVE_MEM * 1024 * 1024, |
937 | &cpu->lg->lguest_data->reserve_mem) | 1100 | &cpu->lg->lguest_data->reserve_mem) |
938 | || put_user(cpu->lg->pgdirs[0].gpgdir, | 1101 | || put_user(cpu->lg->pgdirs[0].gpgdir, |
939 | &cpu->lg->lguest_data->pgdir)) | 1102 | &cpu->lg->lguest_data->pgdir)) |
940 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); | 1103 | kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); |
941 | 1104 | ||
942 | /* In flush_user_mappings() we loop from 0 to | 1105 | /* |
1106 | * In flush_user_mappings() we loop from 0 to | ||
943 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the | 1107 | * "pgd_index(lg->kernel_address)". This assumes it won't hit the |
944 | * Switcher mappings, so check that now. */ | 1108 | * Switcher mappings, so check that now. |
1109 | */ | ||
945 | #ifdef CONFIG_X86_PAE | 1110 | #ifdef CONFIG_X86_PAE |
946 | if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX && | 1111 | if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX && |
947 | pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX) | 1112 | pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX) |
@@ -964,12 +1129,14 @@ void free_guest_pagetable(struct lguest *lg) | |||
964 | free_page((long)lg->pgdirs[i].pgdir); | 1129 | free_page((long)lg->pgdirs[i].pgdir); |
965 | } | 1130 | } |
966 | 1131 | ||
967 | /*H:480 (vi) Mapping the Switcher when the Guest is about to run. | 1132 | /*H:480 |
1133 | * (vi) Mapping the Switcher when the Guest is about to run. | ||
968 | * | 1134 | * |
969 | * The Switcher and the two pages for this CPU need to be visible in the | 1135 | * The Switcher and the two pages for this CPU need to be visible in the |
970 | * Guest (and not the pages for other CPUs). We have the appropriate PTE pages | 1136 | * Guest (and not the pages for other CPUs). We have the appropriate PTE pages |
971 | * for each CPU already set up, we just need to hook them in now we know which | 1137 | * for each CPU already set up, we just need to hook them in now we know which |
972 | * Guest is about to run on this CPU. */ | 1138 | * Guest is about to run on this CPU. |
1139 | */ | ||
973 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) | 1140 | void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) |
974 | { | 1141 | { |
975 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); | 1142 | pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); |
@@ -980,30 +1147,38 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) | |||
980 | pmd_t switcher_pmd; | 1147 | pmd_t switcher_pmd; |
981 | pmd_t *pmd_table; | 1148 | pmd_t *pmd_table; |
982 | 1149 | ||
1150 | /* FIXME: native_set_pmd is overkill here. */ | ||
983 | native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >> | 1151 | native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >> |
984 | PAGE_SHIFT, PAGE_KERNEL_EXEC)); | 1152 | PAGE_SHIFT, PAGE_KERNEL_EXEC)); |
985 | 1153 | ||
1154 | /* Figure out where the pmd page is, by reading the PGD, and converting | ||
1155 | * it to a virtual address. */ | ||
986 | pmd_table = __va(pgd_pfn(cpu->lg-> | 1156 | pmd_table = __va(pgd_pfn(cpu->lg-> |
987 | pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX]) | 1157 | pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX]) |
988 | << PAGE_SHIFT); | 1158 | << PAGE_SHIFT); |
1159 | /* Now write it into the shadow page table. */ | ||
989 | native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); | 1160 | native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); |
990 | #else | 1161 | #else |
991 | pgd_t switcher_pgd; | 1162 | pgd_t switcher_pgd; |
992 | 1163 | ||
993 | /* Make the last PGD entry for this Guest point to the Switcher's PTE | 1164 | /* |
994 | * page for this CPU (with appropriate flags). */ | 1165 | * Make the last PGD entry for this Guest point to the Switcher's PTE |
1166 | * page for this CPU (with appropriate flags). | ||
1167 | */ | ||
995 | switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC); | 1168 | switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC); |
996 | 1169 | ||
997 | cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; | 1170 | cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; |
998 | 1171 | ||
999 | #endif | 1172 | #endif |
1000 | /* We also change the Switcher PTE page. When we're running the Guest, | 1173 | /* |
1174 | * We also change the Switcher PTE page. When we're running the Guest, | ||
1001 | * we want the Guest's "regs" page to appear where the first Switcher | 1175 | * we want the Guest's "regs" page to appear where the first Switcher |
1002 | * page for this CPU is. This is an optimization: when the Switcher | 1176 | * page for this CPU is. This is an optimization: when the Switcher |
1003 | * saves the Guest registers, it saves them into the first page of this | 1177 | * saves the Guest registers, it saves them into the first page of this |
1004 | * CPU's "struct lguest_pages": if we make sure the Guest's register | 1178 | * CPU's "struct lguest_pages": if we make sure the Guest's register |
1005 | * page is already mapped there, we don't have to copy them out | 1179 | * page is already mapped there, we don't have to copy them out |
1006 | * again. */ | 1180 | * again. |
1181 | */ | ||
1007 | pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; | 1182 | pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; |
1008 | native_set_pte(®s_pte, pfn_pte(pfn, PAGE_KERNEL)); | 1183 | native_set_pte(®s_pte, pfn_pte(pfn, PAGE_KERNEL)); |
1009 | native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], | 1184 | native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], |
@@ -1019,10 +1194,12 @@ static void free_switcher_pte_pages(void) | |||
1019 | free_page((long)switcher_pte_page(i)); | 1194 | free_page((long)switcher_pte_page(i)); |
1020 | } | 1195 | } |
1021 | 1196 | ||
1022 | /*H:520 Setting up the Switcher PTE page for given CPU is fairly easy, given | 1197 | /*H:520 |
1198 | * Setting up the Switcher PTE page for given CPU is fairly easy, given | ||
1023 | * the CPU number and the "struct page"s for the Switcher code itself. | 1199 | * the CPU number and the "struct page"s for the Switcher code itself. |
1024 | * | 1200 | * |
1025 | * Currently the Switcher is less than a page long, so "pages" is always 1. */ | 1201 | * Currently the Switcher is less than a page long, so "pages" is always 1. |
1202 | */ | ||
1026 | static __init void populate_switcher_pte_page(unsigned int cpu, | 1203 | static __init void populate_switcher_pte_page(unsigned int cpu, |
1027 | struct page *switcher_page[], | 1204 | struct page *switcher_page[], |
1028 | unsigned int pages) | 1205 | unsigned int pages) |
@@ -1043,13 +1220,16 @@ static __init void populate_switcher_pte_page(unsigned int cpu, | |||
1043 | native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), | 1220 | native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), |
1044 | __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); | 1221 | __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); |
1045 | 1222 | ||
1046 | /* The second page contains the "struct lguest_ro_state", and is | 1223 | /* |
1047 | * read-only. */ | 1224 | * The second page contains the "struct lguest_ro_state", and is |
1225 | * read-only. | ||
1226 | */ | ||
1048 | native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), | 1227 | native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), |
1049 | __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); | 1228 | __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); |
1050 | } | 1229 | } |
1051 | 1230 | ||
1052 | /* We've made it through the page table code. Perhaps our tired brains are | 1231 | /* |
1232 | * We've made it through the page table code. Perhaps our tired brains are | ||
1053 | * still processing the details, or perhaps we're simply glad it's over. | 1233 | * still processing the details, or perhaps we're simply glad it's over. |
1054 | * | 1234 | * |
1055 | * If nothing else, note that all this complexity in juggling shadow page tables | 1235 | * If nothing else, note that all this complexity in juggling shadow page tables |
@@ -1058,10 +1238,13 @@ static __init void populate_switcher_pte_page(unsigned int cpu, | |||
1058 | * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD | 1238 | * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD |
1059 | * have implemented shadow page table support directly into hardware. | 1239 | * have implemented shadow page table support directly into hardware. |
1060 | * | 1240 | * |
1061 | * There is just one file remaining in the Host. */ | 1241 | * There is just one file remaining in the Host. |
1242 | */ | ||
1062 | 1243 | ||
1063 | /*H:510 At boot or module load time, init_pagetables() allocates and populates | 1244 | /*H:510 |
1064 | * the Switcher PTE page for each CPU. */ | 1245 | * At boot or module load time, init_pagetables() allocates and populates |
1246 | * the Switcher PTE page for each CPU. | ||
1247 | */ | ||
1065 | __init int init_pagetables(struct page **switcher_page, unsigned int pages) | 1248 | __init int init_pagetables(struct page **switcher_page, unsigned int pages) |
1066 | { | 1249 | { |
1067 | unsigned int i; | 1250 | unsigned int i; |
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 482ed5a18750..951c57b0a7e0 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c | |||
@@ -1,4 +1,5 @@ | |||
1 | /*P:600 The x86 architecture has segments, which involve a table of descriptors | 1 | /*P:600 |
2 | * The x86 architecture has segments, which involve a table of descriptors | ||
2 | * which can be used to do funky things with virtual address interpretation. | 3 | * which can be used to do funky things with virtual address interpretation. |
3 | * We originally used to use segments so the Guest couldn't alter the | 4 | * We originally used to use segments so the Guest couldn't alter the |
4 | * Guest<->Host Switcher, and then we had to trim Guest segments, and restore | 5 | * Guest<->Host Switcher, and then we had to trim Guest segments, and restore |
@@ -8,7 +9,8 @@ | |||
8 | * | 9 | * |
9 | * In these modern times, the segment handling code consists of simple sanity | 10 | * In these modern times, the segment handling code consists of simple sanity |
10 | * checks, and the worst you'll experience reading this code is butterfly-rash | 11 | * checks, and the worst you'll experience reading this code is butterfly-rash |
11 | * from frolicking through its parklike serenity. :*/ | 12 | * from frolicking through its parklike serenity. |
13 | :*/ | ||
12 | #include "lg.h" | 14 | #include "lg.h" |
13 | 15 | ||
14 | /*H:600 | 16 | /*H:600 |
@@ -41,10 +43,12 @@ | |||
41 | * begin. | 43 | * begin. |
42 | */ | 44 | */ |
43 | 45 | ||
44 | /* There are several entries we don't let the Guest set. The TSS entry is the | 46 | /* |
47 | * There are several entries we don't let the Guest set. The TSS entry is the | ||
45 | * "Task State Segment" which controls all kinds of delicate things. The | 48 | * "Task State Segment" which controls all kinds of delicate things. The |
46 | * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the | 49 | * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the |
47 | * the Guest can't be trusted to deal with double faults. */ | 50 | * the Guest can't be trusted to deal with double faults. |
51 | */ | ||
48 | static bool ignored_gdt(unsigned int num) | 52 | static bool ignored_gdt(unsigned int num) |
49 | { | 53 | { |
50 | return (num == GDT_ENTRY_TSS | 54 | return (num == GDT_ENTRY_TSS |
@@ -53,42 +57,52 @@ static bool ignored_gdt(unsigned int num) | |||
53 | || num == GDT_ENTRY_DOUBLEFAULT_TSS); | 57 | || num == GDT_ENTRY_DOUBLEFAULT_TSS); |
54 | } | 58 | } |
55 | 59 | ||
56 | /*H:630 Once the Guest gave us new GDT entries, we fix them up a little. We | 60 | /*H:630 |
61 | * Once the Guest gave us new GDT entries, we fix them up a little. We | ||
57 | * don't care if they're invalid: the worst that can happen is a General | 62 | * don't care if they're invalid: the worst that can happen is a General |
58 | * Protection Fault in the Switcher when it restores a Guest segment register | 63 | * Protection Fault in the Switcher when it restores a Guest segment register |
59 | * which tries to use that entry. Then we kill the Guest for causing such a | 64 | * which tries to use that entry. Then we kill the Guest for causing such a |
60 | * mess: the message will be "unhandled trap 256". */ | 65 | * mess: the message will be "unhandled trap 256". |
66 | */ | ||
61 | static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) | 67 | static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) |
62 | { | 68 | { |
63 | unsigned int i; | 69 | unsigned int i; |
64 | 70 | ||
65 | for (i = start; i < end; i++) { | 71 | for (i = start; i < end; i++) { |
66 | /* We never copy these ones to real GDT, so we don't care what | 72 | /* |
67 | * they say */ | 73 | * We never copy these ones to real GDT, so we don't care what |
74 | * they say | ||
75 | */ | ||
68 | if (ignored_gdt(i)) | 76 | if (ignored_gdt(i)) |
69 | continue; | 77 | continue; |
70 | 78 | ||
71 | /* Segment descriptors contain a privilege level: the Guest is | 79 | /* |
80 | * Segment descriptors contain a privilege level: the Guest is | ||
72 | * sometimes careless and leaves this as 0, even though it's | 81 | * sometimes careless and leaves this as 0, even though it's |
73 | * running at privilege level 1. If so, we fix it here. */ | 82 | * running at privilege level 1. If so, we fix it here. |
83 | */ | ||
74 | if ((cpu->arch.gdt[i].b & 0x00006000) == 0) | 84 | if ((cpu->arch.gdt[i].b & 0x00006000) == 0) |
75 | cpu->arch.gdt[i].b |= (GUEST_PL << 13); | 85 | cpu->arch.gdt[i].b |= (GUEST_PL << 13); |
76 | 86 | ||
77 | /* Each descriptor has an "accessed" bit. If we don't set it | 87 | /* |
88 | * Each descriptor has an "accessed" bit. If we don't set it | ||
78 | * now, the CPU will try to set it when the Guest first loads | 89 | * now, the CPU will try to set it when the Guest first loads |
79 | * that entry into a segment register. But the GDT isn't | 90 | * that entry into a segment register. But the GDT isn't |
80 | * writable by the Guest, so bad things can happen. */ | 91 | * writable by the Guest, so bad things can happen. |
92 | */ | ||
81 | cpu->arch.gdt[i].b |= 0x00000100; | 93 | cpu->arch.gdt[i].b |= 0x00000100; |
82 | } | 94 | } |
83 | } | 95 | } |
84 | 96 | ||
85 | /*H:610 Like the IDT, we never simply use the GDT the Guest gives us. We keep | 97 | /*H:610 |
98 | * Like the IDT, we never simply use the GDT the Guest gives us. We keep | ||
86 | * a GDT for each CPU, and copy across the Guest's entries each time we want to | 99 | * a GDT for each CPU, and copy across the Guest's entries each time we want to |
87 | * run the Guest on that CPU. | 100 | * run the Guest on that CPU. |
88 | * | 101 | * |
89 | * This routine is called at boot or modprobe time for each CPU to set up the | 102 | * This routine is called at boot or modprobe time for each CPU to set up the |
90 | * constant GDT entries: the ones which are the same no matter what Guest we're | 103 | * constant GDT entries: the ones which are the same no matter what Guest we're |
91 | * running. */ | 104 | * running. |
105 | */ | ||
92 | void setup_default_gdt_entries(struct lguest_ro_state *state) | 106 | void setup_default_gdt_entries(struct lguest_ro_state *state) |
93 | { | 107 | { |
94 | struct desc_struct *gdt = state->guest_gdt; | 108 | struct desc_struct *gdt = state->guest_gdt; |
@@ -98,30 +112,37 @@ void setup_default_gdt_entries(struct lguest_ro_state *state) | |||
98 | gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; | 112 | gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; |
99 | gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; | 113 | gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; |
100 | 114 | ||
101 | /* The TSS segment refers to the TSS entry for this particular CPU. | 115 | /* |
116 | * The TSS segment refers to the TSS entry for this particular CPU. | ||
102 | * Forgive the magic flags: the 0x8900 means the entry is Present, it's | 117 | * Forgive the magic flags: the 0x8900 means the entry is Present, it's |
103 | * privilege level 0 Available 386 TSS system segment, and the 0x67 | 118 | * privilege level 0 Available 386 TSS system segment, and the 0x67 |
104 | * means Saturn is eclipsed by Mercury in the twelfth house. */ | 119 | * means Saturn is eclipsed by Mercury in the twelfth house. |
120 | */ | ||
105 | gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); | 121 | gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); |
106 | gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) | 122 | gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) |
107 | | ((tss >> 16) & 0x000000FF); | 123 | | ((tss >> 16) & 0x000000FF); |
108 | } | 124 | } |
109 | 125 | ||
110 | /* This routine sets up the initial Guest GDT for booting. All entries start | 126 | /* |
111 | * as 0 (unusable). */ | 127 | * This routine sets up the initial Guest GDT for booting. All entries start |
128 | * as 0 (unusable). | ||
129 | */ | ||
112 | void setup_guest_gdt(struct lg_cpu *cpu) | 130 | void setup_guest_gdt(struct lg_cpu *cpu) |
113 | { | 131 | { |
114 | /* Start with full 0-4G segments... */ | 132 | /* |
133 | * Start with full 0-4G segments...except the Guest is allowed to use | ||
134 | * them, so set the privilege level appropriately in the flags. | ||
135 | */ | ||
115 | cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; | 136 | cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; |
116 | cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; | 137 | cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; |
117 | /* ...except the Guest is allowed to use them, so set the privilege | ||
118 | * level appropriately in the flags. */ | ||
119 | cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); | 138 | cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); |
120 | cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); | 139 | cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); |
121 | } | 140 | } |
122 | 141 | ||
123 | /*H:650 An optimization of copy_gdt(), for just the three "thead-local storage" | 142 | /*H:650 |
124 | * entries. */ | 143 | * An optimization of copy_gdt(), for just the three "thead-local storage" |
144 | * entries. | ||
145 | */ | ||
125 | void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) | 146 | void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) |
126 | { | 147 | { |
127 | unsigned int i; | 148 | unsigned int i; |
@@ -130,26 +151,34 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) | |||
130 | gdt[i] = cpu->arch.gdt[i]; | 151 | gdt[i] = cpu->arch.gdt[i]; |
131 | } | 152 | } |
132 | 153 | ||
133 | /*H:640 When the Guest is run on a different CPU, or the GDT entries have | 154 | /*H:640 |
134 | * changed, copy_gdt() is called to copy the Guest's GDT entries across to this | 155 | * When the Guest is run on a different CPU, or the GDT entries have changed, |
135 | * CPU's GDT. */ | 156 | * copy_gdt() is called to copy the Guest's GDT entries across to this CPU's |
157 | * GDT. | ||
158 | */ | ||
136 | void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) | 159 | void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) |
137 | { | 160 | { |
138 | unsigned int i; | 161 | unsigned int i; |
139 | 162 | ||
140 | /* The default entries from setup_default_gdt_entries() are not | 163 | /* |
141 | * replaced. See ignored_gdt() above. */ | 164 | * The default entries from setup_default_gdt_entries() are not |
165 | * replaced. See ignored_gdt() above. | ||
166 | */ | ||
142 | for (i = 0; i < GDT_ENTRIES; i++) | 167 | for (i = 0; i < GDT_ENTRIES; i++) |
143 | if (!ignored_gdt(i)) | 168 | if (!ignored_gdt(i)) |
144 | gdt[i] = cpu->arch.gdt[i]; | 169 | gdt[i] = cpu->arch.gdt[i]; |
145 | } | 170 | } |
146 | 171 | ||
147 | /*H:620 This is where the Guest asks us to load a new GDT entry | 172 | /*H:620 |
148 | * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. */ | 173 | * This is where the Guest asks us to load a new GDT entry |
174 | * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. | ||
175 | */ | ||
149 | void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) | 176 | void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) |
150 | { | 177 | { |
151 | /* We assume the Guest has the same number of GDT entries as the | 178 | /* |
152 | * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ | 179 | * We assume the Guest has the same number of GDT entries as the |
180 | * Host, otherwise we'd have to dynamically allocate the Guest GDT. | ||
181 | */ | ||
153 | if (num >= ARRAY_SIZE(cpu->arch.gdt)) | 182 | if (num >= ARRAY_SIZE(cpu->arch.gdt)) |
154 | kill_guest(cpu, "too many gdt entries %i", num); | 183 | kill_guest(cpu, "too many gdt entries %i", num); |
155 | 184 | ||
@@ -157,15 +186,19 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) | |||
157 | cpu->arch.gdt[num].a = lo; | 186 | cpu->arch.gdt[num].a = lo; |
158 | cpu->arch.gdt[num].b = hi; | 187 | cpu->arch.gdt[num].b = hi; |
159 | fixup_gdt_table(cpu, num, num+1); | 188 | fixup_gdt_table(cpu, num, num+1); |
160 | /* Mark that the GDT changed so the core knows it has to copy it again, | 189 | /* |
161 | * even if the Guest is run on the same CPU. */ | 190 | * Mark that the GDT changed so the core knows it has to copy it again, |
191 | * even if the Guest is run on the same CPU. | ||
192 | */ | ||
162 | cpu->changed |= CHANGED_GDT; | 193 | cpu->changed |= CHANGED_GDT; |
163 | } | 194 | } |
164 | 195 | ||
165 | /* This is the fast-track version for just changing the three TLS entries. | 196 | /* |
197 | * This is the fast-track version for just changing the three TLS entries. | ||
166 | * Remember that this happens on every context switch, so it's worth | 198 | * Remember that this happens on every context switch, so it's worth |
167 | * optimizing. But wouldn't it be neater to have a single hypercall to cover | 199 | * optimizing. But wouldn't it be neater to have a single hypercall to cover |
168 | * both cases? */ | 200 | * both cases? |
201 | */ | ||
169 | void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) | 202 | void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) |
170 | { | 203 | { |
171 | struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN]; | 204 | struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN]; |
@@ -175,7 +208,6 @@ void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) | |||
175 | /* Note that just the TLS entries have changed. */ | 208 | /* Note that just the TLS entries have changed. */ |
176 | cpu->changed |= CHANGED_GDT_TLS; | 209 | cpu->changed |= CHANGED_GDT_TLS; |
177 | } | 210 | } |
178 | /*:*/ | ||
179 | 211 | ||
180 | /*H:660 | 212 | /*H:660 |
181 | * With this, we have finished the Host. | 213 | * With this, we have finished the Host. |
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index eaf722fe309a..6ae388849a3b 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
@@ -17,13 +17,15 @@ | |||
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | /*P:450 This file contains the x86-specific lguest code. It used to be all | 20 | /*P:450 |
21 | * This file contains the x86-specific lguest code. It used to be all | ||
21 | * mixed in with drivers/lguest/core.c but several foolhardy code slashers | 22 | * mixed in with drivers/lguest/core.c but several foolhardy code slashers |
22 | * wrestled most of the dependencies out to here in preparation for porting | 23 | * wrestled most of the dependencies out to here in preparation for porting |
23 | * lguest to other architectures (see what I mean by foolhardy?). | 24 | * lguest to other architectures (see what I mean by foolhardy?). |
24 | * | 25 | * |
25 | * This also contains a couple of non-obvious setup and teardown pieces which | 26 | * This also contains a couple of non-obvious setup and teardown pieces which |
26 | * were implemented after days of debugging pain. :*/ | 27 | * were implemented after days of debugging pain. |
28 | :*/ | ||
27 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
28 | #include <linux/start_kernel.h> | 30 | #include <linux/start_kernel.h> |
29 | #include <linux/string.h> | 31 | #include <linux/string.h> |
@@ -82,25 +84,33 @@ static DEFINE_PER_CPU(struct lg_cpu *, last_cpu); | |||
82 | */ | 84 | */ |
83 | static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) | 85 | static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) |
84 | { | 86 | { |
85 | /* Copying all this data can be quite expensive. We usually run the | 87 | /* |
88 | * Copying all this data can be quite expensive. We usually run the | ||
86 | * same Guest we ran last time (and that Guest hasn't run anywhere else | 89 | * same Guest we ran last time (and that Guest hasn't run anywhere else |
87 | * meanwhile). If that's not the case, we pretend everything in the | 90 | * meanwhile). If that's not the case, we pretend everything in the |
88 | * Guest has changed. */ | 91 | * Guest has changed. |
92 | */ | ||
89 | if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) { | 93 | if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) { |
90 | __get_cpu_var(last_cpu) = cpu; | 94 | __get_cpu_var(last_cpu) = cpu; |
91 | cpu->last_pages = pages; | 95 | cpu->last_pages = pages; |
92 | cpu->changed = CHANGED_ALL; | 96 | cpu->changed = CHANGED_ALL; |
93 | } | 97 | } |
94 | 98 | ||
95 | /* These copies are pretty cheap, so we do them unconditionally: */ | 99 | /* |
96 | /* Save the current Host top-level page directory. */ | 100 | * These copies are pretty cheap, so we do them unconditionally: */ |
101 | /* Save the current Host top-level page directory. | ||
102 | */ | ||
97 | pages->state.host_cr3 = __pa(current->mm->pgd); | 103 | pages->state.host_cr3 = __pa(current->mm->pgd); |
98 | /* Set up the Guest's page tables to see this CPU's pages (and no | 104 | /* |
99 | * other CPU's pages). */ | 105 | * Set up the Guest's page tables to see this CPU's pages (and no |
106 | * other CPU's pages). | ||
107 | */ | ||
100 | map_switcher_in_guest(cpu, pages); | 108 | map_switcher_in_guest(cpu, pages); |
101 | /* Set up the two "TSS" members which tell the CPU what stack to use | 109 | /* |
110 | * Set up the two "TSS" members which tell the CPU what stack to use | ||
102 | * for traps which do directly into the Guest (ie. traps at privilege | 111 | * for traps which do directly into the Guest (ie. traps at privilege |
103 | * level 1). */ | 112 | * level 1). |
113 | */ | ||
104 | pages->state.guest_tss.sp1 = cpu->esp1; | 114 | pages->state.guest_tss.sp1 = cpu->esp1; |
105 | pages->state.guest_tss.ss1 = cpu->ss1; | 115 | pages->state.guest_tss.ss1 = cpu->ss1; |
106 | 116 | ||
@@ -125,97 +135,126 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) | |||
125 | /* This is a dummy value we need for GCC's sake. */ | 135 | /* This is a dummy value we need for GCC's sake. */ |
126 | unsigned int clobber; | 136 | unsigned int clobber; |
127 | 137 | ||
128 | /* Copy the guest-specific information into this CPU's "struct | 138 | /* |
129 | * lguest_pages". */ | 139 | * Copy the guest-specific information into this CPU's "struct |
140 | * lguest_pages". | ||
141 | */ | ||
130 | copy_in_guest_info(cpu, pages); | 142 | copy_in_guest_info(cpu, pages); |
131 | 143 | ||
132 | /* Set the trap number to 256 (impossible value). If we fault while | 144 | /* |
145 | * Set the trap number to 256 (impossible value). If we fault while | ||
133 | * switching to the Guest (bad segment registers or bug), this will | 146 | * switching to the Guest (bad segment registers or bug), this will |
134 | * cause us to abort the Guest. */ | 147 | * cause us to abort the Guest. |
148 | */ | ||
135 | cpu->regs->trapnum = 256; | 149 | cpu->regs->trapnum = 256; |
136 | 150 | ||
137 | /* Now: we push the "eflags" register on the stack, then do an "lcall". | 151 | /* |
152 | * Now: we push the "eflags" register on the stack, then do an "lcall". | ||
138 | * This is how we change from using the kernel code segment to using | 153 | * This is how we change from using the kernel code segment to using |
139 | * the dedicated lguest code segment, as well as jumping into the | 154 | * the dedicated lguest code segment, as well as jumping into the |
140 | * Switcher. | 155 | * Switcher. |
141 | * | 156 | * |
142 | * The lcall also pushes the old code segment (KERNEL_CS) onto the | 157 | * The lcall also pushes the old code segment (KERNEL_CS) onto the |
143 | * stack, then the address of this call. This stack layout happens to | 158 | * stack, then the address of this call. This stack layout happens to |
144 | * exactly match the stack layout created by an interrupt... */ | 159 | * exactly match the stack layout created by an interrupt... |
160 | */ | ||
145 | asm volatile("pushf; lcall *lguest_entry" | 161 | asm volatile("pushf; lcall *lguest_entry" |
146 | /* This is how we tell GCC that %eax ("a") and %ebx ("b") | 162 | /* |
147 | * are changed by this routine. The "=" means output. */ | 163 | * This is how we tell GCC that %eax ("a") and %ebx ("b") |
164 | * are changed by this routine. The "=" means output. | ||
165 | */ | ||
148 | : "=a"(clobber), "=b"(clobber) | 166 | : "=a"(clobber), "=b"(clobber) |
149 | /* %eax contains the pages pointer. ("0" refers to the | 167 | /* |
168 | * %eax contains the pages pointer. ("0" refers to the | ||
150 | * 0-th argument above, ie "a"). %ebx contains the | 169 | * 0-th argument above, ie "a"). %ebx contains the |
151 | * physical address of the Guest's top-level page | 170 | * physical address of the Guest's top-level page |
152 | * directory. */ | 171 | * directory. |
172 | */ | ||
153 | : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)) | 173 | : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)) |
154 | /* We tell gcc that all these registers could change, | 174 | /* |
175 | * We tell gcc that all these registers could change, | ||
155 | * which means we don't have to save and restore them in | 176 | * which means we don't have to save and restore them in |
156 | * the Switcher. */ | 177 | * the Switcher. |
178 | */ | ||
157 | : "memory", "%edx", "%ecx", "%edi", "%esi"); | 179 | : "memory", "%edx", "%ecx", "%edi", "%esi"); |
158 | } | 180 | } |
159 | /*:*/ | 181 | /*:*/ |
160 | 182 | ||
161 | /*M:002 There are hooks in the scheduler which we can register to tell when we | 183 | /*M:002 |
184 | * There are hooks in the scheduler which we can register to tell when we | ||
162 | * get kicked off the CPU (preempt_notifier_register()). This would allow us | 185 | * get kicked off the CPU (preempt_notifier_register()). This would allow us |
163 | * to lazily disable SYSENTER which would regain some performance, and should | 186 | * to lazily disable SYSENTER which would regain some performance, and should |
164 | * also simplify copy_in_guest_info(). Note that we'd still need to restore | 187 | * also simplify copy_in_guest_info(). Note that we'd still need to restore |
165 | * things when we exit to Launcher userspace, but that's fairly easy. | 188 | * things when we exit to Launcher userspace, but that's fairly easy. |
166 | * | 189 | * |
167 | * We could also try using this hooks for PGE, but that might be too expensive. | 190 | * We could also try using these hooks for PGE, but that might be too expensive. |
168 | * | 191 | * |
169 | * The hooks were designed for KVM, but we can also put them to good use. :*/ | 192 | * The hooks were designed for KVM, but we can also put them to good use. |
193 | :*/ | ||
170 | 194 | ||
171 | /*H:040 This is the i386-specific code to setup and run the Guest. Interrupts | 195 | /*H:040 |
172 | * are disabled: we own the CPU. */ | 196 | * This is the i386-specific code to setup and run the Guest. Interrupts |
197 | * are disabled: we own the CPU. | ||
198 | */ | ||
173 | void lguest_arch_run_guest(struct lg_cpu *cpu) | 199 | void lguest_arch_run_guest(struct lg_cpu *cpu) |
174 | { | 200 | { |
175 | /* Remember the awfully-named TS bit? If the Guest has asked to set it | 201 | /* |
202 | * Remember the awfully-named TS bit? If the Guest has asked to set it | ||
176 | * we set it now, so we can trap and pass that trap to the Guest if it | 203 | * we set it now, so we can trap and pass that trap to the Guest if it |
177 | * uses the FPU. */ | 204 | * uses the FPU. |
205 | */ | ||
178 | if (cpu->ts) | 206 | if (cpu->ts) |
179 | unlazy_fpu(current); | 207 | unlazy_fpu(current); |
180 | 208 | ||
181 | /* SYSENTER is an optimized way of doing system calls. We can't allow | 209 | /* |
210 | * SYSENTER is an optimized way of doing system calls. We can't allow | ||
182 | * it because it always jumps to privilege level 0. A normal Guest | 211 | * it because it always jumps to privilege level 0. A normal Guest |
183 | * won't try it because we don't advertise it in CPUID, but a malicious | 212 | * won't try it because we don't advertise it in CPUID, but a malicious |
184 | * Guest (or malicious Guest userspace program) could, so we tell the | 213 | * Guest (or malicious Guest userspace program) could, so we tell the |
185 | * CPU to disable it before running the Guest. */ | 214 | * CPU to disable it before running the Guest. |
215 | */ | ||
186 | if (boot_cpu_has(X86_FEATURE_SEP)) | 216 | if (boot_cpu_has(X86_FEATURE_SEP)) |
187 | wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); | 217 | wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); |
188 | 218 | ||
189 | /* Now we actually run the Guest. It will return when something | 219 | /* |
220 | * Now we actually run the Guest. It will return when something | ||
190 | * interesting happens, and we can examine its registers to see what it | 221 | * interesting happens, and we can examine its registers to see what it |
191 | * was doing. */ | 222 | * was doing. |
223 | */ | ||
192 | run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); | 224 | run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); |
193 | 225 | ||
194 | /* Note that the "regs" structure contains two extra entries which are | 226 | /* |
227 | * Note that the "regs" structure contains two extra entries which are | ||
195 | * not really registers: a trap number which says what interrupt or | 228 | * not really registers: a trap number which says what interrupt or |
196 | * trap made the switcher code come back, and an error code which some | 229 | * trap made the switcher code come back, and an error code which some |
197 | * traps set. */ | 230 | * traps set. |
231 | */ | ||
198 | 232 | ||
199 | /* Restore SYSENTER if it's supposed to be on. */ | 233 | /* Restore SYSENTER if it's supposed to be on. */ |
200 | if (boot_cpu_has(X86_FEATURE_SEP)) | 234 | if (boot_cpu_has(X86_FEATURE_SEP)) |
201 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 235 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
202 | 236 | ||
203 | /* If the Guest page faulted, then the cr2 register will tell us the | 237 | /* |
238 | * If the Guest page faulted, then the cr2 register will tell us the | ||
204 | * bad virtual address. We have to grab this now, because once we | 239 | * bad virtual address. We have to grab this now, because once we |
205 | * re-enable interrupts an interrupt could fault and thus overwrite | 240 | * re-enable interrupts an interrupt could fault and thus overwrite |
206 | * cr2, or we could even move off to a different CPU. */ | 241 | * cr2, or we could even move off to a different CPU. |
242 | */ | ||
207 | if (cpu->regs->trapnum == 14) | 243 | if (cpu->regs->trapnum == 14) |
208 | cpu->arch.last_pagefault = read_cr2(); | 244 | cpu->arch.last_pagefault = read_cr2(); |
209 | /* Similarly, if we took a trap because the Guest used the FPU, | 245 | /* |
246 | * Similarly, if we took a trap because the Guest used the FPU, | ||
210 | * we have to restore the FPU it expects to see. | 247 | * we have to restore the FPU it expects to see. |
211 | * math_state_restore() may sleep and we may even move off to | 248 | * math_state_restore() may sleep and we may even move off to |
212 | * a different CPU. So all the critical stuff should be done | 249 | * a different CPU. So all the critical stuff should be done |
213 | * before this. */ | 250 | * before this. |
251 | */ | ||
214 | else if (cpu->regs->trapnum == 7) | 252 | else if (cpu->regs->trapnum == 7) |
215 | math_state_restore(); | 253 | math_state_restore(); |
216 | } | 254 | } |
217 | 255 | ||
218 | /*H:130 Now we've examined the hypercall code; our Guest can make requests. | 256 | /*H:130 |
257 | * Now we've examined the hypercall code; our Guest can make requests. | ||
219 | * Our Guest is usually so well behaved; it never tries to do things it isn't | 258 | * Our Guest is usually so well behaved; it never tries to do things it isn't |
220 | * allowed to, and uses hypercalls instead. Unfortunately, Linux's paravirtual | 259 | * allowed to, and uses hypercalls instead. Unfortunately, Linux's paravirtual |
221 | * infrastructure isn't quite complete, because it doesn't contain replacements | 260 | * infrastructure isn't quite complete, because it doesn't contain replacements |
@@ -225,26 +264,33 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
225 | * | 264 | * |
226 | * When the Guest uses one of these instructions, we get a trap (General | 265 | * When the Guest uses one of these instructions, we get a trap (General |
227 | * Protection Fault) and come here. We see if it's one of those troublesome | 266 | * Protection Fault) and come here. We see if it's one of those troublesome |
228 | * instructions and skip over it. We return true if we did. */ | 267 | * instructions and skip over it. We return true if we did. |
268 | */ | ||
229 | static int emulate_insn(struct lg_cpu *cpu) | 269 | static int emulate_insn(struct lg_cpu *cpu) |
230 | { | 270 | { |
231 | u8 insn; | 271 | u8 insn; |
232 | unsigned int insnlen = 0, in = 0, shift = 0; | 272 | unsigned int insnlen = 0, in = 0, shift = 0; |
233 | /* The eip contains the *virtual* address of the Guest's instruction: | 273 | /* |
234 | * guest_pa just subtracts the Guest's page_offset. */ | 274 | * The eip contains the *virtual* address of the Guest's instruction: |
275 | * guest_pa just subtracts the Guest's page_offset. | ||
276 | */ | ||
235 | unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); | 277 | unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); |
236 | 278 | ||
237 | /* This must be the Guest kernel trying to do something, not userspace! | 279 | /* |
280 | * This must be the Guest kernel trying to do something, not userspace! | ||
238 | * The bottom two bits of the CS segment register are the privilege | 281 | * The bottom two bits of the CS segment register are the privilege |
239 | * level. */ | 282 | * level. |
283 | */ | ||
240 | if ((cpu->regs->cs & 3) != GUEST_PL) | 284 | if ((cpu->regs->cs & 3) != GUEST_PL) |
241 | return 0; | 285 | return 0; |
242 | 286 | ||
243 | /* Decoding x86 instructions is icky. */ | 287 | /* Decoding x86 instructions is icky. */ |
244 | insn = lgread(cpu, physaddr, u8); | 288 | insn = lgread(cpu, physaddr, u8); |
245 | 289 | ||
246 | /* 0x66 is an "operand prefix". It means it's using the upper 16 bits | 290 | /* |
247 | of the eax register. */ | 291 | * 0x66 is an "operand prefix". It means it's using the upper 16 bits |
292 | * of the eax register. | ||
293 | */ | ||
248 | if (insn == 0x66) { | 294 | if (insn == 0x66) { |
249 | shift = 16; | 295 | shift = 16; |
250 | /* The instruction is 1 byte so far, read the next byte. */ | 296 | /* The instruction is 1 byte so far, read the next byte. */ |
@@ -252,8 +298,10 @@ static int emulate_insn(struct lg_cpu *cpu) | |||
252 | insn = lgread(cpu, physaddr + insnlen, u8); | 298 | insn = lgread(cpu, physaddr + insnlen, u8); |
253 | } | 299 | } |
254 | 300 | ||
255 | /* We can ignore the lower bit for the moment and decode the 4 opcodes | 301 | /* |
256 | * we need to emulate. */ | 302 | * We can ignore the lower bit for the moment and decode the 4 opcodes |
303 | * we need to emulate. | ||
304 | */ | ||
257 | switch (insn & 0xFE) { | 305 | switch (insn & 0xFE) { |
258 | case 0xE4: /* in <next byte>,%al */ | 306 | case 0xE4: /* in <next byte>,%al */ |
259 | insnlen += 2; | 307 | insnlen += 2; |
@@ -274,9 +322,11 @@ static int emulate_insn(struct lg_cpu *cpu) | |||
274 | return 0; | 322 | return 0; |
275 | } | 323 | } |
276 | 324 | ||
277 | /* If it was an "IN" instruction, they expect the result to be read | 325 | /* |
326 | * If it was an "IN" instruction, they expect the result to be read | ||
278 | * into %eax, so we change %eax. We always return all-ones, which | 327 | * into %eax, so we change %eax. We always return all-ones, which |
279 | * traditionally means "there's nothing there". */ | 328 | * traditionally means "there's nothing there". |
329 | */ | ||
280 | if (in) { | 330 | if (in) { |
281 | /* Lower bit tells is whether it's a 16 or 32 bit access */ | 331 | /* Lower bit tells is whether it's a 16 or 32 bit access */ |
282 | if (insn & 0x1) | 332 | if (insn & 0x1) |
@@ -290,7 +340,8 @@ static int emulate_insn(struct lg_cpu *cpu) | |||
290 | return 1; | 340 | return 1; |
291 | } | 341 | } |
292 | 342 | ||
293 | /* Our hypercalls mechanism used to be based on direct software interrupts. | 343 | /* |
344 | * Our hypercalls mechanism used to be based on direct software interrupts. | ||
294 | * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to | 345 | * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to |
295 | * change over to using kvm hypercalls. | 346 | * change over to using kvm hypercalls. |
296 | * | 347 | * |
@@ -318,16 +369,20 @@ static int emulate_insn(struct lg_cpu *cpu) | |||
318 | */ | 369 | */ |
319 | static void rewrite_hypercall(struct lg_cpu *cpu) | 370 | static void rewrite_hypercall(struct lg_cpu *cpu) |
320 | { | 371 | { |
321 | /* This are the opcodes we use to patch the Guest. The opcode for "int | 372 | /* |
373 | * This are the opcodes we use to patch the Guest. The opcode for "int | ||
322 | * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we | 374 | * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we |
323 | * complete the sequence with a NOP (0x90). */ | 375 | * complete the sequence with a NOP (0x90). |
376 | */ | ||
324 | u8 insn[3] = {0xcd, 0x1f, 0x90}; | 377 | u8 insn[3] = {0xcd, 0x1f, 0x90}; |
325 | 378 | ||
326 | __lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn)); | 379 | __lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn)); |
327 | /* The above write might have caused a copy of that page to be made | 380 | /* |
381 | * The above write might have caused a copy of that page to be made | ||
328 | * (if it was read-only). We need to make sure the Guest has | 382 | * (if it was read-only). We need to make sure the Guest has |
329 | * up-to-date pagetables. As this doesn't happen often, we can just | 383 | * up-to-date pagetables. As this doesn't happen often, we can just |
330 | * drop them all. */ | 384 | * drop them all. |
385 | */ | ||
331 | guest_pagetable_clear_all(cpu); | 386 | guest_pagetable_clear_all(cpu); |
332 | } | 387 | } |
333 | 388 | ||
@@ -335,9 +390,11 @@ static bool is_hypercall(struct lg_cpu *cpu) | |||
335 | { | 390 | { |
336 | u8 insn[3]; | 391 | u8 insn[3]; |
337 | 392 | ||
338 | /* This must be the Guest kernel trying to do something. | 393 | /* |
394 | * This must be the Guest kernel trying to do something. | ||
339 | * The bottom two bits of the CS segment register are the privilege | 395 | * The bottom two bits of the CS segment register are the privilege |
340 | * level. */ | 396 | * level. |
397 | */ | ||
341 | if ((cpu->regs->cs & 3) != GUEST_PL) | 398 | if ((cpu->regs->cs & 3) != GUEST_PL) |
342 | return false; | 399 | return false; |
343 | 400 | ||
@@ -351,86 +408,105 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) | |||
351 | { | 408 | { |
352 | switch (cpu->regs->trapnum) { | 409 | switch (cpu->regs->trapnum) { |
353 | case 13: /* We've intercepted a General Protection Fault. */ | 410 | case 13: /* We've intercepted a General Protection Fault. */ |
354 | /* Check if this was one of those annoying IN or OUT | 411 | /* |
412 | * Check if this was one of those annoying IN or OUT | ||
355 | * instructions which we need to emulate. If so, we just go | 413 | * instructions which we need to emulate. If so, we just go |
356 | * back into the Guest after we've done it. */ | 414 | * back into the Guest after we've done it. |
415 | */ | ||
357 | if (cpu->regs->errcode == 0) { | 416 | if (cpu->regs->errcode == 0) { |
358 | if (emulate_insn(cpu)) | 417 | if (emulate_insn(cpu)) |
359 | return; | 418 | return; |
360 | } | 419 | } |
361 | /* If KVM is active, the vmcall instruction triggers a | 420 | /* |
362 | * General Protection Fault. Normally it triggers an | 421 | * If KVM is active, the vmcall instruction triggers a General |
363 | * invalid opcode fault (6): */ | 422 | * Protection Fault. Normally it triggers an invalid opcode |
423 | * fault (6): | ||
424 | */ | ||
364 | case 6: | 425 | case 6: |
365 | /* We need to check if ring == GUEST_PL and | 426 | /* |
366 | * faulting instruction == vmcall. */ | 427 | * We need to check if ring == GUEST_PL and faulting |
428 | * instruction == vmcall. | ||
429 | */ | ||
367 | if (is_hypercall(cpu)) { | 430 | if (is_hypercall(cpu)) { |
368 | rewrite_hypercall(cpu); | 431 | rewrite_hypercall(cpu); |
369 | return; | 432 | return; |
370 | } | 433 | } |
371 | break; | 434 | break; |
372 | case 14: /* We've intercepted a Page Fault. */ | 435 | case 14: /* We've intercepted a Page Fault. */ |
373 | /* The Guest accessed a virtual address that wasn't mapped. | 436 | /* |
437 | * The Guest accessed a virtual address that wasn't mapped. | ||
374 | * This happens a lot: we don't actually set up most of the page | 438 | * This happens a lot: we don't actually set up most of the page |
375 | * tables for the Guest at all when we start: as it runs it asks | 439 | * tables for the Guest at all when we start: as it runs it asks |
376 | * for more and more, and we set them up as required. In this | 440 | * for more and more, and we set them up as required. In this |
377 | * case, we don't even tell the Guest that the fault happened. | 441 | * case, we don't even tell the Guest that the fault happened. |
378 | * | 442 | * |
379 | * The errcode tells whether this was a read or a write, and | 443 | * The errcode tells whether this was a read or a write, and |
380 | * whether kernel or userspace code. */ | 444 | * whether kernel or userspace code. |
445 | */ | ||
381 | if (demand_page(cpu, cpu->arch.last_pagefault, | 446 | if (demand_page(cpu, cpu->arch.last_pagefault, |
382 | cpu->regs->errcode)) | 447 | cpu->regs->errcode)) |
383 | return; | 448 | return; |
384 | 449 | ||
385 | /* OK, it's really not there (or not OK): the Guest needs to | 450 | /* |
451 | * OK, it's really not there (or not OK): the Guest needs to | ||
386 | * know. We write out the cr2 value so it knows where the | 452 | * know. We write out the cr2 value so it knows where the |
387 | * fault occurred. | 453 | * fault occurred. |
388 | * | 454 | * |
389 | * Note that if the Guest were really messed up, this could | 455 | * Note that if the Guest were really messed up, this could |
390 | * happen before it's done the LHCALL_LGUEST_INIT hypercall, so | 456 | * happen before it's done the LHCALL_LGUEST_INIT hypercall, so |
391 | * lg->lguest_data could be NULL */ | 457 | * lg->lguest_data could be NULL |
458 | */ | ||
392 | if (cpu->lg->lguest_data && | 459 | if (cpu->lg->lguest_data && |
393 | put_user(cpu->arch.last_pagefault, | 460 | put_user(cpu->arch.last_pagefault, |
394 | &cpu->lg->lguest_data->cr2)) | 461 | &cpu->lg->lguest_data->cr2)) |
395 | kill_guest(cpu, "Writing cr2"); | 462 | kill_guest(cpu, "Writing cr2"); |
396 | break; | 463 | break; |
397 | case 7: /* We've intercepted a Device Not Available fault. */ | 464 | case 7: /* We've intercepted a Device Not Available fault. */ |
398 | /* If the Guest doesn't want to know, we already restored the | 465 | /* |
399 | * Floating Point Unit, so we just continue without telling | 466 | * If the Guest doesn't want to know, we already restored the |
400 | * it. */ | 467 | * Floating Point Unit, so we just continue without telling it. |
468 | */ | ||
401 | if (!cpu->ts) | 469 | if (!cpu->ts) |
402 | return; | 470 | return; |
403 | break; | 471 | break; |
404 | case 32 ... 255: | 472 | case 32 ... 255: |
405 | /* These values mean a real interrupt occurred, in which case | 473 | /* |
474 | * These values mean a real interrupt occurred, in which case | ||
406 | * the Host handler has already been run. We just do a | 475 | * the Host handler has already been run. We just do a |
407 | * friendly check if another process should now be run, then | 476 | * friendly check if another process should now be run, then |
408 | * return to run the Guest again */ | 477 | * return to run the Guest again |
478 | */ | ||
409 | cond_resched(); | 479 | cond_resched(); |
410 | return; | 480 | return; |
411 | case LGUEST_TRAP_ENTRY: | 481 | case LGUEST_TRAP_ENTRY: |
412 | /* Our 'struct hcall_args' maps directly over our regs: we set | 482 | /* |
413 | * up the pointer now to indicate a hypercall is pending. */ | 483 | * Our 'struct hcall_args' maps directly over our regs: we set |
484 | * up the pointer now to indicate a hypercall is pending. | ||
485 | */ | ||
414 | cpu->hcall = (struct hcall_args *)cpu->regs; | 486 | cpu->hcall = (struct hcall_args *)cpu->regs; |
415 | return; | 487 | return; |
416 | } | 488 | } |
417 | 489 | ||
418 | /* We didn't handle the trap, so it needs to go to the Guest. */ | 490 | /* We didn't handle the trap, so it needs to go to the Guest. */ |
419 | if (!deliver_trap(cpu, cpu->regs->trapnum)) | 491 | if (!deliver_trap(cpu, cpu->regs->trapnum)) |
420 | /* If the Guest doesn't have a handler (either it hasn't | 492 | /* |
493 | * If the Guest doesn't have a handler (either it hasn't | ||
421 | * registered any yet, or it's one of the faults we don't let | 494 | * registered any yet, or it's one of the faults we don't let |
422 | * it handle), it dies with this cryptic error message. */ | 495 | * it handle), it dies with this cryptic error message. |
496 | */ | ||
423 | kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", | 497 | kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", |
424 | cpu->regs->trapnum, cpu->regs->eip, | 498 | cpu->regs->trapnum, cpu->regs->eip, |
425 | cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault | 499 | cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault |
426 | : cpu->regs->errcode); | 500 | : cpu->regs->errcode); |
427 | } | 501 | } |
428 | 502 | ||
429 | /* Now we can look at each of the routines this calls, in increasing order of | 503 | /* |
504 | * Now we can look at each of the routines this calls, in increasing order of | ||
430 | * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(), | 505 | * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(), |
431 | * deliver_trap() and demand_page(). After all those, we'll be ready to | 506 | * deliver_trap() and demand_page(). After all those, we'll be ready to |
432 | * examine the Switcher, and our philosophical understanding of the Host/Guest | 507 | * examine the Switcher, and our philosophical understanding of the Host/Guest |
433 | * duality will be complete. :*/ | 508 | * duality will be complete. |
509 | :*/ | ||
434 | static void adjust_pge(void *on) | 510 | static void adjust_pge(void *on) |
435 | { | 511 | { |
436 | if (on) | 512 | if (on) |
@@ -439,13 +515,16 @@ static void adjust_pge(void *on) | |||
439 | write_cr4(read_cr4() & ~X86_CR4_PGE); | 515 | write_cr4(read_cr4() & ~X86_CR4_PGE); |
440 | } | 516 | } |
441 | 517 | ||
442 | /*H:020 Now the Switcher is mapped and every thing else is ready, we need to do | 518 | /*H:020 |
443 | * some more i386-specific initialization. */ | 519 | * Now the Switcher is mapped and every thing else is ready, we need to do |
520 | * some more i386-specific initialization. | ||
521 | */ | ||
444 | void __init lguest_arch_host_init(void) | 522 | void __init lguest_arch_host_init(void) |
445 | { | 523 | { |
446 | int i; | 524 | int i; |
447 | 525 | ||
448 | /* Most of the i386/switcher.S doesn't care that it's been moved; on | 526 | /* |
527 | * Most of the i386/switcher.S doesn't care that it's been moved; on | ||
449 | * Intel, jumps are relative, and it doesn't access any references to | 528 | * Intel, jumps are relative, and it doesn't access any references to |
450 | * external code or data. | 529 | * external code or data. |
451 | * | 530 | * |
@@ -453,7 +532,8 @@ void __init lguest_arch_host_init(void) | |||
453 | * addresses are placed in a table (default_idt_entries), so we need to | 532 | * addresses are placed in a table (default_idt_entries), so we need to |
454 | * update the table with the new addresses. switcher_offset() is a | 533 | * update the table with the new addresses. switcher_offset() is a |
455 | * convenience function which returns the distance between the | 534 | * convenience function which returns the distance between the |
456 | * compiled-in switcher code and the high-mapped copy we just made. */ | 535 | * compiled-in switcher code and the high-mapped copy we just made. |
536 | */ | ||
457 | for (i = 0; i < IDT_ENTRIES; i++) | 537 | for (i = 0; i < IDT_ENTRIES; i++) |
458 | default_idt_entries[i] += switcher_offset(); | 538 | default_idt_entries[i] += switcher_offset(); |
459 | 539 | ||
@@ -468,63 +548,81 @@ void __init lguest_arch_host_init(void) | |||
468 | for_each_possible_cpu(i) { | 548 | for_each_possible_cpu(i) { |
469 | /* lguest_pages() returns this CPU's two pages. */ | 549 | /* lguest_pages() returns this CPU's two pages. */ |
470 | struct lguest_pages *pages = lguest_pages(i); | 550 | struct lguest_pages *pages = lguest_pages(i); |
471 | /* This is a convenience pointer to make the code fit one | 551 | /* This is a convenience pointer to make the code neater. */ |
472 | * statement to a line. */ | ||
473 | struct lguest_ro_state *state = &pages->state; | 552 | struct lguest_ro_state *state = &pages->state; |
474 | 553 | ||
475 | /* The Global Descriptor Table: the Host has a different one | 554 | /* |
555 | * The Global Descriptor Table: the Host has a different one | ||
476 | * for each CPU. We keep a descriptor for the GDT which says | 556 | * for each CPU. We keep a descriptor for the GDT which says |
477 | * where it is and how big it is (the size is actually the last | 557 | * where it is and how big it is (the size is actually the last |
478 | * byte, not the size, hence the "-1"). */ | 558 | * byte, not the size, hence the "-1"). |
559 | */ | ||
479 | state->host_gdt_desc.size = GDT_SIZE-1; | 560 | state->host_gdt_desc.size = GDT_SIZE-1; |
480 | state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); | 561 | state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); |
481 | 562 | ||
482 | /* All CPUs on the Host use the same Interrupt Descriptor | 563 | /* |
564 | * All CPUs on the Host use the same Interrupt Descriptor | ||
483 | * Table, so we just use store_idt(), which gets this CPU's IDT | 565 | * Table, so we just use store_idt(), which gets this CPU's IDT |
484 | * descriptor. */ | 566 | * descriptor. |
567 | */ | ||
485 | store_idt(&state->host_idt_desc); | 568 | store_idt(&state->host_idt_desc); |
486 | 569 | ||
487 | /* The descriptors for the Guest's GDT and IDT can be filled | 570 | /* |
571 | * The descriptors for the Guest's GDT and IDT can be filled | ||
488 | * out now, too. We copy the GDT & IDT into ->guest_gdt and | 572 | * out now, too. We copy the GDT & IDT into ->guest_gdt and |
489 | * ->guest_idt before actually running the Guest. */ | 573 | * ->guest_idt before actually running the Guest. |
574 | */ | ||
490 | state->guest_idt_desc.size = sizeof(state->guest_idt)-1; | 575 | state->guest_idt_desc.size = sizeof(state->guest_idt)-1; |
491 | state->guest_idt_desc.address = (long)&state->guest_idt; | 576 | state->guest_idt_desc.address = (long)&state->guest_idt; |
492 | state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; | 577 | state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; |
493 | state->guest_gdt_desc.address = (long)&state->guest_gdt; | 578 | state->guest_gdt_desc.address = (long)&state->guest_gdt; |
494 | 579 | ||
495 | /* We know where we want the stack to be when the Guest enters | 580 | /* |
581 | * We know where we want the stack to be when the Guest enters | ||
496 | * the Switcher: in pages->regs. The stack grows upwards, so | 582 | * the Switcher: in pages->regs. The stack grows upwards, so |
497 | * we start it at the end of that structure. */ | 583 | * we start it at the end of that structure. |
584 | */ | ||
498 | state->guest_tss.sp0 = (long)(&pages->regs + 1); | 585 | state->guest_tss.sp0 = (long)(&pages->regs + 1); |
499 | /* And this is the GDT entry to use for the stack: we keep a | 586 | /* |
500 | * couple of special LGUEST entries. */ | 587 | * And this is the GDT entry to use for the stack: we keep a |
588 | * couple of special LGUEST entries. | ||
589 | */ | ||
501 | state->guest_tss.ss0 = LGUEST_DS; | 590 | state->guest_tss.ss0 = LGUEST_DS; |
502 | 591 | ||
503 | /* x86 can have a finegrained bitmap which indicates what I/O | 592 | /* |
593 | * x86 can have a finegrained bitmap which indicates what I/O | ||
504 | * ports the process can use. We set it to the end of our | 594 | * ports the process can use. We set it to the end of our |
505 | * structure, meaning "none". */ | 595 | * structure, meaning "none". |
596 | */ | ||
506 | state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); | 597 | state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); |
507 | 598 | ||
508 | /* Some GDT entries are the same across all Guests, so we can | 599 | /* |
509 | * set them up now. */ | 600 | * Some GDT entries are the same across all Guests, so we can |
601 | * set them up now. | ||
602 | */ | ||
510 | setup_default_gdt_entries(state); | 603 | setup_default_gdt_entries(state); |
511 | /* Most IDT entries are the same for all Guests, too.*/ | 604 | /* Most IDT entries are the same for all Guests, too.*/ |
512 | setup_default_idt_entries(state, default_idt_entries); | 605 | setup_default_idt_entries(state, default_idt_entries); |
513 | 606 | ||
514 | /* The Host needs to be able to use the LGUEST segments on this | 607 | /* |
515 | * CPU, too, so put them in the Host GDT. */ | 608 | * The Host needs to be able to use the LGUEST segments on this |
609 | * CPU, too, so put them in the Host GDT. | ||
610 | */ | ||
516 | get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; | 611 | get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; |
517 | get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; | 612 | get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; |
518 | } | 613 | } |
519 | 614 | ||
520 | /* In the Switcher, we want the %cs segment register to use the | 615 | /* |
616 | * In the Switcher, we want the %cs segment register to use the | ||
521 | * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so | 617 | * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so |
522 | * it will be undisturbed when we switch. To change %cs and jump we | 618 | * it will be undisturbed when we switch. To change %cs and jump we |
523 | * need this structure to feed to Intel's "lcall" instruction. */ | 619 | * need this structure to feed to Intel's "lcall" instruction. |
620 | */ | ||
524 | lguest_entry.offset = (long)switch_to_guest + switcher_offset(); | 621 | lguest_entry.offset = (long)switch_to_guest + switcher_offset(); |
525 | lguest_entry.segment = LGUEST_CS; | 622 | lguest_entry.segment = LGUEST_CS; |
526 | 623 | ||
527 | /* Finally, we need to turn off "Page Global Enable". PGE is an | 624 | /* |
625 | * Finally, we need to turn off "Page Global Enable". PGE is an | ||
528 | * optimization where page table entries are specially marked to show | 626 | * optimization where page table entries are specially marked to show |
529 | * they never change. The Host kernel marks all the kernel pages this | 627 | * they never change. The Host kernel marks all the kernel pages this |
530 | * way because it's always present, even when userspace is running. | 628 | * way because it's always present, even when userspace is running. |
@@ -534,16 +632,21 @@ void __init lguest_arch_host_init(void) | |||
534 | * you'll get really weird bugs that you'll chase for two days. | 632 | * you'll get really weird bugs that you'll chase for two days. |
535 | * | 633 | * |
536 | * I used to turn PGE off every time we switched to the Guest and back | 634 | * I used to turn PGE off every time we switched to the Guest and back |
537 | * on when we return, but that slowed the Switcher down noticibly. */ | 635 | * on when we return, but that slowed the Switcher down noticibly. |
636 | */ | ||
538 | 637 | ||
539 | /* We don't need the complexity of CPUs coming and going while we're | 638 | /* |
540 | * doing this. */ | 639 | * We don't need the complexity of CPUs coming and going while we're |
640 | * doing this. | ||
641 | */ | ||
541 | get_online_cpus(); | 642 | get_online_cpus(); |
542 | if (cpu_has_pge) { /* We have a broader idea of "global". */ | 643 | if (cpu_has_pge) { /* We have a broader idea of "global". */ |
543 | /* Remember that this was originally set (for cleanup). */ | 644 | /* Remember that this was originally set (for cleanup). */ |
544 | cpu_had_pge = 1; | 645 | cpu_had_pge = 1; |
545 | /* adjust_pge is a helper function which sets or unsets the PGE | 646 | /* |
546 | * bit on its CPU, depending on the argument (0 == unset). */ | 647 | * adjust_pge is a helper function which sets or unsets the PGE |
648 | * bit on its CPU, depending on the argument (0 == unset). | ||
649 | */ | ||
547 | on_each_cpu(adjust_pge, (void *)0, 1); | 650 | on_each_cpu(adjust_pge, (void *)0, 1); |
548 | /* Turn off the feature in the global feature set. */ | 651 | /* Turn off the feature in the global feature set. */ |
549 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); | 652 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); |
@@ -590,26 +693,32 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) | |||
590 | { | 693 | { |
591 | u32 tsc_speed; | 694 | u32 tsc_speed; |
592 | 695 | ||
593 | /* The pointer to the Guest's "struct lguest_data" is the only argument. | 696 | /* |
594 | * We check that address now. */ | 697 | * The pointer to the Guest's "struct lguest_data" is the only argument. |
698 | * We check that address now. | ||
699 | */ | ||
595 | if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, | 700 | if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, |
596 | sizeof(*cpu->lg->lguest_data))) | 701 | sizeof(*cpu->lg->lguest_data))) |
597 | return -EFAULT; | 702 | return -EFAULT; |
598 | 703 | ||
599 | /* Having checked it, we simply set lg->lguest_data to point straight | 704 | /* |
705 | * Having checked it, we simply set lg->lguest_data to point straight | ||
600 | * into the Launcher's memory at the right place and then use | 706 | * into the Launcher's memory at the right place and then use |
601 | * copy_to_user/from_user from now on, instead of lgread/write. I put | 707 | * copy_to_user/from_user from now on, instead of lgread/write. I put |
602 | * this in to show that I'm not immune to writing stupid | 708 | * this in to show that I'm not immune to writing stupid |
603 | * optimizations. */ | 709 | * optimizations. |
710 | */ | ||
604 | cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1; | 711 | cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1; |
605 | 712 | ||
606 | /* We insist that the Time Stamp Counter exist and doesn't change with | 713 | /* |
714 | * We insist that the Time Stamp Counter exist and doesn't change with | ||
607 | * cpu frequency. Some devious chip manufacturers decided that TSC | 715 | * cpu frequency. Some devious chip manufacturers decided that TSC |
608 | * changes could be handled in software. I decided that time going | 716 | * changes could be handled in software. I decided that time going |
609 | * backwards might be good for benchmarks, but it's bad for users. | 717 | * backwards might be good for benchmarks, but it's bad for users. |
610 | * | 718 | * |
611 | * We also insist that the TSC be stable: the kernel detects unreliable | 719 | * We also insist that the TSC be stable: the kernel detects unreliable |
612 | * TSCs for its own purposes, and we use that here. */ | 720 | * TSCs for its own purposes, and we use that here. |
721 | */ | ||
613 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) | 722 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) |
614 | tsc_speed = tsc_khz; | 723 | tsc_speed = tsc_khz; |
615 | else | 724 | else |
@@ -625,38 +734,47 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) | |||
625 | } | 734 | } |
626 | /*:*/ | 735 | /*:*/ |
627 | 736 | ||
628 | /*L:030 lguest_arch_setup_regs() | 737 | /*L:030 |
738 | * lguest_arch_setup_regs() | ||
629 | * | 739 | * |
630 | * Most of the Guest's registers are left alone: we used get_zeroed_page() to | 740 | * Most of the Guest's registers are left alone: we used get_zeroed_page() to |
631 | * allocate the structure, so they will be 0. */ | 741 | * allocate the structure, so they will be 0. |
742 | */ | ||
632 | void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) | 743 | void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) |
633 | { | 744 | { |
634 | struct lguest_regs *regs = cpu->regs; | 745 | struct lguest_regs *regs = cpu->regs; |
635 | 746 | ||
636 | /* There are four "segment" registers which the Guest needs to boot: | 747 | /* |
748 | * There are four "segment" registers which the Guest needs to boot: | ||
637 | * The "code segment" register (cs) refers to the kernel code segment | 749 | * The "code segment" register (cs) refers to the kernel code segment |
638 | * __KERNEL_CS, and the "data", "extra" and "stack" segment registers | 750 | * __KERNEL_CS, and the "data", "extra" and "stack" segment registers |
639 | * refer to the kernel data segment __KERNEL_DS. | 751 | * refer to the kernel data segment __KERNEL_DS. |
640 | * | 752 | * |
641 | * The privilege level is packed into the lower bits. The Guest runs | 753 | * The privilege level is packed into the lower bits. The Guest runs |
642 | * at privilege level 1 (GUEST_PL).*/ | 754 | * at privilege level 1 (GUEST_PL). |
755 | */ | ||
643 | regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; | 756 | regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; |
644 | regs->cs = __KERNEL_CS|GUEST_PL; | 757 | regs->cs = __KERNEL_CS|GUEST_PL; |
645 | 758 | ||
646 | /* The "eflags" register contains miscellaneous flags. Bit 1 (0x002) | 759 | /* |
760 | * The "eflags" register contains miscellaneous flags. Bit 1 (0x002) | ||
647 | * is supposed to always be "1". Bit 9 (0x200) controls whether | 761 | * is supposed to always be "1". Bit 9 (0x200) controls whether |
648 | * interrupts are enabled. We always leave interrupts enabled while | 762 | * interrupts are enabled. We always leave interrupts enabled while |
649 | * running the Guest. */ | 763 | * running the Guest. |
764 | */ | ||
650 | regs->eflags = X86_EFLAGS_IF | 0x2; | 765 | regs->eflags = X86_EFLAGS_IF | 0x2; |
651 | 766 | ||
652 | /* The "Extended Instruction Pointer" register says where the Guest is | 767 | /* |
653 | * running. */ | 768 | * The "Extended Instruction Pointer" register says where the Guest is |
769 | * running. | ||
770 | */ | ||
654 | regs->eip = start; | 771 | regs->eip = start; |
655 | 772 | ||
656 | /* %esi points to our boot information, at physical address 0, so don't | 773 | /* |
657 | * touch it. */ | 774 | * %esi points to our boot information, at physical address 0, so don't |
775 | * touch it. | ||
776 | */ | ||
658 | 777 | ||
659 | /* There are a couple of GDT entries the Guest expects when first | 778 | /* There are a couple of GDT entries the Guest expects at boot. */ |
660 | * booting. */ | ||
661 | setup_guest_gdt(cpu); | 779 | setup_guest_gdt(cpu); |
662 | } | 780 | } |
diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S index 3fc15318a80f..40634b0db9f7 100644 --- a/drivers/lguest/x86/switcher_32.S +++ b/drivers/lguest/x86/switcher_32.S | |||
@@ -1,12 +1,15 @@ | |||
1 | /*P:900 This is the Switcher: code which sits at 0xFFC00000 astride both the | 1 | /*P:900 |
2 | * Host and Guest to do the low-level Guest<->Host switch. It is as simple as | 2 | * This is the Switcher: code which sits at 0xFFC00000 (or 0xFFE00000) astride |
3 | * it can be made, but it's naturally very specific to x86. | 3 | * both the Host and Guest to do the low-level Guest<->Host switch. It is as |
4 | * simple as it can be made, but it's naturally very specific to x86. | ||
4 | * | 5 | * |
5 | * You have now completed Preparation. If this has whet your appetite; if you | 6 | * You have now completed Preparation. If this has whet your appetite; if you |
6 | * are feeling invigorated and refreshed then the next, more challenging stage | 7 | * are feeling invigorated and refreshed then the next, more challenging stage |
7 | * can be found in "make Guest". :*/ | 8 | * can be found in "make Guest". |
9 | :*/ | ||
8 | 10 | ||
9 | /*M:012 Lguest is meant to be simple: my rule of thumb is that 1% more LOC must | 11 | /*M:012 |
12 | * Lguest is meant to be simple: my rule of thumb is that 1% more LOC must | ||
10 | * gain at least 1% more performance. Since neither LOC nor performance can be | 13 | * gain at least 1% more performance. Since neither LOC nor performance can be |
11 | * measured beforehand, it generally means implementing a feature then deciding | 14 | * measured beforehand, it generally means implementing a feature then deciding |
12 | * if it's worth it. And once it's implemented, who can say no? | 15 | * if it's worth it. And once it's implemented, who can say no? |
@@ -31,11 +34,14 @@ | |||
31 | * Host (which is actually really easy). | 34 | * Host (which is actually really easy). |
32 | * | 35 | * |
33 | * Two questions remain. Would the performance gain outweigh the complexity? | 36 | * Two questions remain. Would the performance gain outweigh the complexity? |
34 | * And who would write the verse documenting it? :*/ | 37 | * And who would write the verse documenting it? |
38 | :*/ | ||
35 | 39 | ||
36 | /*M:011 Lguest64 handles NMI. This gave me NMI envy (until I looked at their | 40 | /*M:011 |
41 | * Lguest64 handles NMI. This gave me NMI envy (until I looked at their | ||
37 | * code). It's worth doing though, since it would let us use oprofile in the | 42 | * code). It's worth doing though, since it would let us use oprofile in the |
38 | * Host when a Guest is running. :*/ | 43 | * Host when a Guest is running. |
44 | :*/ | ||
39 | 45 | ||
40 | /*S:100 | 46 | /*S:100 |
41 | * Welcome to the Switcher itself! | 47 | * Welcome to the Switcher itself! |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 5810fa906af0..5fe39c2a3d2b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev) | |||
220 | mddev->queue->unplug_fn = linear_unplug; | 220 | mddev->queue->unplug_fn = linear_unplug; |
221 | mddev->queue->backing_dev_info.congested_fn = linear_congested; | 221 | mddev->queue->backing_dev_info.congested_fn = linear_congested; |
222 | mddev->queue->backing_dev_info.congested_data = mddev; | 222 | mddev->queue->backing_dev_info.congested_data = mddev; |
223 | md_integrity_register(mddev); | ||
223 | return 0; | 224 | return 0; |
224 | } | 225 | } |
225 | 226 | ||
@@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) | |||
256 | rcu_assign_pointer(mddev->private, newconf); | 257 | rcu_assign_pointer(mddev->private, newconf); |
257 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); | 258 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); |
258 | set_capacity(mddev->gendisk, mddev->array_sectors); | 259 | set_capacity(mddev->gendisk, mddev->array_sectors); |
260 | revalidate_disk(mddev->gendisk); | ||
259 | call_rcu(&oldconf->rcu, free_conf); | 261 | call_rcu(&oldconf->rcu, free_conf); |
260 | return 0; | 262 | return 0; |
261 | } | 263 | } |
diff --git a/drivers/md/md.c b/drivers/md/md.c index d4351ff0849f..5b98bea4ff9b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -1308,7 +1308,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1308 | } | 1308 | } |
1309 | if (mddev->level != LEVEL_MULTIPATH) { | 1309 | if (mddev->level != LEVEL_MULTIPATH) { |
1310 | int role; | 1310 | int role; |
1311 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 1311 | if (rdev->desc_nr < 0 || |
1312 | rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { | ||
1313 | role = 0xffff; | ||
1314 | rdev->desc_nr = -1; | ||
1315 | } else | ||
1316 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | ||
1312 | switch(role) { | 1317 | switch(role) { |
1313 | case 0xffff: /* spare */ | 1318 | case 0xffff: /* spare */ |
1314 | break; | 1319 | break; |
@@ -1394,8 +1399,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1394 | if (rdev2->desc_nr+1 > max_dev) | 1399 | if (rdev2->desc_nr+1 > max_dev) |
1395 | max_dev = rdev2->desc_nr+1; | 1400 | max_dev = rdev2->desc_nr+1; |
1396 | 1401 | ||
1397 | if (max_dev > le32_to_cpu(sb->max_dev)) | 1402 | if (max_dev > le32_to_cpu(sb->max_dev)) { |
1403 | int bmask; | ||
1398 | sb->max_dev = cpu_to_le32(max_dev); | 1404 | sb->max_dev = cpu_to_le32(max_dev); |
1405 | rdev->sb_size = max_dev * 2 + 256; | ||
1406 | bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; | ||
1407 | if (rdev->sb_size & bmask) | ||
1408 | rdev->sb_size = (rdev->sb_size | bmask) + 1; | ||
1409 | } | ||
1399 | for (i=0; i<max_dev;i++) | 1410 | for (i=0; i<max_dev;i++) |
1400 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1411 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1401 | 1412 | ||
@@ -1487,37 +1498,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |||
1487 | 1498 | ||
1488 | static LIST_HEAD(pending_raid_disks); | 1499 | static LIST_HEAD(pending_raid_disks); |
1489 | 1500 | ||
1490 | static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) | 1501 | /* |
1502 | * Try to register data integrity profile for an mddev | ||
1503 | * | ||
1504 | * This is called when an array is started and after a disk has been kicked | ||
1505 | * from the array. It only succeeds if all working and active component devices | ||
1506 | * are integrity capable with matching profiles. | ||
1507 | */ | ||
1508 | int md_integrity_register(mddev_t *mddev) | ||
1509 | { | ||
1510 | mdk_rdev_t *rdev, *reference = NULL; | ||
1511 | |||
1512 | if (list_empty(&mddev->disks)) | ||
1513 | return 0; /* nothing to do */ | ||
1514 | if (blk_get_integrity(mddev->gendisk)) | ||
1515 | return 0; /* already registered */ | ||
1516 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
1517 | /* skip spares and non-functional disks */ | ||
1518 | if (test_bit(Faulty, &rdev->flags)) | ||
1519 | continue; | ||
1520 | if (rdev->raid_disk < 0) | ||
1521 | continue; | ||
1522 | /* | ||
1523 | * If at least one rdev is not integrity capable, we can not | ||
1524 | * enable data integrity for the md device. | ||
1525 | */ | ||
1526 | if (!bdev_get_integrity(rdev->bdev)) | ||
1527 | return -EINVAL; | ||
1528 | if (!reference) { | ||
1529 | /* Use the first rdev as the reference */ | ||
1530 | reference = rdev; | ||
1531 | continue; | ||
1532 | } | ||
1533 | /* does this rdev's profile match the reference profile? */ | ||
1534 | if (blk_integrity_compare(reference->bdev->bd_disk, | ||
1535 | rdev->bdev->bd_disk) < 0) | ||
1536 | return -EINVAL; | ||
1537 | } | ||
1538 | /* | ||
1539 | * All component devices are integrity capable and have matching | ||
1540 | * profiles, register the common profile for the md device. | ||
1541 | */ | ||
1542 | if (blk_integrity_register(mddev->gendisk, | ||
1543 | bdev_get_integrity(reference->bdev)) != 0) { | ||
1544 | printk(KERN_ERR "md: failed to register integrity for %s\n", | ||
1545 | mdname(mddev)); | ||
1546 | return -EINVAL; | ||
1547 | } | ||
1548 | printk(KERN_NOTICE "md: data integrity on %s enabled\n", | ||
1549 | mdname(mddev)); | ||
1550 | return 0; | ||
1551 | } | ||
1552 | EXPORT_SYMBOL(md_integrity_register); | ||
1553 | |||
1554 | /* Disable data integrity if non-capable/non-matching disk is being added */ | ||
1555 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | ||
1491 | { | 1556 | { |
1492 | struct mdk_personality *pers = mddev->pers; | ||
1493 | struct gendisk *disk = mddev->gendisk; | ||
1494 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); | 1557 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); |
1495 | struct blk_integrity *bi_mddev = blk_get_integrity(disk); | 1558 | struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); |
1496 | 1559 | ||
1497 | /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ | 1560 | if (!bi_mddev) /* nothing to do */ |
1498 | if (pers && pers->level >= 4 && pers->level <= 6) | ||
1499 | return; | 1561 | return; |
1500 | 1562 | if (rdev->raid_disk < 0) /* skip spares */ | |
1501 | /* If rdev is integrity capable, register profile for mddev */ | ||
1502 | if (!bi_mddev && bi_rdev) { | ||
1503 | if (blk_integrity_register(disk, bi_rdev)) | ||
1504 | printk(KERN_ERR "%s: %s Could not register integrity!\n", | ||
1505 | __func__, disk->disk_name); | ||
1506 | else | ||
1507 | printk(KERN_NOTICE "Enabling data integrity on %s\n", | ||
1508 | disk->disk_name); | ||
1509 | return; | 1563 | return; |
1510 | } | 1564 | if (bi_rdev && blk_integrity_compare(mddev->gendisk, |
1511 | 1565 | rdev->bdev->bd_disk) >= 0) | |
1512 | /* Check that mddev and rdev have matching profiles */ | 1566 | return; |
1513 | if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { | 1567 | printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); |
1514 | printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, | 1568 | blk_integrity_unregister(mddev->gendisk); |
1515 | disk->disk_name, rdev->bdev->bd_disk->disk_name); | ||
1516 | printk(KERN_NOTICE "Disabling data integrity on %s\n", | ||
1517 | disk->disk_name); | ||
1518 | blk_integrity_unregister(disk); | ||
1519 | } | ||
1520 | } | 1569 | } |
1570 | EXPORT_SYMBOL(md_integrity_add_rdev); | ||
1521 | 1571 | ||
1522 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | 1572 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) |
1523 | { | 1573 | { |
@@ -1591,7 +1641,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1591 | /* May as well allow recovery to be retried once */ | 1641 | /* May as well allow recovery to be retried once */ |
1592 | mddev->recovery_disabled = 0; | 1642 | mddev->recovery_disabled = 0; |
1593 | 1643 | ||
1594 | md_integrity_check(rdev, mddev); | ||
1595 | return 0; | 1644 | return 0; |
1596 | 1645 | ||
1597 | fail: | 1646 | fail: |
@@ -2657,6 +2706,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2657 | ssize_t rv = len; | 2706 | ssize_t rv = len; |
2658 | struct mdk_personality *pers; | 2707 | struct mdk_personality *pers; |
2659 | void *priv; | 2708 | void *priv; |
2709 | mdk_rdev_t *rdev; | ||
2660 | 2710 | ||
2661 | if (mddev->pers == NULL) { | 2711 | if (mddev->pers == NULL) { |
2662 | if (len == 0) | 2712 | if (len == 0) |
@@ -2736,6 +2786,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2736 | mddev_suspend(mddev); | 2786 | mddev_suspend(mddev); |
2737 | mddev->pers->stop(mddev); | 2787 | mddev->pers->stop(mddev); |
2738 | module_put(mddev->pers->owner); | 2788 | module_put(mddev->pers->owner); |
2789 | /* Invalidate devices that are now superfluous */ | ||
2790 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
2791 | if (rdev->raid_disk >= mddev->raid_disks) { | ||
2792 | rdev->raid_disk = -1; | ||
2793 | clear_bit(In_sync, &rdev->flags); | ||
2794 | } | ||
2739 | mddev->pers = pers; | 2795 | mddev->pers = pers; |
2740 | mddev->private = priv; | 2796 | mddev->private = priv; |
2741 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 2797 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
@@ -3685,17 +3741,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
3685 | 3741 | ||
3686 | mddev->array_sectors = sectors; | 3742 | mddev->array_sectors = sectors; |
3687 | set_capacity(mddev->gendisk, mddev->array_sectors); | 3743 | set_capacity(mddev->gendisk, mddev->array_sectors); |
3688 | if (mddev->pers) { | 3744 | if (mddev->pers) |
3689 | struct block_device *bdev = bdget_disk(mddev->gendisk, 0); | 3745 | revalidate_disk(mddev->gendisk); |
3690 | |||
3691 | if (bdev) { | ||
3692 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
3693 | i_size_write(bdev->bd_inode, | ||
3694 | (loff_t)mddev->array_sectors << 9); | ||
3695 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
3696 | bdput(bdev); | ||
3697 | } | ||
3698 | } | ||
3699 | 3746 | ||
3700 | return len; | 3747 | return len; |
3701 | } | 3748 | } |
@@ -4048,10 +4095,6 @@ static int do_md_run(mddev_t * mddev) | |||
4048 | } | 4095 | } |
4049 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 4096 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
4050 | 4097 | ||
4051 | if (pers->level >= 4 && pers->level <= 6) | ||
4052 | /* Cannot support integrity (yet) */ | ||
4053 | blk_integrity_unregister(mddev->gendisk); | ||
4054 | |||
4055 | if (mddev->reshape_position != MaxSector && | 4098 | if (mddev->reshape_position != MaxSector && |
4056 | pers->start_reshape == NULL) { | 4099 | pers->start_reshape == NULL) { |
4057 | /* This personality cannot handle reshaping... */ | 4100 | /* This personality cannot handle reshaping... */ |
@@ -4189,6 +4232,7 @@ static int do_md_run(mddev_t * mddev) | |||
4189 | md_wakeup_thread(mddev->thread); | 4232 | md_wakeup_thread(mddev->thread); |
4190 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4233 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
4191 | 4234 | ||
4235 | revalidate_disk(mddev->gendisk); | ||
4192 | mddev->changed = 1; | 4236 | mddev->changed = 1; |
4193 | md_new_event(mddev); | 4237 | md_new_event(mddev); |
4194 | sysfs_notify_dirent(mddev->sysfs_state); | 4238 | sysfs_notify_dirent(mddev->sysfs_state); |
@@ -5087,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) | |||
5087 | return -ENOSPC; | 5131 | return -ENOSPC; |
5088 | } | 5132 | } |
5089 | rv = mddev->pers->resize(mddev, num_sectors); | 5133 | rv = mddev->pers->resize(mddev, num_sectors); |
5090 | if (!rv) { | 5134 | if (!rv) |
5091 | struct block_device *bdev; | 5135 | revalidate_disk(mddev->gendisk); |
5092 | |||
5093 | bdev = bdget_disk(mddev->gendisk, 0); | ||
5094 | if (bdev) { | ||
5095 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
5096 | i_size_write(bdev->bd_inode, | ||
5097 | (loff_t)mddev->array_sectors << 9); | ||
5098 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
5099 | bdput(bdev); | ||
5100 | } | ||
5101 | } | ||
5102 | return rv; | 5136 | return rv; |
5103 | } | 5137 | } |
5104 | 5138 | ||
diff --git a/drivers/md/md.h b/drivers/md/md.h index 9430a110db93..78f03168baf9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -431,5 +431,7 @@ extern int md_allow_write(mddev_t *mddev); | |||
431 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 431 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
432 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); | 432 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); |
433 | extern int md_check_no_bitmap(mddev_t *mddev); | 433 | extern int md_check_no_bitmap(mddev_t *mddev); |
434 | extern int md_integrity_register(mddev_t *mddev); | ||
435 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | ||
434 | 436 | ||
435 | #endif /* _MD_MD_H */ | 437 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 237fe3fd235c..7140909f6662 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -313,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
313 | set_bit(In_sync, &rdev->flags); | 313 | set_bit(In_sync, &rdev->flags); |
314 | rcu_assign_pointer(p->rdev, rdev); | 314 | rcu_assign_pointer(p->rdev, rdev); |
315 | err = 0; | 315 | err = 0; |
316 | md_integrity_add_rdev(rdev, mddev); | ||
316 | break; | 317 | break; |
317 | } | 318 | } |
318 | 319 | ||
@@ -345,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number) | |||
345 | /* lost the race, try later */ | 346 | /* lost the race, try later */ |
346 | err = -EBUSY; | 347 | err = -EBUSY; |
347 | p->rdev = rdev; | 348 | p->rdev = rdev; |
349 | goto abort; | ||
348 | } | 350 | } |
351 | md_integrity_register(mddev); | ||
349 | } | 352 | } |
350 | abort: | 353 | abort: |
351 | 354 | ||
@@ -519,7 +522,7 @@ static int multipath_run (mddev_t *mddev) | |||
519 | mddev->queue->unplug_fn = multipath_unplug; | 522 | mddev->queue->unplug_fn = multipath_unplug; |
520 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; | 523 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; |
521 | mddev->queue->backing_dev_info.congested_data = mddev; | 524 | mddev->queue->backing_dev_info.congested_data = mddev; |
522 | 525 | md_integrity_register(mddev); | |
523 | return 0; | 526 | return 0; |
524 | 527 | ||
525 | out_free_conf: | 528 | out_free_conf: |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 335f490dcad6..898e2bdfee47 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -351,6 +351,7 @@ static int raid0_run(mddev_t *mddev) | |||
351 | 351 | ||
352 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); | 352 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); |
353 | dump_zones(mddev); | 353 | dump_zones(mddev); |
354 | md_integrity_register(mddev); | ||
354 | return 0; | 355 | return 0; |
355 | } | 356 | } |
356 | 357 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0569efba0c02..8726fd7ebce5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1144 | rcu_assign_pointer(p->rdev, rdev); | 1144 | rcu_assign_pointer(p->rdev, rdev); |
1145 | break; | 1145 | break; |
1146 | } | 1146 | } |
1147 | 1147 | md_integrity_add_rdev(rdev, mddev); | |
1148 | print_conf(conf); | 1148 | print_conf(conf); |
1149 | return err; | 1149 | return err; |
1150 | } | 1150 | } |
@@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number) | |||
1178 | /* lost the race, try later */ | 1178 | /* lost the race, try later */ |
1179 | err = -EBUSY; | 1179 | err = -EBUSY; |
1180 | p->rdev = rdev; | 1180 | p->rdev = rdev; |
1181 | goto abort; | ||
1181 | } | 1182 | } |
1183 | md_integrity_register(mddev); | ||
1182 | } | 1184 | } |
1183 | abort: | 1185 | abort: |
1184 | 1186 | ||
@@ -2067,7 +2069,7 @@ static int run(mddev_t *mddev) | |||
2067 | mddev->queue->unplug_fn = raid1_unplug; | 2069 | mddev->queue->unplug_fn = raid1_unplug; |
2068 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | 2070 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; |
2069 | mddev->queue->backing_dev_info.congested_data = mddev; | 2071 | mddev->queue->backing_dev_info.congested_data = mddev; |
2070 | 2072 | md_integrity_register(mddev); | |
2071 | return 0; | 2073 | return 0; |
2072 | 2074 | ||
2073 | out_no_mem: | 2075 | out_no_mem: |
@@ -2132,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) | |||
2132 | return -EINVAL; | 2134 | return -EINVAL; |
2133 | set_capacity(mddev->gendisk, mddev->array_sectors); | 2135 | set_capacity(mddev->gendisk, mddev->array_sectors); |
2134 | mddev->changed = 1; | 2136 | mddev->changed = 1; |
2137 | revalidate_disk(mddev->gendisk); | ||
2135 | if (sectors > mddev->dev_sectors && | 2138 | if (sectors > mddev->dev_sectors && |
2136 | mddev->recovery_cp == MaxSector) { | 2139 | mddev->recovery_cp == MaxSector) { |
2137 | mddev->recovery_cp = mddev->dev_sectors; | 2140 | mddev->recovery_cp = mddev->dev_sectors; |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7298a5e5a183..3d9020cf6f6e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1170 | break; | 1170 | break; |
1171 | } | 1171 | } |
1172 | 1172 | ||
1173 | md_integrity_add_rdev(rdev, mddev); | ||
1173 | print_conf(conf); | 1174 | print_conf(conf); |
1174 | return err; | 1175 | return err; |
1175 | } | 1176 | } |
@@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number) | |||
1203 | /* lost the race, try later */ | 1204 | /* lost the race, try later */ |
1204 | err = -EBUSY; | 1205 | err = -EBUSY; |
1205 | p->rdev = rdev; | 1206 | p->rdev = rdev; |
1207 | goto abort; | ||
1206 | } | 1208 | } |
1209 | md_integrity_register(mddev); | ||
1207 | } | 1210 | } |
1208 | abort: | 1211 | abort: |
1209 | 1212 | ||
@@ -2225,6 +2228,7 @@ static int run(mddev_t *mddev) | |||
2225 | 2228 | ||
2226 | if (conf->near_copies < mddev->raid_disks) | 2229 | if (conf->near_copies < mddev->raid_disks) |
2227 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | 2230 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); |
2231 | md_integrity_register(mddev); | ||
2228 | return 0; | 2232 | return 0; |
2229 | 2233 | ||
2230 | out_free_conf: | 2234 | out_free_conf: |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37835538b58e..2b521ee67dfa 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3999,6 +3999,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3999 | return 0; | 3999 | return 0; |
4000 | } | 4000 | } |
4001 | 4001 | ||
4002 | /* Allow raid5_quiesce to complete */ | ||
4003 | wait_event(conf->wait_for_overlap, conf->quiesce != 2); | ||
4004 | |||
4002 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 4005 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
4003 | return reshape_request(mddev, sector_nr, skipped); | 4006 | return reshape_request(mddev, sector_nr, skipped); |
4004 | 4007 | ||
@@ -4316,6 +4319,15 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
4316 | return sectors * (raid_disks - conf->max_degraded); | 4319 | return sectors * (raid_disks - conf->max_degraded); |
4317 | } | 4320 | } |
4318 | 4321 | ||
4322 | static void free_conf(raid5_conf_t *conf) | ||
4323 | { | ||
4324 | shrink_stripes(conf); | ||
4325 | safe_put_page(conf->spare_page); | ||
4326 | kfree(conf->disks); | ||
4327 | kfree(conf->stripe_hashtbl); | ||
4328 | kfree(conf); | ||
4329 | } | ||
4330 | |||
4319 | static raid5_conf_t *setup_conf(mddev_t *mddev) | 4331 | static raid5_conf_t *setup_conf(mddev_t *mddev) |
4320 | { | 4332 | { |
4321 | raid5_conf_t *conf; | 4333 | raid5_conf_t *conf; |
@@ -4447,11 +4459,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4447 | 4459 | ||
4448 | abort: | 4460 | abort: |
4449 | if (conf) { | 4461 | if (conf) { |
4450 | shrink_stripes(conf); | 4462 | free_conf(conf); |
4451 | safe_put_page(conf->spare_page); | ||
4452 | kfree(conf->disks); | ||
4453 | kfree(conf->stripe_hashtbl); | ||
4454 | kfree(conf); | ||
4455 | return ERR_PTR(-EIO); | 4463 | return ERR_PTR(-EIO); |
4456 | } else | 4464 | } else |
4457 | return ERR_PTR(-ENOMEM); | 4465 | return ERR_PTR(-ENOMEM); |
@@ -4629,12 +4637,8 @@ abort: | |||
4629 | md_unregister_thread(mddev->thread); | 4637 | md_unregister_thread(mddev->thread); |
4630 | mddev->thread = NULL; | 4638 | mddev->thread = NULL; |
4631 | if (conf) { | 4639 | if (conf) { |
4632 | shrink_stripes(conf); | ||
4633 | print_raid5_conf(conf); | 4640 | print_raid5_conf(conf); |
4634 | safe_put_page(conf->spare_page); | 4641 | free_conf(conf); |
4635 | kfree(conf->disks); | ||
4636 | kfree(conf->stripe_hashtbl); | ||
4637 | kfree(conf); | ||
4638 | } | 4642 | } |
4639 | mddev->private = NULL; | 4643 | mddev->private = NULL; |
4640 | printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); | 4644 | printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); |
@@ -4649,13 +4653,10 @@ static int stop(mddev_t *mddev) | |||
4649 | 4653 | ||
4650 | md_unregister_thread(mddev->thread); | 4654 | md_unregister_thread(mddev->thread); |
4651 | mddev->thread = NULL; | 4655 | mddev->thread = NULL; |
4652 | shrink_stripes(conf); | ||
4653 | kfree(conf->stripe_hashtbl); | ||
4654 | mddev->queue->backing_dev_info.congested_fn = NULL; | 4656 | mddev->queue->backing_dev_info.congested_fn = NULL; |
4655 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 4657 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
4656 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); | 4658 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); |
4657 | kfree(conf->disks); | 4659 | free_conf(conf); |
4658 | kfree(conf); | ||
4659 | mddev->private = NULL; | 4660 | mddev->private = NULL; |
4660 | return 0; | 4661 | return 0; |
4661 | } | 4662 | } |
@@ -4857,6 +4858,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
4857 | return -EINVAL; | 4858 | return -EINVAL; |
4858 | set_capacity(mddev->gendisk, mddev->array_sectors); | 4859 | set_capacity(mddev->gendisk, mddev->array_sectors); |
4859 | mddev->changed = 1; | 4860 | mddev->changed = 1; |
4861 | revalidate_disk(mddev->gendisk); | ||
4860 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { | 4862 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { |
4861 | mddev->recovery_cp = mddev->dev_sectors; | 4863 | mddev->recovery_cp = mddev->dev_sectors; |
4862 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4864 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -5002,7 +5004,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
5002 | spin_unlock_irqrestore(&conf->device_lock, flags); | 5004 | spin_unlock_irqrestore(&conf->device_lock, flags); |
5003 | } | 5005 | } |
5004 | mddev->raid_disks = conf->raid_disks; | 5006 | mddev->raid_disks = conf->raid_disks; |
5005 | mddev->reshape_position = 0; | 5007 | mddev->reshape_position = conf->reshape_progress; |
5006 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 5008 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
5007 | 5009 | ||
5008 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 5010 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
@@ -5057,7 +5059,6 @@ static void end_reshape(raid5_conf_t *conf) | |||
5057 | */ | 5059 | */ |
5058 | static void raid5_finish_reshape(mddev_t *mddev) | 5060 | static void raid5_finish_reshape(mddev_t *mddev) |
5059 | { | 5061 | { |
5060 | struct block_device *bdev; | ||
5061 | raid5_conf_t *conf = mddev->private; | 5062 | raid5_conf_t *conf = mddev->private; |
5062 | 5063 | ||
5063 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5064 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
@@ -5066,15 +5067,7 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
5066 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5067 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
5067 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5068 | set_capacity(mddev->gendisk, mddev->array_sectors); |
5068 | mddev->changed = 1; | 5069 | mddev->changed = 1; |
5069 | 5070 | revalidate_disk(mddev->gendisk); | |
5070 | bdev = bdget_disk(mddev->gendisk, 0); | ||
5071 | if (bdev) { | ||
5072 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
5073 | i_size_write(bdev->bd_inode, | ||
5074 | (loff_t)mddev->array_sectors << 9); | ||
5075 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
5076 | bdput(bdev); | ||
5077 | } | ||
5078 | } else { | 5071 | } else { |
5079 | int d; | 5072 | int d; |
5080 | mddev->degraded = conf->raid_disks; | 5073 | mddev->degraded = conf->raid_disks; |
@@ -5106,12 +5099,18 @@ static void raid5_quiesce(mddev_t *mddev, int state) | |||
5106 | 5099 | ||
5107 | case 1: /* stop all writes */ | 5100 | case 1: /* stop all writes */ |
5108 | spin_lock_irq(&conf->device_lock); | 5101 | spin_lock_irq(&conf->device_lock); |
5109 | conf->quiesce = 1; | 5102 | /* '2' tells resync/reshape to pause so that all |
5103 | * active stripes can drain | ||
5104 | */ | ||
5105 | conf->quiesce = 2; | ||
5110 | wait_event_lock_irq(conf->wait_for_stripe, | 5106 | wait_event_lock_irq(conf->wait_for_stripe, |
5111 | atomic_read(&conf->active_stripes) == 0 && | 5107 | atomic_read(&conf->active_stripes) == 0 && |
5112 | atomic_read(&conf->active_aligned_reads) == 0, | 5108 | atomic_read(&conf->active_aligned_reads) == 0, |
5113 | conf->device_lock, /* nothing */); | 5109 | conf->device_lock, /* nothing */); |
5110 | conf->quiesce = 1; | ||
5114 | spin_unlock_irq(&conf->device_lock); | 5111 | spin_unlock_irq(&conf->device_lock); |
5112 | /* allow reshape to continue */ | ||
5113 | wake_up(&conf->wait_for_overlap); | ||
5115 | break; | 5114 | break; |
5116 | 5115 | ||
5117 | case 0: /* re-enable writes */ | 5116 | case 0: /* re-enable writes */ |
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index bae61b22501c..7d430835655f 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c | |||
@@ -180,14 +180,9 @@ static struct completion irq_event; | |||
180 | static int twl4030_irq_thread(void *data) | 180 | static int twl4030_irq_thread(void *data) |
181 | { | 181 | { |
182 | long irq = (long)data; | 182 | long irq = (long)data; |
183 | struct irq_desc *desc = irq_to_desc(irq); | ||
184 | static unsigned i2c_errors; | 183 | static unsigned i2c_errors; |
185 | static const unsigned max_i2c_errors = 100; | 184 | static const unsigned max_i2c_errors = 100; |
186 | 185 | ||
187 | if (!desc) { | ||
188 | pr_err("twl4030: Invalid IRQ: %ld\n", irq); | ||
189 | return -EINVAL; | ||
190 | } | ||
191 | 186 | ||
192 | current->flags |= PF_NOFREEZE; | 187 | current->flags |= PF_NOFREEZE; |
193 | 188 | ||
@@ -240,7 +235,7 @@ static int twl4030_irq_thread(void *data) | |||
240 | } | 235 | } |
241 | local_irq_enable(); | 236 | local_irq_enable(); |
242 | 237 | ||
243 | desc->chip->unmask(irq); | 238 | enable_irq(irq); |
244 | } | 239 | } |
245 | 240 | ||
246 | return 0; | 241 | return 0; |
@@ -255,25 +250,13 @@ static int twl4030_irq_thread(void *data) | |||
255 | * thread. All we do here is acknowledge and mask the interrupt and wakeup | 250 | * thread. All we do here is acknowledge and mask the interrupt and wakeup |
256 | * the kernel thread. | 251 | * the kernel thread. |
257 | */ | 252 | */ |
258 | static void handle_twl4030_pih(unsigned int irq, struct irq_desc *desc) | 253 | static irqreturn_t handle_twl4030_pih(int irq, void *devid) |
259 | { | 254 | { |
260 | /* Acknowledge, clear *AND* mask the interrupt... */ | 255 | /* Acknowledge, clear *AND* mask the interrupt... */ |
261 | desc->chip->ack(irq); | 256 | disable_irq_nosync(irq); |
262 | complete(&irq_event); | 257 | complete(devid); |
263 | } | 258 | return IRQ_HANDLED; |
264 | |||
265 | static struct task_struct *start_twl4030_irq_thread(long irq) | ||
266 | { | ||
267 | struct task_struct *thread; | ||
268 | |||
269 | init_completion(&irq_event); | ||
270 | thread = kthread_run(twl4030_irq_thread, (void *)irq, "twl4030-irq"); | ||
271 | if (!thread) | ||
272 | pr_err("twl4030: could not create irq %ld thread!\n", irq); | ||
273 | |||
274 | return thread; | ||
275 | } | 259 | } |
276 | |||
277 | /*----------------------------------------------------------------------*/ | 260 | /*----------------------------------------------------------------------*/ |
278 | 261 | ||
279 | /* | 262 | /* |
@@ -734,18 +717,28 @@ int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end) | |||
734 | } | 717 | } |
735 | 718 | ||
736 | /* install an irq handler to demultiplex the TWL4030 interrupt */ | 719 | /* install an irq handler to demultiplex the TWL4030 interrupt */ |
737 | task = start_twl4030_irq_thread(irq_num); | ||
738 | if (!task) { | ||
739 | pr_err("twl4030: irq thread FAIL\n"); | ||
740 | status = -ESRCH; | ||
741 | goto fail; | ||
742 | } | ||
743 | 720 | ||
744 | set_irq_data(irq_num, task); | ||
745 | set_irq_chained_handler(irq_num, handle_twl4030_pih); | ||
746 | 721 | ||
747 | return status; | 722 | init_completion(&irq_event); |
748 | 723 | ||
724 | status = request_irq(irq_num, handle_twl4030_pih, IRQF_DISABLED, | ||
725 | "TWL4030-PIH", &irq_event); | ||
726 | if (status < 0) { | ||
727 | pr_err("twl4030: could not claim irq%d: %d\n", irq_num, status); | ||
728 | goto fail_rqirq; | ||
729 | } | ||
730 | |||
731 | task = kthread_run(twl4030_irq_thread, (void *)irq_num, "twl4030-irq"); | ||
732 | if (IS_ERR(task)) { | ||
733 | pr_err("twl4030: could not create irq %d thread!\n", irq_num); | ||
734 | status = PTR_ERR(task); | ||
735 | goto fail_kthread; | ||
736 | } | ||
737 | return status; | ||
738 | fail_kthread: | ||
739 | free_irq(irq_num, &irq_event); | ||
740 | fail_rqirq: | ||
741 | /* clean up twl4030_sih_setup */ | ||
749 | fail: | 742 | fail: |
750 | for (i = irq_base; i < irq_end; i++) | 743 | for (i = irq_base; i < irq_end; i++) |
751 | set_irq_chip_and_handler(i, NULL, NULL); | 744 | set_irq_chip_and_handler(i, NULL, NULL); |
diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c index d38a7acdb6ec..d019746551f3 100644 --- a/drivers/misc/cb710/sgbuf2.c +++ b/drivers/misc/cb710/sgbuf2.c | |||
@@ -114,7 +114,6 @@ static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data) | |||
114 | if (!left) | 114 | if (!left) |
115 | return; | 115 | return; |
116 | addr += len; | 116 | addr += len; |
117 | flush_kernel_dcache_page(miter->page); | ||
118 | } while (sg_dwiter_next(miter)); | 117 | } while (sg_dwiter_next(miter)); |
119 | } | 118 | } |
120 | 119 | ||
@@ -142,9 +141,6 @@ void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t da | |||
142 | return; | 141 | return; |
143 | } else | 142 | } else |
144 | sg_dwiter_write_slow(miter, data); | 143 | sg_dwiter_write_slow(miter, data); |
145 | |||
146 | if (miter->length == miter->consumed) | ||
147 | flush_kernel_dcache_page(miter->page); | ||
148 | } | 144 | } |
149 | EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); | 145 | EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); |
150 | 146 | ||
diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c index 11efefb1af51..4e72964a7b43 100644 --- a/drivers/mmc/host/cb710-mmc.c +++ b/drivers/mmc/host/cb710-mmc.c | |||
@@ -278,7 +278,7 @@ static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data) | |||
278 | if (unlikely(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8))) | 278 | if (unlikely(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8))) |
279 | return -EINVAL; | 279 | return -EINVAL; |
280 | 280 | ||
281 | sg_miter_start(&miter, data->sg, data->sg_len, 0); | 281 | sg_miter_start(&miter, data->sg, data->sg_len, SG_MITER_TO_SG); |
282 | 282 | ||
283 | cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, | 283 | cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, |
284 | 15, CB710_MMC_C2_READ_PIO_SIZE_MASK); | 284 | 15, CB710_MMC_C2_READ_PIO_SIZE_MASK); |
@@ -307,7 +307,7 @@ static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data) | |||
307 | goto out; | 307 | goto out; |
308 | } | 308 | } |
309 | out: | 309 | out: |
310 | cb710_sg_miter_stop_writing(&miter); | 310 | sg_miter_stop(&miter); |
311 | return err; | 311 | return err; |
312 | } | 312 | } |
313 | 313 | ||
@@ -322,7 +322,7 @@ static int cb710_mmc_send(struct cb710_slot *slot, struct mmc_data *data) | |||
322 | if (unlikely(data->blocks > 1 && data->blksz & 15)) | 322 | if (unlikely(data->blocks > 1 && data->blksz & 15)) |
323 | return -EINVAL; | 323 | return -EINVAL; |
324 | 324 | ||
325 | sg_miter_start(&miter, data->sg, data->sg_len, 0); | 325 | sg_miter_start(&miter, data->sg, data->sg_len, SG_MITER_FROM_SG); |
326 | 326 | ||
327 | cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, | 327 | cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, |
328 | 0, CB710_MMC_C2_READ_PIO_SIZE_MASK); | 328 | 0, CB710_MMC_C2_READ_PIO_SIZE_MASK); |
diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c index e0be21a4a696..bf98d7cc928a 100644 --- a/drivers/mmc/host/imxmmc.c +++ b/drivers/mmc/host/imxmmc.c | |||
@@ -652,7 +652,7 @@ static irqreturn_t imxmci_irq(int irq, void *devid) | |||
652 | set_bit(IMXMCI_PEND_STARTED_b, &host->pending_events); | 652 | set_bit(IMXMCI_PEND_STARTED_b, &host->pending_events); |
653 | tasklet_schedule(&host->tasklet); | 653 | tasklet_schedule(&host->tasklet); |
654 | 654 | ||
655 | return IRQ_RETVAL(handled);; | 655 | return IRQ_RETVAL(handled); |
656 | } | 656 | } |
657 | 657 | ||
658 | static void imxmci_tasklet_fnc(unsigned long data) | 658 | static void imxmci_tasklet_fnc(unsigned long data) |
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 62041c7e9246..fc96f8cb9c0b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c | |||
@@ -773,8 +773,14 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) | |||
773 | } | 773 | } |
774 | 774 | ||
775 | if (!(host->flags & SDHCI_REQ_USE_DMA)) { | 775 | if (!(host->flags & SDHCI_REQ_USE_DMA)) { |
776 | sg_miter_start(&host->sg_miter, | 776 | int flags; |
777 | data->sg, data->sg_len, SG_MITER_ATOMIC); | 777 | |
778 | flags = SG_MITER_ATOMIC; | ||
779 | if (host->data->flags & MMC_DATA_READ) | ||
780 | flags |= SG_MITER_TO_SG; | ||
781 | else | ||
782 | flags |= SG_MITER_FROM_SG; | ||
783 | sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags); | ||
778 | host->blocks = data->blocks; | 784 | host->blocks = data->blocks; |
779 | } | 785 | } |
780 | 786 | ||
diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c index 3e00fa8ea65f..4a7c32895be5 100644 --- a/drivers/net/3c515.c +++ b/drivers/net/3c515.c | |||
@@ -832,7 +832,9 @@ static int corkscrew_open(struct net_device *dev) | |||
832 | skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ | 832 | skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ |
833 | vp->rx_ring[i].addr = isa_virt_to_bus(skb->data); | 833 | vp->rx_ring[i].addr = isa_virt_to_bus(skb->data); |
834 | } | 834 | } |
835 | vp->rx_ring[i - 1].next = isa_virt_to_bus(&vp->rx_ring[0]); /* Wrap the ring. */ | 835 | if (i != 0) |
836 | vp->rx_ring[i - 1].next = | ||
837 | isa_virt_to_bus(&vp->rx_ring[0]); /* Wrap the ring. */ | ||
836 | outl(isa_virt_to_bus(&vp->rx_ring[0]), ioaddr + UpListPtr); | 838 | outl(isa_virt_to_bus(&vp->rx_ring[0]), ioaddr + UpListPtr); |
837 | } | 839 | } |
838 | if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */ | 840 | if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */ |
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index c34aee91250b..c20416850948 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c | |||
@@ -2721,13 +2721,15 @@ dump_tx_ring(struct net_device *dev) | |||
2721 | &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]); | 2721 | &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]); |
2722 | issue_and_wait(dev, DownStall); | 2722 | issue_and_wait(dev, DownStall); |
2723 | for (i = 0; i < TX_RING_SIZE; i++) { | 2723 | for (i = 0; i < TX_RING_SIZE; i++) { |
2724 | pr_err(" %d: @%p length %8.8x status %8.8x\n", i, | 2724 | unsigned int length; |
2725 | &vp->tx_ring[i], | 2725 | |
2726 | #if DO_ZEROCOPY | 2726 | #if DO_ZEROCOPY |
2727 | le32_to_cpu(vp->tx_ring[i].frag[0].length), | 2727 | length = le32_to_cpu(vp->tx_ring[i].frag[0].length); |
2728 | #else | 2728 | #else |
2729 | le32_to_cpu(vp->tx_ring[i].length), | 2729 | length = le32_to_cpu(vp->tx_ring[i].length); |
2730 | #endif | 2730 | #endif |
2731 | pr_err(" %d: @%p length %8.8x status %8.8x\n", | ||
2732 | i, &vp->tx_ring[i], length, | ||
2731 | le32_to_cpu(vp->tx_ring[i].status)); | 2733 | le32_to_cpu(vp->tx_ring[i].status)); |
2732 | } | 2734 | } |
2733 | if (!stalled) | 2735 | if (!stalled) |
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index 1686dca28748..1f016d66684a 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c | |||
@@ -1474,13 +1474,13 @@ static void eexp_hw_init586(struct net_device *dev) | |||
1474 | outw(0x0000, ioaddr + 0x800c); | 1474 | outw(0x0000, ioaddr + 0x800c); |
1475 | outw(0x0000, ioaddr + 0x800e); | 1475 | outw(0x0000, ioaddr + 0x800e); |
1476 | 1476 | ||
1477 | for (i = 0; i < (sizeof(start_code)); i+=32) { | 1477 | for (i = 0; i < ARRAY_SIZE(start_code) * 2; i+=32) { |
1478 | int j; | 1478 | int j; |
1479 | outw(i, ioaddr + SM_PTR); | 1479 | outw(i, ioaddr + SM_PTR); |
1480 | for (j = 0; j < 16; j+=2) | 1480 | for (j = 0; j < 16 && (i+j)/2 < ARRAY_SIZE(start_code); j+=2) |
1481 | outw(start_code[(i+j)/2], | 1481 | outw(start_code[(i+j)/2], |
1482 | ioaddr+0x4000+j); | 1482 | ioaddr+0x4000+j); |
1483 | for (j = 0; j < 16; j+=2) | 1483 | for (j = 0; j < 16 && (i+j+16)/2 < ARRAY_SIZE(start_code); j+=2) |
1484 | outw(start_code[(i+j+16)/2], | 1484 | outw(start_code[(i+j+16)/2], |
1485 | ioaddr+0x8000+j); | 1485 | ioaddr+0x8000+j); |
1486 | } | 1486 | } |
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h index 78952f8324e2..fa311a950996 100644 --- a/drivers/net/ehea/ehea.h +++ b/drivers/net/ehea/ehea.h | |||
@@ -40,7 +40,7 @@ | |||
40 | #include <asm/io.h> | 40 | #include <asm/io.h> |
41 | 41 | ||
42 | #define DRV_NAME "ehea" | 42 | #define DRV_NAME "ehea" |
43 | #define DRV_VERSION "EHEA_0101" | 43 | #define DRV_VERSION "EHEA_0102" |
44 | 44 | ||
45 | /* eHEA capability flags */ | 45 | /* eHEA capability flags */ |
46 | #define DLPAR_PORT_ADD_REM 1 | 46 | #define DLPAR_PORT_ADD_REM 1 |
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index e8d46cc1bec2..977c3d358279 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c | |||
@@ -1545,6 +1545,9 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr) | |||
1545 | { | 1545 | { |
1546 | int ret, i; | 1546 | int ret, i; |
1547 | 1547 | ||
1548 | if (pr->qp) | ||
1549 | netif_napi_del(&pr->napi); | ||
1550 | |||
1548 | ret = ehea_destroy_qp(pr->qp); | 1551 | ret = ehea_destroy_qp(pr->qp); |
1549 | 1552 | ||
1550 | if (!ret) { | 1553 | if (!ret) { |
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c index dbf06e9313cc..2234118eedbb 100644 --- a/drivers/net/gianfar_ethtool.c +++ b/drivers/net/gianfar_ethtool.c | |||
@@ -366,9 +366,8 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals | |||
366 | return -EINVAL; | 366 | return -EINVAL; |
367 | } | 367 | } |
368 | 368 | ||
369 | priv->rxic = mk_ic_value( | 369 | priv->rxic = mk_ic_value(cvals->rx_max_coalesced_frames, |
370 | gfar_usecs2ticks(priv, cvals->rx_coalesce_usecs), | 370 | gfar_usecs2ticks(priv, cvals->rx_coalesce_usecs)); |
371 | cvals->rx_max_coalesced_frames); | ||
372 | 371 | ||
373 | /* Set up tx coalescing */ | 372 | /* Set up tx coalescing */ |
374 | if ((cvals->tx_coalesce_usecs == 0) || | 373 | if ((cvals->tx_coalesce_usecs == 0) || |
@@ -390,9 +389,8 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals | |||
390 | return -EINVAL; | 389 | return -EINVAL; |
391 | } | 390 | } |
392 | 391 | ||
393 | priv->txic = mk_ic_value( | 392 | priv->txic = mk_ic_value(cvals->tx_max_coalesced_frames, |
394 | gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs), | 393 | gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs)); |
395 | cvals->tx_max_coalesced_frames); | ||
396 | 394 | ||
397 | gfar_write(&priv->regs->rxic, 0); | 395 | gfar_write(&priv->regs->rxic, 0); |
398 | if (priv->rxcoalescing) | 396 | if (priv->rxcoalescing) |
diff --git a/drivers/net/igbvf/vf.c b/drivers/net/igbvf/vf.c index 2a4faf9ade69..a9a61efa964c 100644 --- a/drivers/net/igbvf/vf.c +++ b/drivers/net/igbvf/vf.c | |||
@@ -274,6 +274,8 @@ static s32 e1000_set_vfta_vf(struct e1000_hw *hw, u16 vid, bool set) | |||
274 | 274 | ||
275 | err = mbx->ops.read_posted(hw, msgbuf, 2); | 275 | err = mbx->ops.read_posted(hw, msgbuf, 2); |
276 | 276 | ||
277 | msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS; | ||
278 | |||
277 | /* if nacked the vlan was rejected */ | 279 | /* if nacked the vlan was rejected */ |
278 | if (!err && (msgbuf[0] == (E1000_VF_SET_VLAN | E1000_VT_MSGTYPE_NACK))) | 280 | if (!err && (msgbuf[0] == (E1000_VF_SET_VLAN | E1000_VT_MSGTYPE_NACK))) |
279 | err = -E1000_ERR_MAC_INIT; | 281 | err = -E1000_ERR_MAC_INIT; |
@@ -317,6 +319,8 @@ static void e1000_rar_set_vf(struct e1000_hw *hw, u8 * addr, u32 index) | |||
317 | if (!ret_val) | 319 | if (!ret_val) |
318 | ret_val = mbx->ops.read_posted(hw, msgbuf, 3); | 320 | ret_val = mbx->ops.read_posted(hw, msgbuf, 3); |
319 | 321 | ||
322 | msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS; | ||
323 | |||
320 | /* if nacked the address was rejected, use "perm_addr" */ | 324 | /* if nacked the address was rejected, use "perm_addr" */ |
321 | if (!ret_val && | 325 | if (!ret_val && |
322 | (msgbuf[0] == (E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK))) | 326 | (msgbuf[0] == (E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK))) |
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index 1b12c7ba275f..e11d83d5852b 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h | |||
@@ -96,6 +96,8 @@ | |||
96 | #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 | 96 | #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 |
97 | #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 | 97 | #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 |
98 | 98 | ||
99 | #define IXGBE_MAX_RSC_INT_RATE 162760 | ||
100 | |||
99 | /* wrapper around a pointer to a socket buffer, | 101 | /* wrapper around a pointer to a socket buffer, |
100 | * so a DMA handle can be stored along with the buffer */ | 102 | * so a DMA handle can be stored along with the buffer */ |
101 | struct ixgbe_tx_buffer { | 103 | struct ixgbe_tx_buffer { |
diff --git a/drivers/net/ixgbe/ixgbe_82598.c b/drivers/net/ixgbe/ixgbe_82598.c index b9923047ce11..522c03bc1dad 100644 --- a/drivers/net/ixgbe/ixgbe_82598.c +++ b/drivers/net/ixgbe/ixgbe_82598.c | |||
@@ -50,6 +50,51 @@ static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, | |||
50 | u8 *eeprom_data); | 50 | u8 *eeprom_data); |
51 | 51 | ||
52 | /** | 52 | /** |
53 | * ixgbe_set_pcie_completion_timeout - set pci-e completion timeout | ||
54 | * @hw: pointer to the HW structure | ||
55 | * | ||
56 | * The defaults for 82598 should be in the range of 50us to 50ms, | ||
57 | * however the hardware default for these parts is 500us to 1ms which is less | ||
58 | * than the 10ms recommended by the pci-e spec. To address this we need to | ||
59 | * increase the value to either 10ms to 250ms for capability version 1 config, | ||
60 | * or 16ms to 55ms for version 2. | ||
61 | **/ | ||
62 | void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw) | ||
63 | { | ||
64 | struct ixgbe_adapter *adapter = hw->back; | ||
65 | u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR); | ||
66 | u16 pcie_devctl2; | ||
67 | |||
68 | /* only take action if timeout value is defaulted to 0 */ | ||
69 | if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK) | ||
70 | goto out; | ||
71 | |||
72 | /* | ||
73 | * if capababilities version is type 1 we can write the | ||
74 | * timeout of 10ms to 250ms through the GCR register | ||
75 | */ | ||
76 | if (!(gcr & IXGBE_GCR_CAP_VER2)) { | ||
77 | gcr |= IXGBE_GCR_CMPL_TMOUT_10ms; | ||
78 | goto out; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * for version 2 capabilities we need to write the config space | ||
83 | * directly in order to set the completion timeout value for | ||
84 | * 16ms to 55ms | ||
85 | */ | ||
86 | pci_read_config_word(adapter->pdev, | ||
87 | IXGBE_PCI_DEVICE_CONTROL2, &pcie_devctl2); | ||
88 | pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms; | ||
89 | pci_write_config_word(adapter->pdev, | ||
90 | IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2); | ||
91 | out: | ||
92 | /* disable completion timeout resend */ | ||
93 | gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND; | ||
94 | IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr); | ||
95 | } | ||
96 | |||
97 | /** | ||
53 | * ixgbe_get_pcie_msix_count_82598 - Gets MSI-X vector count | 98 | * ixgbe_get_pcie_msix_count_82598 - Gets MSI-X vector count |
54 | * @hw: pointer to hardware structure | 99 | * @hw: pointer to hardware structure |
55 | * | 100 | * |
@@ -153,6 +198,26 @@ out: | |||
153 | } | 198 | } |
154 | 199 | ||
155 | /** | 200 | /** |
201 | * ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx | ||
202 | * @hw: pointer to hardware structure | ||
203 | * | ||
204 | * Starts the hardware using the generic start_hw function. | ||
205 | * Then set pcie completion timeout | ||
206 | **/ | ||
207 | s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) | ||
208 | { | ||
209 | s32 ret_val = 0; | ||
210 | |||
211 | ret_val = ixgbe_start_hw_generic(hw); | ||
212 | |||
213 | /* set the completion timeout for interface */ | ||
214 | if (ret_val == 0) | ||
215 | ixgbe_set_pcie_completion_timeout(hw); | ||
216 | |||
217 | return ret_val; | ||
218 | } | ||
219 | |||
220 | /** | ||
156 | * ixgbe_get_link_capabilities_82598 - Determines link capabilities | 221 | * ixgbe_get_link_capabilities_82598 - Determines link capabilities |
157 | * @hw: pointer to hardware structure | 222 | * @hw: pointer to hardware structure |
158 | * @speed: pointer to link speed | 223 | * @speed: pointer to link speed |
@@ -1085,7 +1150,7 @@ out: | |||
1085 | static struct ixgbe_mac_operations mac_ops_82598 = { | 1150 | static struct ixgbe_mac_operations mac_ops_82598 = { |
1086 | .init_hw = &ixgbe_init_hw_generic, | 1151 | .init_hw = &ixgbe_init_hw_generic, |
1087 | .reset_hw = &ixgbe_reset_hw_82598, | 1152 | .reset_hw = &ixgbe_reset_hw_82598, |
1088 | .start_hw = &ixgbe_start_hw_generic, | 1153 | .start_hw = &ixgbe_start_hw_82598, |
1089 | .clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic, | 1154 | .clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic, |
1090 | .get_media_type = &ixgbe_get_media_type_82598, | 1155 | .get_media_type = &ixgbe_get_media_type_82598, |
1091 | .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82598, | 1156 | .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82598, |
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 2a978008fd6e..79144e950a34 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c | |||
@@ -1975,7 +1975,10 @@ static int ixgbe_set_coalesce(struct net_device *netdev, | |||
1975 | * any other value means disable eitr, which is best | 1975 | * any other value means disable eitr, which is best |
1976 | * served by setting the interrupt rate very high | 1976 | * served by setting the interrupt rate very high |
1977 | */ | 1977 | */ |
1978 | adapter->eitr_param = IXGBE_MAX_INT_RATE; | 1978 | if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) |
1979 | adapter->eitr_param = IXGBE_MAX_RSC_INT_RATE; | ||
1980 | else | ||
1981 | adapter->eitr_param = IXGBE_MAX_INT_RATE; | ||
1979 | adapter->itr_setting = 0; | 1982 | adapter->itr_setting = 0; |
1980 | } | 1983 | } |
1981 | 1984 | ||
@@ -1999,13 +2002,13 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data) | |||
1999 | 2002 | ||
2000 | ethtool_op_set_flags(netdev, data); | 2003 | ethtool_op_set_flags(netdev, data); |
2001 | 2004 | ||
2002 | if (!(adapter->flags & IXGBE_FLAG2_RSC_CAPABLE)) | 2005 | if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)) |
2003 | return 0; | 2006 | return 0; |
2004 | 2007 | ||
2005 | /* if state changes we need to update adapter->flags and reset */ | 2008 | /* if state changes we need to update adapter->flags and reset */ |
2006 | if ((!!(data & ETH_FLAG_LRO)) != | 2009 | if ((!!(data & ETH_FLAG_LRO)) != |
2007 | (!!(adapter->flags & IXGBE_FLAG2_RSC_ENABLED))) { | 2010 | (!!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED))) { |
2008 | adapter->flags ^= IXGBE_FLAG2_RSC_ENABLED; | 2011 | adapter->flags2 ^= IXGBE_FLAG2_RSC_ENABLED; |
2009 | if (netif_running(netdev)) | 2012 | if (netif_running(netdev)) |
2010 | ixgbe_reinit_locked(adapter); | 2013 | ixgbe_reinit_locked(adapter); |
2011 | else | 2014 | else |
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 200454f30f6a..110c65ab5cb5 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c | |||
@@ -780,7 +780,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, | |||
780 | prefetch(next_rxd); | 780 | prefetch(next_rxd); |
781 | cleaned_count++; | 781 | cleaned_count++; |
782 | 782 | ||
783 | if (adapter->flags & IXGBE_FLAG2_RSC_CAPABLE) | 783 | if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) |
784 | rsc_count = ixgbe_get_rsc_count(rx_desc); | 784 | rsc_count = ixgbe_get_rsc_count(rx_desc); |
785 | 785 | ||
786 | if (rsc_count) { | 786 | if (rsc_count) { |
@@ -2036,7 +2036,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) | |||
2036 | IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); | 2036 | IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); |
2037 | } | 2037 | } |
2038 | } else { | 2038 | } else { |
2039 | if (!(adapter->flags & IXGBE_FLAG2_RSC_ENABLED) && | 2039 | if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) && |
2040 | (netdev->mtu <= ETH_DATA_LEN)) | 2040 | (netdev->mtu <= ETH_DATA_LEN)) |
2041 | rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; | 2041 | rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; |
2042 | else | 2042 | else |
@@ -2165,7 +2165,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) | |||
2165 | IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); | 2165 | IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); |
2166 | } | 2166 | } |
2167 | 2167 | ||
2168 | if (adapter->flags & IXGBE_FLAG2_RSC_ENABLED) { | 2168 | if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { |
2169 | /* Enable 82599 HW-RSC */ | 2169 | /* Enable 82599 HW-RSC */ |
2170 | for (i = 0; i < adapter->num_rx_queues; i++) { | 2170 | for (i = 0; i < adapter->num_rx_queues; i++) { |
2171 | j = adapter->rx_ring[i].reg_idx; | 2171 | j = adapter->rx_ring[i].reg_idx; |
@@ -3812,8 +3812,8 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) | |||
3812 | adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82598; | 3812 | adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82598; |
3813 | } else if (hw->mac.type == ixgbe_mac_82599EB) { | 3813 | } else if (hw->mac.type == ixgbe_mac_82599EB) { |
3814 | adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82599; | 3814 | adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82599; |
3815 | adapter->flags |= IXGBE_FLAG2_RSC_CAPABLE; | 3815 | adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; |
3816 | adapter->flags |= IXGBE_FLAG2_RSC_ENABLED; | 3816 | adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; |
3817 | adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; | 3817 | adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; |
3818 | adapter->ring_feature[RING_F_FDIR].indices = | 3818 | adapter->ring_feature[RING_F_FDIR].indices = |
3819 | IXGBE_MAX_FDIR_INDICES; | 3819 | IXGBE_MAX_FDIR_INDICES; |
@@ -5360,12 +5360,19 @@ static int ixgbe_del_sanmac_netdev(struct net_device *dev) | |||
5360 | static void ixgbe_netpoll(struct net_device *netdev) | 5360 | static void ixgbe_netpoll(struct net_device *netdev) |
5361 | { | 5361 | { |
5362 | struct ixgbe_adapter *adapter = netdev_priv(netdev); | 5362 | struct ixgbe_adapter *adapter = netdev_priv(netdev); |
5363 | int i; | ||
5363 | 5364 | ||
5364 | disable_irq(adapter->pdev->irq); | ||
5365 | adapter->flags |= IXGBE_FLAG_IN_NETPOLL; | 5365 | adapter->flags |= IXGBE_FLAG_IN_NETPOLL; |
5366 | ixgbe_intr(adapter->pdev->irq, netdev); | 5366 | if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { |
5367 | int num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; | ||
5368 | for (i = 0; i < num_q_vectors; i++) { | ||
5369 | struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; | ||
5370 | ixgbe_msix_clean_many(0, q_vector); | ||
5371 | } | ||
5372 | } else { | ||
5373 | ixgbe_intr(adapter->pdev->irq, netdev); | ||
5374 | } | ||
5367 | adapter->flags &= ~IXGBE_FLAG_IN_NETPOLL; | 5375 | adapter->flags &= ~IXGBE_FLAG_IN_NETPOLL; |
5368 | enable_irq(adapter->pdev->irq); | ||
5369 | } | 5376 | } |
5370 | #endif | 5377 | #endif |
5371 | 5378 | ||
@@ -5611,7 +5618,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, | |||
5611 | if (pci_using_dac) | 5618 | if (pci_using_dac) |
5612 | netdev->features |= NETIF_F_HIGHDMA; | 5619 | netdev->features |= NETIF_F_HIGHDMA; |
5613 | 5620 | ||
5614 | if (adapter->flags & IXGBE_FLAG2_RSC_ENABLED) | 5621 | if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) |
5615 | netdev->features |= NETIF_F_LRO; | 5622 | netdev->features |= NETIF_F_LRO; |
5616 | 5623 | ||
5617 | /* make sure the EEPROM is good */ | 5624 | /* make sure the EEPROM is good */ |
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index fa87309dc087..be90eb4575f6 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h | |||
@@ -718,6 +718,12 @@ | |||
718 | #define IXGBE_ECC_STATUS_82599 0x110E0 | 718 | #define IXGBE_ECC_STATUS_82599 0x110E0 |
719 | #define IXGBE_BAR_CTRL_82599 0x110F4 | 719 | #define IXGBE_BAR_CTRL_82599 0x110F4 |
720 | 720 | ||
721 | /* PCI Express Control */ | ||
722 | #define IXGBE_GCR_CMPL_TMOUT_MASK 0x0000F000 | ||
723 | #define IXGBE_GCR_CMPL_TMOUT_10ms 0x00001000 | ||
724 | #define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000 | ||
725 | #define IXGBE_GCR_CAP_VER2 0x00040000 | ||
726 | |||
721 | /* Time Sync Registers */ | 727 | /* Time Sync Registers */ |
722 | #define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ | 728 | #define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ |
723 | #define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ | 729 | #define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ |
@@ -1521,6 +1527,7 @@ | |||
1521 | 1527 | ||
1522 | /* PCI Bus Info */ | 1528 | /* PCI Bus Info */ |
1523 | #define IXGBE_PCI_LINK_STATUS 0xB2 | 1529 | #define IXGBE_PCI_LINK_STATUS 0xB2 |
1530 | #define IXGBE_PCI_DEVICE_CONTROL2 0xC8 | ||
1524 | #define IXGBE_PCI_LINK_WIDTH 0x3F0 | 1531 | #define IXGBE_PCI_LINK_WIDTH 0x3F0 |
1525 | #define IXGBE_PCI_LINK_WIDTH_1 0x10 | 1532 | #define IXGBE_PCI_LINK_WIDTH_1 0x10 |
1526 | #define IXGBE_PCI_LINK_WIDTH_2 0x20 | 1533 | #define IXGBE_PCI_LINK_WIDTH_2 0x20 |
@@ -1531,6 +1538,7 @@ | |||
1531 | #define IXGBE_PCI_LINK_SPEED_5000 0x2 | 1538 | #define IXGBE_PCI_LINK_SPEED_5000 0x2 |
1532 | #define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E | 1539 | #define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E |
1533 | #define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 | 1540 | #define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 |
1541 | #define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 | ||
1534 | 1542 | ||
1535 | /* Number of 100 microseconds we wait for PCI Express master disable */ | 1543 | /* Number of 100 microseconds we wait for PCI Express master disable */ |
1536 | #define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 | 1544 | #define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 |
diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 08c43f2ae72b..5a88b3f57693 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c | |||
@@ -249,6 +249,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, | |||
249 | pci_unmap_page(mdev->pdev, | 249 | pci_unmap_page(mdev->pdev, |
250 | (dma_addr_t) be64_to_cpu(data->addr), | 250 | (dma_addr_t) be64_to_cpu(data->addr), |
251 | frag->size, PCI_DMA_TODEVICE); | 251 | frag->size, PCI_DMA_TODEVICE); |
252 | ++data; | ||
252 | } | 253 | } |
253 | } | 254 | } |
254 | /* Stamp the freed descriptor */ | 255 | /* Stamp the freed descriptor */ |
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 637ac8b89bac..3cd8cfcf627b 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c | |||
@@ -221,7 +221,7 @@ netxen_napi_disable(struct netxen_adapter *adapter) | |||
221 | } | 221 | } |
222 | } | 222 | } |
223 | 223 | ||
224 | static int nx_set_dma_mask(struct netxen_adapter *adapter, uint8_t revision_id) | 224 | static int nx_set_dma_mask(struct netxen_adapter *adapter) |
225 | { | 225 | { |
226 | struct pci_dev *pdev = adapter->pdev; | 226 | struct pci_dev *pdev = adapter->pdev; |
227 | uint64_t mask, cmask; | 227 | uint64_t mask, cmask; |
@@ -229,19 +229,17 @@ static int nx_set_dma_mask(struct netxen_adapter *adapter, uint8_t revision_id) | |||
229 | adapter->pci_using_dac = 0; | 229 | adapter->pci_using_dac = 0; |
230 | 230 | ||
231 | mask = DMA_BIT_MASK(32); | 231 | mask = DMA_BIT_MASK(32); |
232 | /* | ||
233 | * Consistent DMA mask is set to 32 bit because it cannot be set to | ||
234 | * 35 bits. For P3 also leave it at 32 bits for now. Only the rings | ||
235 | * come off this pool. | ||
236 | */ | ||
237 | cmask = DMA_BIT_MASK(32); | 232 | cmask = DMA_BIT_MASK(32); |
238 | 233 | ||
234 | if (NX_IS_REVISION_P2(adapter->ahw.revision_id)) { | ||
239 | #ifndef CONFIG_IA64 | 235 | #ifndef CONFIG_IA64 |
240 | if (revision_id >= NX_P3_B0) | ||
241 | mask = DMA_BIT_MASK(39); | ||
242 | else if (revision_id == NX_P2_C1) | ||
243 | mask = DMA_BIT_MASK(35); | 236 | mask = DMA_BIT_MASK(35); |
244 | #endif | 237 | #endif |
238 | } else { | ||
239 | mask = DMA_BIT_MASK(39); | ||
240 | cmask = mask; | ||
241 | } | ||
242 | |||
245 | if (pci_set_dma_mask(pdev, mask) == 0 && | 243 | if (pci_set_dma_mask(pdev, mask) == 0 && |
246 | pci_set_consistent_dma_mask(pdev, cmask) == 0) { | 244 | pci_set_consistent_dma_mask(pdev, cmask) == 0) { |
247 | adapter->pci_using_dac = 1; | 245 | adapter->pci_using_dac = 1; |
@@ -256,7 +254,7 @@ static int | |||
256 | nx_update_dma_mask(struct netxen_adapter *adapter) | 254 | nx_update_dma_mask(struct netxen_adapter *adapter) |
257 | { | 255 | { |
258 | int change, shift, err; | 256 | int change, shift, err; |
259 | uint64_t mask, old_mask; | 257 | uint64_t mask, old_mask, old_cmask; |
260 | struct pci_dev *pdev = adapter->pdev; | 258 | struct pci_dev *pdev = adapter->pdev; |
261 | 259 | ||
262 | change = 0; | 260 | change = 0; |
@@ -272,14 +270,29 @@ nx_update_dma_mask(struct netxen_adapter *adapter) | |||
272 | 270 | ||
273 | if (change) { | 271 | if (change) { |
274 | old_mask = pdev->dma_mask; | 272 | old_mask = pdev->dma_mask; |
273 | old_cmask = pdev->dev.coherent_dma_mask; | ||
274 | |||
275 | mask = (1ULL<<(32+shift)) - 1; | 275 | mask = (1ULL<<(32+shift)) - 1; |
276 | 276 | ||
277 | err = pci_set_dma_mask(pdev, mask); | 277 | err = pci_set_dma_mask(pdev, mask); |
278 | if (err) | 278 | if (err) |
279 | return pci_set_dma_mask(pdev, old_mask); | 279 | goto err_out; |
280 | |||
281 | if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) { | ||
282 | |||
283 | err = pci_set_consistent_dma_mask(pdev, mask); | ||
284 | if (err) | ||
285 | goto err_out; | ||
286 | } | ||
287 | dev_info(&pdev->dev, "using %d-bit dma mask\n", 32+shift); | ||
280 | } | 288 | } |
281 | 289 | ||
282 | return 0; | 290 | return 0; |
291 | |||
292 | err_out: | ||
293 | pci_set_dma_mask(pdev, old_mask); | ||
294 | pci_set_consistent_dma_mask(pdev, old_cmask); | ||
295 | return err; | ||
283 | } | 296 | } |
284 | 297 | ||
285 | static void netxen_check_options(struct netxen_adapter *adapter) | 298 | static void netxen_check_options(struct netxen_adapter *adapter) |
@@ -1006,7 +1019,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
1006 | revision_id = pdev->revision; | 1019 | revision_id = pdev->revision; |
1007 | adapter->ahw.revision_id = revision_id; | 1020 | adapter->ahw.revision_id = revision_id; |
1008 | 1021 | ||
1009 | err = nx_set_dma_mask(adapter, revision_id); | 1022 | err = nx_set_dma_mask(adapter); |
1010 | if (err) | 1023 | if (err) |
1011 | goto err_out_free_netdev; | 1024 | goto err_out_free_netdev; |
1012 | 1025 | ||
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 28368157dac4..a646a445fda9 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c | |||
@@ -1611,8 +1611,11 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
1611 | if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 | 1611 | if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 |
1612 | && pcnet32_dwio_check(ioaddr)) { | 1612 | && pcnet32_dwio_check(ioaddr)) { |
1613 | a = &pcnet32_dwio; | 1613 | a = &pcnet32_dwio; |
1614 | } else | 1614 | } else { |
1615 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1616 | printk(KERN_ERR PFX "No access methods\n"); | ||
1615 | goto err_release_region; | 1617 | goto err_release_region; |
1618 | } | ||
1616 | } | 1619 | } |
1617 | 1620 | ||
1618 | chip_version = | 1621 | chip_version = |
@@ -1719,7 +1722,9 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
1719 | ret = -ENOMEM; | 1722 | ret = -ENOMEM; |
1720 | goto err_release_region; | 1723 | goto err_release_region; |
1721 | } | 1724 | } |
1722 | SET_NETDEV_DEV(dev, &pdev->dev); | 1725 | |
1726 | if (pdev) | ||
1727 | SET_NETDEV_DEV(dev, &pdev->dev); | ||
1723 | 1728 | ||
1724 | if (pcnet32_debug & NETIF_MSG_PROBE) | 1729 | if (pcnet32_debug & NETIF_MSG_PROBE) |
1725 | printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); | 1730 | printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); |
@@ -1818,7 +1823,6 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
1818 | 1823 | ||
1819 | spin_lock_init(&lp->lock); | 1824 | spin_lock_init(&lp->lock); |
1820 | 1825 | ||
1821 | SET_NETDEV_DEV(dev, &pdev->dev); | ||
1822 | lp->name = chipname; | 1826 | lp->name = chipname; |
1823 | lp->shared_irq = shared; | 1827 | lp->shared_irq = shared; |
1824 | lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ | 1828 | lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ |
@@ -1852,12 +1856,6 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
1852 | ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) | 1856 | ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) |
1853 | lp->options |= PCNET32_PORT_FD; | 1857 | lp->options |= PCNET32_PORT_FD; |
1854 | 1858 | ||
1855 | if (!a) { | ||
1856 | if (pcnet32_debug & NETIF_MSG_PROBE) | ||
1857 | printk(KERN_ERR PFX "No access methods\n"); | ||
1858 | ret = -ENODEV; | ||
1859 | goto err_free_consistent; | ||
1860 | } | ||
1861 | lp->a = *a; | 1859 | lp->a = *a; |
1862 | 1860 | ||
1863 | /* prior to register_netdev, dev->name is not yet correct */ | 1861 | /* prior to register_netdev, dev->name is not yet correct */ |
@@ -1973,14 +1971,13 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) | |||
1973 | 1971 | ||
1974 | return 0; | 1972 | return 0; |
1975 | 1973 | ||
1976 | err_free_ring: | 1974 | err_free_ring: |
1977 | pcnet32_free_ring(dev); | 1975 | pcnet32_free_ring(dev); |
1978 | err_free_consistent: | ||
1979 | pci_free_consistent(lp->pci_dev, sizeof(*lp->init_block), | 1976 | pci_free_consistent(lp->pci_dev, sizeof(*lp->init_block), |
1980 | lp->init_block, lp->init_dma_addr); | 1977 | lp->init_block, lp->init_dma_addr); |
1981 | err_free_netdev: | 1978 | err_free_netdev: |
1982 | free_netdev(dev); | 1979 | free_netdev(dev); |
1983 | err_release_region: | 1980 | err_release_region: |
1984 | release_region(ioaddr, PCNET32_TOTAL_SIZE); | 1981 | release_region(ioaddr, PCNET32_TOTAL_SIZE); |
1985 | return ret; | 1982 | return ret; |
1986 | } | 1983 | } |
@@ -2089,6 +2086,7 @@ static void pcnet32_free_ring(struct net_device *dev) | |||
2089 | static int pcnet32_open(struct net_device *dev) | 2086 | static int pcnet32_open(struct net_device *dev) |
2090 | { | 2087 | { |
2091 | struct pcnet32_private *lp = netdev_priv(dev); | 2088 | struct pcnet32_private *lp = netdev_priv(dev); |
2089 | struct pci_dev *pdev = lp->pci_dev; | ||
2092 | unsigned long ioaddr = dev->base_addr; | 2090 | unsigned long ioaddr = dev->base_addr; |
2093 | u16 val; | 2091 | u16 val; |
2094 | int i; | 2092 | int i; |
@@ -2149,9 +2147,9 @@ static int pcnet32_open(struct net_device *dev) | |||
2149 | lp->a.write_csr(ioaddr, 124, val); | 2147 | lp->a.write_csr(ioaddr, 124, val); |
2150 | 2148 | ||
2151 | /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ | 2149 | /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ |
2152 | if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT && | 2150 | if (pdev && pdev->subsystem_vendor == PCI_VENDOR_ID_AT && |
2153 | (lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || | 2151 | (pdev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || |
2154 | lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { | 2152 | pdev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { |
2155 | if (lp->options & PCNET32_PORT_ASEL) { | 2153 | if (lp->options & PCNET32_PORT_ASEL) { |
2156 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; | 2154 | lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; |
2157 | if (netif_msg_link(lp)) | 2155 | if (netif_msg_link(lp)) |
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 639d11bc444e..cd37d739ac74 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c | |||
@@ -1384,7 +1384,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) | |||
1384 | 1384 | ||
1385 | /* create a fragment for each channel */ | 1385 | /* create a fragment for each channel */ |
1386 | bits = B; | 1386 | bits = B; |
1387 | while (nfree > 0 && len > 0) { | 1387 | while (len > 0) { |
1388 | list = list->next; | 1388 | list = list->next; |
1389 | if (list == &ppp->channels) { | 1389 | if (list == &ppp->channels) { |
1390 | i = 0; | 1390 | i = 0; |
@@ -1431,29 +1431,31 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) | |||
1431 | *otherwise divide it according to the speed | 1431 | *otherwise divide it according to the speed |
1432 | *of the channel we are going to transmit on | 1432 | *of the channel we are going to transmit on |
1433 | */ | 1433 | */ |
1434 | if (pch->speed == 0) { | 1434 | if (nfree > 0) { |
1435 | flen = totlen/nfree ; | 1435 | if (pch->speed == 0) { |
1436 | if (nbigger > 0) { | 1436 | flen = totlen/nfree ; |
1437 | flen++; | 1437 | if (nbigger > 0) { |
1438 | nbigger--; | 1438 | flen++; |
1439 | } | 1439 | nbigger--; |
1440 | } else { | 1440 | } |
1441 | flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) / | 1441 | } else { |
1442 | ((totspeed*totfree)/pch->speed)) - hdrlen; | 1442 | flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) / |
1443 | if (nbigger > 0) { | 1443 | ((totspeed*totfree)/pch->speed)) - hdrlen; |
1444 | flen += ((totfree - nzero)*pch->speed)/totspeed; | 1444 | if (nbigger > 0) { |
1445 | nbigger -= ((totfree - nzero)*pch->speed)/ | 1445 | flen += ((totfree - nzero)*pch->speed)/totspeed; |
1446 | nbigger -= ((totfree - nzero)*pch->speed)/ | ||
1446 | totspeed; | 1447 | totspeed; |
1448 | } | ||
1447 | } | 1449 | } |
1450 | nfree--; | ||
1448 | } | 1451 | } |
1449 | nfree--; | ||
1450 | 1452 | ||
1451 | /* | 1453 | /* |
1452 | *check if we are on the last channel or | 1454 | *check if we are on the last channel or |
1453 | *we exceded the lenght of the data to | 1455 | *we exceded the lenght of the data to |
1454 | *fragment | 1456 | *fragment |
1455 | */ | 1457 | */ |
1456 | if ((nfree == 0) || (flen > len)) | 1458 | if ((nfree <= 0) || (flen > len)) |
1457 | flen = len; | 1459 | flen = len; |
1458 | /* | 1460 | /* |
1459 | *it is not worth to tx on slow channels: | 1461 | *it is not worth to tx on slow channels: |
@@ -1467,7 +1469,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) | |||
1467 | continue; | 1469 | continue; |
1468 | } | 1470 | } |
1469 | 1471 | ||
1470 | mtu = pch->chan->mtu + 2 - hdrlen; | 1472 | mtu = pch->chan->mtu - hdrlen; |
1471 | if (mtu < 4) | 1473 | if (mtu < 4) |
1472 | mtu = 4; | 1474 | mtu = 4; |
1473 | if (flen > mtu) | 1475 | if (flen > mtu) |
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index f0031f1f97e5..5f2090233d7b 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c | |||
@@ -1063,6 +1063,7 @@ static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1063 | else { | 1063 | else { |
1064 | int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); | 1064 | int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); |
1065 | 1065 | ||
1066 | po = NULL; | ||
1066 | while (++hash < PPPOE_HASH_SIZE) { | 1067 | while (++hash < PPPOE_HASH_SIZE) { |
1067 | po = pn->hash_table[hash]; | 1068 | po = pn->hash_table[hash]; |
1068 | if (po) | 1069 | if (po) |
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index e7935d09c896..e0f9219a0aea 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c | |||
@@ -2680,6 +2680,7 @@ out_unregister_pppol2tp_proto: | |||
2680 | static void __exit pppol2tp_exit(void) | 2680 | static void __exit pppol2tp_exit(void) |
2681 | { | 2681 | { |
2682 | unregister_pppox_proto(PX_PROTO_OL2TP); | 2682 | unregister_pppox_proto(PX_PROTO_OL2TP); |
2683 | unregister_pernet_gen_device(pppol2tp_net_id, &pppol2tp_net_ops); | ||
2683 | proto_unregister(&pppol2tp_sk_proto); | 2684 | proto_unregister(&pppol2tp_sk_proto); |
2684 | } | 2685 | } |
2685 | 2686 | ||
diff --git a/drivers/net/s6gmac.c b/drivers/net/s6gmac.c index 5345e47b35ac..4525cbe8dd69 100644 --- a/drivers/net/s6gmac.c +++ b/drivers/net/s6gmac.c | |||
@@ -793,7 +793,7 @@ static inline int s6gmac_phy_start(struct net_device *dev) | |||
793 | struct s6gmac *pd = netdev_priv(dev); | 793 | struct s6gmac *pd = netdev_priv(dev); |
794 | int i = 0; | 794 | int i = 0; |
795 | struct phy_device *p = NULL; | 795 | struct phy_device *p = NULL; |
796 | while ((!(p = pd->mii.bus->phy_map[i])) && (i < PHY_MAX_ADDR)) | 796 | while ((i < PHY_MAX_ADDR) && (!(p = pd->mii.bus->phy_map[i]))) |
797 | i++; | 797 | i++; |
798 | p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, 0, | 798 | p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, 0, |
799 | PHY_INTERFACE_MODE_RGMII); | 799 | PHY_INTERFACE_MODE_RGMII); |
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 3550c5dcd93c..0a551d8f5d95 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c | |||
@@ -1488,6 +1488,8 @@ static int sky2_up(struct net_device *dev) | |||
1488 | sky2_set_vlan_mode(hw, port, sky2->vlgrp != NULL); | 1488 | sky2_set_vlan_mode(hw, port, sky2->vlgrp != NULL); |
1489 | #endif | 1489 | #endif |
1490 | 1490 | ||
1491 | sky2->restarting = 0; | ||
1492 | |||
1491 | err = sky2_rx_start(sky2); | 1493 | err = sky2_rx_start(sky2); |
1492 | if (err) | 1494 | if (err) |
1493 | goto err_out; | 1495 | goto err_out; |
@@ -1500,6 +1502,9 @@ static int sky2_up(struct net_device *dev) | |||
1500 | 1502 | ||
1501 | sky2_set_multicast(dev); | 1503 | sky2_set_multicast(dev); |
1502 | 1504 | ||
1505 | /* wake queue incase we are restarting */ | ||
1506 | netif_wake_queue(dev); | ||
1507 | |||
1503 | if (netif_msg_ifup(sky2)) | 1508 | if (netif_msg_ifup(sky2)) |
1504 | printk(KERN_INFO PFX "%s: enabling interface\n", dev->name); | 1509 | printk(KERN_INFO PFX "%s: enabling interface\n", dev->name); |
1505 | return 0; | 1510 | return 0; |
@@ -1533,6 +1538,8 @@ static inline int tx_dist(unsigned tail, unsigned head) | |||
1533 | /* Number of list elements available for next tx */ | 1538 | /* Number of list elements available for next tx */ |
1534 | static inline int tx_avail(const struct sky2_port *sky2) | 1539 | static inline int tx_avail(const struct sky2_port *sky2) |
1535 | { | 1540 | { |
1541 | if (unlikely(sky2->restarting)) | ||
1542 | return 0; | ||
1536 | return sky2->tx_pending - tx_dist(sky2->tx_cons, sky2->tx_prod); | 1543 | return sky2->tx_pending - tx_dist(sky2->tx_cons, sky2->tx_prod); |
1537 | } | 1544 | } |
1538 | 1545 | ||
@@ -1818,6 +1825,10 @@ static int sky2_down(struct net_device *dev) | |||
1818 | if (netif_msg_ifdown(sky2)) | 1825 | if (netif_msg_ifdown(sky2)) |
1819 | printk(KERN_INFO PFX "%s: disabling interface\n", dev->name); | 1826 | printk(KERN_INFO PFX "%s: disabling interface\n", dev->name); |
1820 | 1827 | ||
1828 | /* explicitly shut off tx incase we're restarting */ | ||
1829 | sky2->restarting = 1; | ||
1830 | netif_tx_disable(dev); | ||
1831 | |||
1821 | /* Force flow control off */ | 1832 | /* Force flow control off */ |
1822 | sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_PAUSE_OFF); | 1833 | sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_PAUSE_OFF); |
1823 | 1834 | ||
@@ -2359,7 +2370,7 @@ static inline void sky2_tx_done(struct net_device *dev, u16 last) | |||
2359 | { | 2370 | { |
2360 | struct sky2_port *sky2 = netdev_priv(dev); | 2371 | struct sky2_port *sky2 = netdev_priv(dev); |
2361 | 2372 | ||
2362 | if (netif_running(dev)) { | 2373 | if (likely(netif_running(dev) && !sky2->restarting)) { |
2363 | netif_tx_lock(dev); | 2374 | netif_tx_lock(dev); |
2364 | sky2_tx_complete(sky2, last); | 2375 | sky2_tx_complete(sky2, last); |
2365 | netif_tx_unlock(dev); | 2376 | netif_tx_unlock(dev); |
@@ -4283,6 +4294,7 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, | |||
4283 | spin_lock_init(&sky2->phy_lock); | 4294 | spin_lock_init(&sky2->phy_lock); |
4284 | sky2->tx_pending = TX_DEF_PENDING; | 4295 | sky2->tx_pending = TX_DEF_PENDING; |
4285 | sky2->rx_pending = RX_DEF_PENDING; | 4296 | sky2->rx_pending = RX_DEF_PENDING; |
4297 | sky2->restarting = 0; | ||
4286 | 4298 | ||
4287 | hw->dev[port] = dev; | 4299 | hw->dev[port] = dev; |
4288 | 4300 | ||
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index b5549c9e5107..4486b066b43f 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h | |||
@@ -2051,6 +2051,7 @@ struct sky2_port { | |||
2051 | u8 duplex; /* DUPLEX_HALF, DUPLEX_FULL */ | 2051 | u8 duplex; /* DUPLEX_HALF, DUPLEX_FULL */ |
2052 | u8 rx_csum; | 2052 | u8 rx_csum; |
2053 | u8 wol; | 2053 | u8 wol; |
2054 | u8 restarting; | ||
2054 | enum flow_control flow_mode; | 2055 | enum flow_control flow_mode; |
2055 | enum flow_control flow_status; | 2056 | enum flow_control flow_status; |
2056 | 2057 | ||
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index eb72d2e9ab3d..acfdccd44567 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c | |||
@@ -5059,7 +5059,7 @@ mii_get_phy(struct net_device *dev) | |||
5059 | if ((id == 0) || (id == 65535)) continue; /* Valid ID? */ | 5059 | if ((id == 0) || (id == 65535)) continue; /* Valid ID? */ |
5060 | for (j=0; j<limit; j++) { /* Search PHY table */ | 5060 | for (j=0; j<limit; j++) { /* Search PHY table */ |
5061 | if (id != phy_info[j].id) continue; /* ID match? */ | 5061 | if (id != phy_info[j].id) continue; /* ID match? */ |
5062 | for (k=0; lp->phy[k].id && (k < DE4X5_MAX_PHY); k++); | 5062 | for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); |
5063 | if (k < DE4X5_MAX_PHY) { | 5063 | if (k < DE4X5_MAX_PHY) { |
5064 | memcpy((char *)&lp->phy[k], | 5064 | memcpy((char *)&lp->phy[k], |
5065 | (char *)&phy_info[j], sizeof(struct phy_table)); | 5065 | (char *)&phy_info[j], sizeof(struct phy_table)); |
@@ -5072,7 +5072,7 @@ mii_get_phy(struct net_device *dev) | |||
5072 | break; | 5072 | break; |
5073 | } | 5073 | } |
5074 | if ((j == limit) && (i < DE4X5_MAX_MII)) { | 5074 | if ((j == limit) && (i < DE4X5_MAX_MII)) { |
5075 | for (k=0; lp->phy[k].id && (k < DE4X5_MAX_PHY); k++); | 5075 | for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); |
5076 | lp->phy[k].addr = i; | 5076 | lp->phy[k].addr = i; |
5077 | lp->phy[k].id = id; | 5077 | lp->phy[k].id = id; |
5078 | lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ | 5078 | lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ |
@@ -5091,7 +5091,7 @@ mii_get_phy(struct net_device *dev) | |||
5091 | purgatory: | 5091 | purgatory: |
5092 | lp->active = 0; | 5092 | lp->active = 0; |
5093 | if (lp->phy[0].id) { /* Reset the PHY devices */ | 5093 | if (lp->phy[0].id) { /* Reset the PHY devices */ |
5094 | for (k=0; lp->phy[k].id && (k < DE4X5_MAX_PHY); k++) { /*For each PHY*/ | 5094 | for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++) { /*For each PHY*/ |
5095 | mii_wr(MII_CR_RST, MII_CR, lp->phy[k].addr, DE4X5_MII); | 5095 | mii_wr(MII_CR_RST, MII_CR, lp->phy[k].addr, DE4X5_MII); |
5096 | while (mii_rd(MII_CR, lp->phy[k].addr, DE4X5_MII) & MII_CR_RST); | 5096 | while (mii_rd(MII_CR, lp->phy[k].addr, DE4X5_MII) & MII_CR_RST); |
5097 | 5097 | ||
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index c70604f0329e..8ce5e4cee168 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c | |||
@@ -5918,20 +5918,19 @@ static int airo_set_essid(struct net_device *dev, | |||
5918 | readSsidRid(local, &SSID_rid); | 5918 | readSsidRid(local, &SSID_rid); |
5919 | 5919 | ||
5920 | /* Check if we asked for `any' */ | 5920 | /* Check if we asked for `any' */ |
5921 | if(dwrq->flags == 0) { | 5921 | if (dwrq->flags == 0) { |
5922 | /* Just send an empty SSID list */ | 5922 | /* Just send an empty SSID list */ |
5923 | memset(&SSID_rid, 0, sizeof(SSID_rid)); | 5923 | memset(&SSID_rid, 0, sizeof(SSID_rid)); |
5924 | } else { | 5924 | } else { |
5925 | int index = (dwrq->flags & IW_ENCODE_INDEX) - 1; | 5925 | unsigned index = (dwrq->flags & IW_ENCODE_INDEX) - 1; |
5926 | 5926 | ||
5927 | /* Check the size of the string */ | 5927 | /* Check the size of the string */ |
5928 | if(dwrq->length > IW_ESSID_MAX_SIZE) { | 5928 | if (dwrq->length > IW_ESSID_MAX_SIZE) |
5929 | return -E2BIG ; | 5929 | return -E2BIG ; |
5930 | } | 5930 | |
5931 | /* Check if index is valid */ | 5931 | /* Check if index is valid */ |
5932 | if((index < 0) || (index >= 4)) { | 5932 | if (index >= ARRAY_SIZE(SSID_rid.ssids)) |
5933 | return -EINVAL; | 5933 | return -EINVAL; |
5934 | } | ||
5935 | 5934 | ||
5936 | /* Set the SSID */ | 5935 | /* Set the SSID */ |
5937 | memset(SSID_rid.ssids[index].ssid, 0, | 5936 | memset(SSID_rid.ssids[index].ssid, 0, |
@@ -6819,7 +6818,7 @@ static int airo_set_txpow(struct net_device *dev, | |||
6819 | return -EINVAL; | 6818 | return -EINVAL; |
6820 | } | 6819 | } |
6821 | clear_bit (FLAG_RADIO_OFF, &local->flags); | 6820 | clear_bit (FLAG_RADIO_OFF, &local->flags); |
6822 | for (i = 0; cap_rid.txPowerLevels[i] && (i < 8); i++) | 6821 | for (i = 0; i < 8 && cap_rid.txPowerLevels[i]; i++) |
6823 | if (v == cap_rid.txPowerLevels[i]) { | 6822 | if (v == cap_rid.txPowerLevels[i]) { |
6824 | readConfigRid(local, 1); | 6823 | readConfigRid(local, 1); |
6825 | local->config.txPower = v; | 6824 | local->config.txPower = v; |
diff --git a/drivers/net/wireless/ath/ath9k/eeprom.c b/drivers/net/wireless/ath/ath9k/eeprom.c index a2fda702b620..ce0e86c36a82 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom.c +++ b/drivers/net/wireless/ath/ath9k/eeprom.c | |||
@@ -460,7 +460,7 @@ static int ath9k_hw_4k_check_eeprom(struct ath_hw *ah) | |||
460 | integer = swab32(eep->modalHeader.antCtrlCommon); | 460 | integer = swab32(eep->modalHeader.antCtrlCommon); |
461 | eep->modalHeader.antCtrlCommon = integer; | 461 | eep->modalHeader.antCtrlCommon = integer; |
462 | 462 | ||
463 | for (i = 0; i < AR5416_MAX_CHAINS; i++) { | 463 | for (i = 0; i < AR5416_EEP4K_MAX_CHAINS; i++) { |
464 | integer = swab32(eep->modalHeader.antCtrlChain[i]); | 464 | integer = swab32(eep->modalHeader.antCtrlChain[i]); |
465 | eep->modalHeader.antCtrlChain[i] = integer; | 465 | eep->modalHeader.antCtrlChain[i] = integer; |
466 | } | 466 | } |
@@ -914,7 +914,7 @@ static void ath9k_hw_set_4k_power_per_rate_table(struct ath_hw *ah, | |||
914 | ctlMode, numCtlModes, isHt40CtlMode, | 914 | ctlMode, numCtlModes, isHt40CtlMode, |
915 | (pCtlMode[ctlMode] & EXT_ADDITIVE)); | 915 | (pCtlMode[ctlMode] & EXT_ADDITIVE)); |
916 | 916 | ||
917 | for (i = 0; (i < AR5416_NUM_CTLS) && | 917 | for (i = 0; (i < AR5416_EEP4K_NUM_CTLS) && |
918 | pEepData->ctlIndex[i]; i++) { | 918 | pEepData->ctlIndex[i]; i++) { |
919 | DPRINTF(ah->ah_sc, ATH_DBG_EEPROM, | 919 | DPRINTF(ah->ah_sc, ATH_DBG_EEPROM, |
920 | " LOOP-Ctlidx %d: cfgCtl 0x%2.2x " | 920 | " LOOP-Ctlidx %d: cfgCtl 0x%2.2x " |
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h index fbb3a573463e..2de6471d4be9 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945.h | |||
@@ -112,7 +112,7 @@ enum iwl3945_antenna { | |||
112 | #define IWL_TX_FIFO_NONE 7 | 112 | #define IWL_TX_FIFO_NONE 7 |
113 | 113 | ||
114 | /* Minimum number of queues. MAX_NUM is defined in hw specific files */ | 114 | /* Minimum number of queues. MAX_NUM is defined in hw specific files */ |
115 | #define IWL_MIN_NUM_QUEUES 4 | 115 | #define IWL39_MIN_NUM_QUEUES 4 |
116 | 116 | ||
117 | #define IEEE80211_DATA_LEN 2304 | 117 | #define IEEE80211_DATA_LEN 2304 |
118 | #define IEEE80211_4ADDR_LEN 30 | 118 | #define IEEE80211_4ADDR_LEN 30 |
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 6ab07165ea28..18b135f510e5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c | |||
@@ -1332,6 +1332,9 @@ int iwl_setup_mac(struct iwl_priv *priv) | |||
1332 | 1332 | ||
1333 | hw->wiphy->custom_regulatory = true; | 1333 | hw->wiphy->custom_regulatory = true; |
1334 | 1334 | ||
1335 | /* Firmware does not support this */ | ||
1336 | hw->wiphy->disable_beacon_hints = true; | ||
1337 | |||
1335 | hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; | 1338 | hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; |
1336 | /* we create the 802.11 header and a zero-length SSID element */ | 1339 | /* we create the 802.11 header and a zero-length SSID element */ |
1337 | hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; | 1340 | hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; |
diff --git a/drivers/net/wireless/iwlwifi/iwl-debugfs.c b/drivers/net/wireless/iwlwifi/iwl-debugfs.c index 11e08c068917..ca00cc8ad4c7 100644 --- a/drivers/net/wireless/iwlwifi/iwl-debugfs.c +++ b/drivers/net/wireless/iwlwifi/iwl-debugfs.c | |||
@@ -308,18 +308,18 @@ static ssize_t iwl_dbgfs_nvm_read(struct file *file, | |||
308 | return -ENODATA; | 308 | return -ENODATA; |
309 | } | 309 | } |
310 | 310 | ||
311 | ptr = priv->eeprom; | ||
312 | if (!ptr) { | ||
313 | IWL_ERR(priv, "Invalid EEPROM/OTP memory\n"); | ||
314 | return -ENOMEM; | ||
315 | } | ||
316 | |||
311 | /* 4 characters for byte 0xYY */ | 317 | /* 4 characters for byte 0xYY */ |
312 | buf = kzalloc(buf_size, GFP_KERNEL); | 318 | buf = kzalloc(buf_size, GFP_KERNEL); |
313 | if (!buf) { | 319 | if (!buf) { |
314 | IWL_ERR(priv, "Can not allocate Buffer\n"); | 320 | IWL_ERR(priv, "Can not allocate Buffer\n"); |
315 | return -ENOMEM; | 321 | return -ENOMEM; |
316 | } | 322 | } |
317 | |||
318 | ptr = priv->eeprom; | ||
319 | if (!ptr) { | ||
320 | IWL_ERR(priv, "Invalid EEPROM/OTP memory\n"); | ||
321 | return -ENOMEM; | ||
322 | } | ||
323 | pos += scnprintf(buf + pos, buf_size - pos, "NVM Type: %s\n", | 323 | pos += scnprintf(buf + pos, buf_size - pos, "NVM Type: %s\n", |
324 | (priv->nvm_device_type == NVM_DEVICE_TYPE_OTP) | 324 | (priv->nvm_device_type == NVM_DEVICE_TYPE_OTP) |
325 | ? "OTP" : "EEPROM"); | 325 | ? "OTP" : "EEPROM"); |
diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index e2d620f0b6e8..650e20af20fa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h | |||
@@ -258,8 +258,10 @@ struct iwl_channel_info { | |||
258 | #define IWL_TX_FIFO_HCCA_2 6 | 258 | #define IWL_TX_FIFO_HCCA_2 6 |
259 | #define IWL_TX_FIFO_NONE 7 | 259 | #define IWL_TX_FIFO_NONE 7 |
260 | 260 | ||
261 | /* Minimum number of queues. MAX_NUM is defined in hw specific files */ | 261 | /* Minimum number of queues. MAX_NUM is defined in hw specific files. |
262 | #define IWL_MIN_NUM_QUEUES 4 | 262 | * Set the minimum to accommodate the 4 standard TX queues, 1 command |
263 | * queue, 2 (unused) HCCA queues, and 4 HT queues (one for each AC) */ | ||
264 | #define IWL_MIN_NUM_QUEUES 10 | ||
263 | 265 | ||
264 | /* Power management (not Tx power) structures */ | 266 | /* Power management (not Tx power) structures */ |
265 | 267 | ||
diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 2addf735b193..ffd5c61a7553 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c | |||
@@ -566,6 +566,8 @@ int iwl_remove_default_wep_key(struct iwl_priv *priv, | |||
566 | unsigned long flags; | 566 | unsigned long flags; |
567 | 567 | ||
568 | spin_lock_irqsave(&priv->sta_lock, flags); | 568 | spin_lock_irqsave(&priv->sta_lock, flags); |
569 | IWL_DEBUG_WEP(priv, "Removing default WEP key: idx=%d\n", | ||
570 | keyconf->keyidx); | ||
569 | 571 | ||
570 | if (!test_and_clear_bit(keyconf->keyidx, &priv->ucode_key_table)) | 572 | if (!test_and_clear_bit(keyconf->keyidx, &priv->ucode_key_table)) |
571 | IWL_ERR(priv, "index %d not used in uCode key table.\n", | 573 | IWL_ERR(priv, "index %d not used in uCode key table.\n", |
@@ -573,6 +575,11 @@ int iwl_remove_default_wep_key(struct iwl_priv *priv, | |||
573 | 575 | ||
574 | priv->default_wep_key--; | 576 | priv->default_wep_key--; |
575 | memset(&priv->wep_keys[keyconf->keyidx], 0, sizeof(priv->wep_keys[0])); | 577 | memset(&priv->wep_keys[keyconf->keyidx], 0, sizeof(priv->wep_keys[0])); |
578 | if (iwl_is_rfkill(priv)) { | ||
579 | IWL_DEBUG_WEP(priv, "Not sending REPLY_WEPKEY command due to RFKILL.\n"); | ||
580 | spin_unlock_irqrestore(&priv->sta_lock, flags); | ||
581 | return 0; | ||
582 | } | ||
576 | ret = iwl_send_static_wepkey_cmd(priv, 1); | 583 | ret = iwl_send_static_wepkey_cmd(priv, 1); |
577 | IWL_DEBUG_WEP(priv, "Remove default WEP key: idx=%d ret=%d\n", | 584 | IWL_DEBUG_WEP(priv, "Remove default WEP key: idx=%d ret=%d\n", |
578 | keyconf->keyidx, ret); | 585 | keyconf->keyidx, ret); |
@@ -853,6 +860,11 @@ int iwl_remove_dynamic_key(struct iwl_priv *priv, | |||
853 | priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; | 860 | priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; |
854 | priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; | 861 | priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; |
855 | 862 | ||
863 | if (iwl_is_rfkill(priv)) { | ||
864 | IWL_DEBUG_WEP(priv, "Not sending REPLY_ADD_STA command because RFKILL enabled. \n"); | ||
865 | spin_unlock_irqrestore(&priv->sta_lock, flags); | ||
866 | return 0; | ||
867 | } | ||
856 | ret = iwl_send_add_sta(priv, &priv->stations[sta_id].sta, CMD_ASYNC); | 868 | ret = iwl_send_add_sta(priv, &priv->stations[sta_id].sta, CMD_ASYNC); |
857 | spin_unlock_irqrestore(&priv->sta_lock, flags); | 869 | spin_unlock_irqrestore(&priv->sta_lock, flags); |
858 | return ret; | 870 | return ret; |
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 9bbeec9427f0..2e89040e63be 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c | |||
@@ -720,8 +720,6 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) | |||
720 | goto drop_unlock; | 720 | goto drop_unlock; |
721 | } | 721 | } |
722 | 722 | ||
723 | spin_unlock_irqrestore(&priv->lock, flags); | ||
724 | |||
725 | hdr_len = ieee80211_hdrlen(fc); | 723 | hdr_len = ieee80211_hdrlen(fc); |
726 | 724 | ||
727 | /* Find (or create) index into station table for destination station */ | 725 | /* Find (or create) index into station table for destination station */ |
@@ -729,7 +727,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) | |||
729 | if (sta_id == IWL_INVALID_STATION) { | 727 | if (sta_id == IWL_INVALID_STATION) { |
730 | IWL_DEBUG_DROP(priv, "Dropping - INVALID STATION: %pM\n", | 728 | IWL_DEBUG_DROP(priv, "Dropping - INVALID STATION: %pM\n", |
731 | hdr->addr1); | 729 | hdr->addr1); |
732 | goto drop; | 730 | goto drop_unlock; |
733 | } | 731 | } |
734 | 732 | ||
735 | IWL_DEBUG_TX(priv, "station Id %d\n", sta_id); | 733 | IWL_DEBUG_TX(priv, "station Id %d\n", sta_id); |
@@ -750,14 +748,17 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) | |||
750 | txq_id = priv->stations[sta_id].tid[tid].agg.txq_id; | 748 | txq_id = priv->stations[sta_id].tid[tid].agg.txq_id; |
751 | swq_id = iwl_virtual_agg_queue_num(swq_id, txq_id); | 749 | swq_id = iwl_virtual_agg_queue_num(swq_id, txq_id); |
752 | } | 750 | } |
753 | priv->stations[sta_id].tid[tid].tfds_in_queue++; | ||
754 | } | 751 | } |
755 | 752 | ||
756 | txq = &priv->txq[txq_id]; | 753 | txq = &priv->txq[txq_id]; |
757 | q = &txq->q; | 754 | q = &txq->q; |
758 | txq->swq_id = swq_id; | 755 | txq->swq_id = swq_id; |
759 | 756 | ||
760 | spin_lock_irqsave(&priv->lock, flags); | 757 | if (unlikely(iwl_queue_space(q) < q->high_mark)) |
758 | goto drop_unlock; | ||
759 | |||
760 | if (ieee80211_is_data_qos(fc)) | ||
761 | priv->stations[sta_id].tid[tid].tfds_in_queue++; | ||
761 | 762 | ||
762 | /* Set up driver data for this TFD */ | 763 | /* Set up driver data for this TFD */ |
763 | memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl_tx_info)); | 764 | memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl_tx_info)); |
@@ -902,7 +903,6 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) | |||
902 | 903 | ||
903 | drop_unlock: | 904 | drop_unlock: |
904 | spin_unlock_irqrestore(&priv->lock, flags); | 905 | spin_unlock_irqrestore(&priv->lock, flags); |
905 | drop: | ||
906 | return -1; | 906 | return -1; |
907 | } | 907 | } |
908 | EXPORT_SYMBOL(iwl_tx_skb); | 908 | EXPORT_SYMBOL(iwl_tx_skb); |
@@ -1171,6 +1171,8 @@ int iwl_tx_agg_start(struct iwl_priv *priv, const u8 *ra, u16 tid, u16 *ssn) | |||
1171 | IWL_ERR(priv, "Start AGG on invalid station\n"); | 1171 | IWL_ERR(priv, "Start AGG on invalid station\n"); |
1172 | return -ENXIO; | 1172 | return -ENXIO; |
1173 | } | 1173 | } |
1174 | if (unlikely(tid >= MAX_TID_COUNT)) | ||
1175 | return -EINVAL; | ||
1174 | 1176 | ||
1175 | if (priv->stations[sta_id].tid[tid].agg.state != IWL_AGG_OFF) { | 1177 | if (priv->stations[sta_id].tid[tid].agg.state != IWL_AGG_OFF) { |
1176 | IWL_ERR(priv, "Start AGG when state is not IWL_AGG_OFF !\n"); | 1178 | IWL_ERR(priv, "Start AGG when state is not IWL_AGG_OFF !\n"); |
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 956798f2c80c..523843369ca2 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c | |||
@@ -3968,6 +3968,9 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) | |||
3968 | 3968 | ||
3969 | hw->wiphy->custom_regulatory = true; | 3969 | hw->wiphy->custom_regulatory = true; |
3970 | 3970 | ||
3971 | /* Firmware does not support this */ | ||
3972 | hw->wiphy->disable_beacon_hints = true; | ||
3973 | |||
3971 | hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; | 3974 | hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; |
3972 | /* we create the 802.11 header and a zero-length SSID element */ | 3975 | /* we create the 802.11 header and a zero-length SSID element */ |
3973 | hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; | 3976 | hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; |
@@ -4018,10 +4021,10 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e | |||
4018 | SET_IEEE80211_DEV(hw, &pdev->dev); | 4021 | SET_IEEE80211_DEV(hw, &pdev->dev); |
4019 | 4022 | ||
4020 | if ((iwl3945_mod_params.num_of_queues > IWL39_MAX_NUM_QUEUES) || | 4023 | if ((iwl3945_mod_params.num_of_queues > IWL39_MAX_NUM_QUEUES) || |
4021 | (iwl3945_mod_params.num_of_queues < IWL_MIN_NUM_QUEUES)) { | 4024 | (iwl3945_mod_params.num_of_queues < IWL39_MIN_NUM_QUEUES)) { |
4022 | IWL_ERR(priv, | 4025 | IWL_ERR(priv, |
4023 | "invalid queues_num, should be between %d and %d\n", | 4026 | "invalid queues_num, should be between %d and %d\n", |
4024 | IWL_MIN_NUM_QUEUES, IWL39_MAX_NUM_QUEUES); | 4027 | IWL39_MIN_NUM_QUEUES, IWL39_MAX_NUM_QUEUES); |
4025 | err = -EINVAL; | 4028 | err = -EINVAL; |
4026 | goto out_ieee80211_free_hw; | 4029 | goto out_ieee80211_free_hw; |
4027 | } | 4030 | } |
diff --git a/drivers/net/wireless/iwmc3200wifi/commands.c b/drivers/net/wireless/iwmc3200wifi/commands.c index 834a7f544e5d..e2334d123599 100644 --- a/drivers/net/wireless/iwmc3200wifi/commands.c +++ b/drivers/net/wireless/iwmc3200wifi/commands.c | |||
@@ -220,6 +220,7 @@ int iwm_store_rxiq_calib_result(struct iwm_priv *iwm) | |||
220 | eeprom_rxiq = iwm_eeprom_access(iwm, IWM_EEPROM_CALIB_RXIQ); | 220 | eeprom_rxiq = iwm_eeprom_access(iwm, IWM_EEPROM_CALIB_RXIQ); |
221 | if (IS_ERR(eeprom_rxiq)) { | 221 | if (IS_ERR(eeprom_rxiq)) { |
222 | IWM_ERR(iwm, "Couldn't access EEPROM RX IQ entry\n"); | 222 | IWM_ERR(iwm, "Couldn't access EEPROM RX IQ entry\n"); |
223 | kfree(rxiq); | ||
223 | return PTR_ERR(eeprom_rxiq); | 224 | return PTR_ERR(eeprom_rxiq); |
224 | } | 225 | } |
225 | 226 | ||
diff --git a/drivers/net/wireless/iwmc3200wifi/netdev.c b/drivers/net/wireless/iwmc3200wifi/netdev.c index aea5ccf24ccf..bf294e41753b 100644 --- a/drivers/net/wireless/iwmc3200wifi/netdev.c +++ b/drivers/net/wireless/iwmc3200wifi/netdev.c | |||
@@ -106,10 +106,8 @@ void *iwm_if_alloc(int sizeof_bus, struct device *dev, | |||
106 | int ret = 0; | 106 | int ret = 0; |
107 | 107 | ||
108 | wdev = iwm_wdev_alloc(sizeof_bus, dev); | 108 | wdev = iwm_wdev_alloc(sizeof_bus, dev); |
109 | if (!wdev) { | 109 | if (IS_ERR(wdev)) |
110 | dev_err(dev, "no memory for wireless device instance\n"); | 110 | return wdev; |
111 | return ERR_PTR(-ENOMEM); | ||
112 | } | ||
113 | 111 | ||
114 | iwm = wdev_to_iwm(wdev); | 112 | iwm = wdev_to_iwm(wdev); |
115 | iwm->bus_ops = if_ops; | 113 | iwm->bus_ops = if_ops; |
diff --git a/drivers/net/wireless/libertas/11d.c b/drivers/net/wireless/libertas/11d.c index 9a5408e7d94a..5c6968101f0d 100644 --- a/drivers/net/wireless/libertas/11d.c +++ b/drivers/net/wireless/libertas/11d.c | |||
@@ -47,7 +47,7 @@ static u8 lbs_region_2_code(u8 *region) | |||
47 | { | 47 | { |
48 | u8 i; | 48 | u8 i; |
49 | 49 | ||
50 | for (i = 0; region[i] && i < COUNTRY_CODE_LEN; i++) | 50 | for (i = 0; i < COUNTRY_CODE_LEN && region[i]; i++) |
51 | region[i] = toupper(region[i]); | 51 | region[i] = toupper(region[i]); |
52 | 52 | ||
53 | for (i = 0; i < ARRAY_SIZE(region_code_mapping); i++) { | 53 | for (i = 0; i < ARRAY_SIZE(region_code_mapping); i++) { |
diff --git a/drivers/net/wireless/libertas/assoc.c b/drivers/net/wireless/libertas/assoc.c index b9b374119033..d6997371c27e 100644 --- a/drivers/net/wireless/libertas/assoc.c +++ b/drivers/net/wireless/libertas/assoc.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* Copyright (C) 2006, Red Hat, Inc. */ | 1 | /* Copyright (C) 2006, Red Hat, Inc. */ |
2 | 2 | ||
3 | #include <linux/types.h> | 3 | #include <linux/types.h> |
4 | #include <linux/kernel.h> | ||
4 | #include <linux/etherdevice.h> | 5 | #include <linux/etherdevice.h> |
5 | #include <linux/ieee80211.h> | 6 | #include <linux/ieee80211.h> |
6 | #include <linux/if_arp.h> | 7 | #include <linux/if_arp.h> |
@@ -43,21 +44,21 @@ static int get_common_rates(struct lbs_private *priv, | |||
43 | u16 *rates_size) | 44 | u16 *rates_size) |
44 | { | 45 | { |
45 | u8 *card_rates = lbs_bg_rates; | 46 | u8 *card_rates = lbs_bg_rates; |
46 | size_t num_card_rates = sizeof(lbs_bg_rates); | ||
47 | int ret = 0, i, j; | 47 | int ret = 0, i, j; |
48 | u8 tmp[30]; | 48 | u8 tmp[(ARRAY_SIZE(lbs_bg_rates) - 1) * (*rates_size - 1)]; |
49 | size_t tmp_size = 0; | 49 | size_t tmp_size = 0; |
50 | 50 | ||
51 | /* For each rate in card_rates that exists in rate1, copy to tmp */ | 51 | /* For each rate in card_rates that exists in rate1, copy to tmp */ |
52 | for (i = 0; card_rates[i] && (i < num_card_rates); i++) { | 52 | for (i = 0; i < ARRAY_SIZE(lbs_bg_rates) && card_rates[i]; i++) { |
53 | for (j = 0; rates[j] && (j < *rates_size); j++) { | 53 | for (j = 0; j < *rates_size && rates[j]; j++) { |
54 | if (rates[j] == card_rates[i]) | 54 | if (rates[j] == card_rates[i]) |
55 | tmp[tmp_size++] = card_rates[i]; | 55 | tmp[tmp_size++] = card_rates[i]; |
56 | } | 56 | } |
57 | } | 57 | } |
58 | 58 | ||
59 | lbs_deb_hex(LBS_DEB_JOIN, "AP rates ", rates, *rates_size); | 59 | lbs_deb_hex(LBS_DEB_JOIN, "AP rates ", rates, *rates_size); |
60 | lbs_deb_hex(LBS_DEB_JOIN, "card rates ", card_rates, num_card_rates); | 60 | lbs_deb_hex(LBS_DEB_JOIN, "card rates ", card_rates, |
61 | ARRAY_SIZE(lbs_bg_rates)); | ||
61 | lbs_deb_hex(LBS_DEB_JOIN, "common rates", tmp, tmp_size); | 62 | lbs_deb_hex(LBS_DEB_JOIN, "common rates", tmp, tmp_size); |
62 | lbs_deb_join("TX data rate 0x%02x\n", priv->cur_rate); | 63 | lbs_deb_join("TX data rate 0x%02x\n", priv->cur_rate); |
63 | 64 | ||
@@ -69,10 +70,7 @@ static int get_common_rates(struct lbs_private *priv, | |||
69 | lbs_pr_alert("Previously set fixed data rate %#x isn't " | 70 | lbs_pr_alert("Previously set fixed data rate %#x isn't " |
70 | "compatible with the network.\n", priv->cur_rate); | 71 | "compatible with the network.\n", priv->cur_rate); |
71 | ret = -1; | 72 | ret = -1; |
72 | goto done; | ||
73 | } | 73 | } |
74 | ret = 0; | ||
75 | |||
76 | done: | 74 | done: |
77 | memset(rates, 0, *rates_size); | 75 | memset(rates, 0, *rates_size); |
78 | *rates_size = min_t(int, tmp_size, *rates_size); | 76 | *rates_size = min_t(int, tmp_size, *rates_size); |
@@ -322,7 +320,7 @@ static int lbs_associate(struct lbs_private *priv, | |||
322 | rates = (struct mrvl_ie_rates_param_set *) pos; | 320 | rates = (struct mrvl_ie_rates_param_set *) pos; |
323 | rates->header.type = cpu_to_le16(TLV_TYPE_RATES); | 321 | rates->header.type = cpu_to_le16(TLV_TYPE_RATES); |
324 | memcpy(&rates->rates, &bss->rates, MAX_RATES); | 322 | memcpy(&rates->rates, &bss->rates, MAX_RATES); |
325 | tmplen = MAX_RATES; | 323 | tmplen = min_t(u16, ARRAY_SIZE(rates->rates), MAX_RATES); |
326 | if (get_common_rates(priv, rates->rates, &tmplen)) { | 324 | if (get_common_rates(priv, rates->rates, &tmplen)) { |
327 | ret = -1; | 325 | ret = -1; |
328 | goto done; | 326 | goto done; |
@@ -598,7 +596,7 @@ static int lbs_adhoc_join(struct lbs_private *priv, | |||
598 | 596 | ||
599 | /* Copy Data rates from the rates recorded in scan response */ | 597 | /* Copy Data rates from the rates recorded in scan response */ |
600 | memset(cmd.bss.rates, 0, sizeof(cmd.bss.rates)); | 598 | memset(cmd.bss.rates, 0, sizeof(cmd.bss.rates)); |
601 | ratesize = min_t(u16, sizeof(cmd.bss.rates), MAX_RATES); | 599 | ratesize = min_t(u16, ARRAY_SIZE(cmd.bss.rates), MAX_RATES); |
602 | memcpy(cmd.bss.rates, bss->rates, ratesize); | 600 | memcpy(cmd.bss.rates, bss->rates, ratesize); |
603 | if (get_common_rates(priv, cmd.bss.rates, &ratesize)) { | 601 | if (get_common_rates(priv, cmd.bss.rates, &ratesize)) { |
604 | lbs_deb_join("ADHOC_JOIN: get_common_rates returned error.\n"); | 602 | lbs_deb_join("ADHOC_JOIN: get_common_rates returned error.\n"); |
diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c index 601b54249677..6c95af3023cc 100644 --- a/drivers/net/wireless/libertas/scan.c +++ b/drivers/net/wireless/libertas/scan.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * for sending scan commands to the firmware. | 5 | * for sending scan commands to the firmware. |
6 | */ | 6 | */ |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <linux/kernel.h> | ||
8 | #include <linux/etherdevice.h> | 9 | #include <linux/etherdevice.h> |
9 | #include <linux/if_arp.h> | 10 | #include <linux/if_arp.h> |
10 | #include <asm/unaligned.h> | 11 | #include <asm/unaligned.h> |
@@ -876,7 +877,7 @@ static inline char *lbs_translate_scan(struct lbs_private *priv, | |||
876 | iwe.u.bitrate.disabled = 0; | 877 | iwe.u.bitrate.disabled = 0; |
877 | iwe.u.bitrate.value = 0; | 878 | iwe.u.bitrate.value = 0; |
878 | 879 | ||
879 | for (j = 0; bss->rates[j] && (j < sizeof(bss->rates)); j++) { | 880 | for (j = 0; j < ARRAY_SIZE(bss->rates) && bss->rates[j]; j++) { |
880 | /* Bit rate given in 500 kb/s units */ | 881 | /* Bit rate given in 500 kb/s units */ |
881 | iwe.u.bitrate.value = bss->rates[j] * 500000; | 882 | iwe.u.bitrate.value = bss->rates[j] * 500000; |
882 | current_val = iwe_stream_add_value(info, start, current_val, | 883 | current_val = iwe_stream_add_value(info, start, current_val, |
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 40b07b988224..3bd3c779fff3 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c | |||
@@ -698,7 +698,7 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length) | |||
698 | && !mac->pass_ctrl) | 698 | && !mac->pass_ctrl) |
699 | return 0; | 699 | return 0; |
700 | 700 | ||
701 | fc = *(__le16 *)buffer; | 701 | fc = get_unaligned((__le16*)buffer); |
702 | need_padding = ieee80211_is_data_qos(fc) ^ ieee80211_has_a4(fc); | 702 | need_padding = ieee80211_is_data_qos(fc) ^ ieee80211_has_a4(fc); |
703 | 703 | ||
704 | skb = dev_alloc_skb(length + (need_padding ? 2 : 0)); | 704 | skb = dev_alloc_skb(length + (need_padding ? 2 : 0)); |
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 0f0e0b919ef4..a45b0c0d574e 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c | |||
@@ -70,7 +70,6 @@ | |||
70 | #undef CCIO_COLLECT_STATS | 70 | #undef CCIO_COLLECT_STATS |
71 | #endif | 71 | #endif |
72 | 72 | ||
73 | #include <linux/proc_fs.h> | ||
74 | #include <asm/runway.h> /* for proc_runway_root */ | 73 | #include <asm/runway.h> /* for proc_runway_root */ |
75 | 74 | ||
76 | #ifdef DEBUG_CCIO_INIT | 75 | #ifdef DEBUG_CCIO_INIT |
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index c590974e9815..d69bde6a2343 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c | |||
@@ -614,7 +614,7 @@ dino_fixup_bus(struct pci_bus *bus) | |||
614 | dev_name(&bus->self->dev), i, | 614 | dev_name(&bus->self->dev), i, |
615 | bus->self->resource[i].start, | 615 | bus->self->resource[i].start, |
616 | bus->self->resource[i].end); | 616 | bus->self->resource[i].end); |
617 | pci_assign_resource(bus->self, i); | 617 | WARN_ON(pci_assign_resource(bus->self, i)); |
618 | DBG("DEBUG %s after assign %d [0x%lx,0x%lx]\n", | 618 | DBG("DEBUG %s after assign %d [0x%lx,0x%lx]\n", |
619 | dev_name(&bus->self->dev), i, | 619 | dev_name(&bus->self->dev), i, |
620 | bus->self->resource[i].start, | 620 | bus->self->resource[i].start, |
diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 685d94e69d44..8c0b26e9b98a 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c | |||
@@ -55,7 +55,7 @@ static ssize_t eisa_eeprom_read(struct file * file, | |||
55 | ssize_t ret; | 55 | ssize_t ret; |
56 | int i; | 56 | int i; |
57 | 57 | ||
58 | if (*ppos >= HPEE_MAX_LENGTH) | 58 | if (*ppos < 0 || *ppos >= HPEE_MAX_LENGTH) |
59 | return 0; | 59 | return 0; |
60 | 60 | ||
61 | count = *ppos + count < HPEE_MAX_LENGTH ? count : HPEE_MAX_LENGTH - *ppos; | 61 | count = *ppos + count < HPEE_MAX_LENGTH ? count : HPEE_MAX_LENGTH - *ppos; |
diff --git a/drivers/parisc/hppb.c b/drivers/parisc/hppb.c index 13856415b432..815db175d427 100644 --- a/drivers/parisc/hppb.c +++ b/drivers/parisc/hppb.c | |||
@@ -62,7 +62,8 @@ static int hppb_probe(struct parisc_device *dev) | |||
62 | } | 62 | } |
63 | card = card->next; | 63 | card = card->next; |
64 | } | 64 | } |
65 | printk(KERN_INFO "Found GeckoBoa at 0x%x\n", dev->hpa.start); | 65 | printk(KERN_INFO "Found GeckoBoa at 0x%llx\n", |
66 | (unsigned long long) dev->hpa.start); | ||
66 | 67 | ||
67 | card->hpa = dev->hpa.start; | 68 | card->hpa = dev->hpa.start; |
68 | card->mmio_region.name = "HP-PB Bus"; | 69 | card->mmio_region.name = "HP-PB Bus"; |
@@ -73,8 +74,10 @@ static int hppb_probe(struct parisc_device *dev) | |||
73 | 74 | ||
74 | status = ccio_request_resource(dev, &card->mmio_region); | 75 | status = ccio_request_resource(dev, &card->mmio_region); |
75 | if(status < 0) { | 76 | if(status < 0) { |
76 | printk(KERN_ERR "%s: failed to claim HP-PB bus space (%08x, %08x)\n", | 77 | printk(KERN_ERR "%s: failed to claim HP-PB " |
77 | __FILE__, card->mmio_region.start, card->mmio_region.end); | 78 | "bus space (0x%08llx, 0x%08llx)\n", |
79 | __FILE__, (unsigned long long) card->mmio_region.start, | ||
80 | (unsigned long long) card->mmio_region.end); | ||
78 | } | 81 | } |
79 | 82 | ||
80 | return 0; | 83 | return 0; |
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index ede614616f8e..3aeb3279c92a 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c | |||
@@ -992,7 +992,7 @@ lba_pat_resources(struct parisc_device *pa_dev, struct lba_device *lba_dev) | |||
992 | return; | 992 | return; |
993 | 993 | ||
994 | io_pdc_cell = kzalloc(sizeof(pdc_pat_cell_mod_maddr_block_t), GFP_KERNEL); | 994 | io_pdc_cell = kzalloc(sizeof(pdc_pat_cell_mod_maddr_block_t), GFP_KERNEL); |
995 | if (!pa_pdc_cell) { | 995 | if (!io_pdc_cell) { |
996 | kfree(pa_pdc_cell); | 996 | kfree(pa_pdc_cell); |
997 | return; | 997 | return; |
998 | } | 998 | } |
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index f9f9a5f1bbd0..13a64bc081b6 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c | |||
@@ -370,7 +370,7 @@ pdcspath_layer_read(struct pdcspath_entry *entry, char *buf) | |||
370 | if (!i) /* entry is not ready */ | 370 | if (!i) /* entry is not ready */ |
371 | return -ENODATA; | 371 | return -ENODATA; |
372 | 372 | ||
373 | for (i = 0; devpath->layers[i] && (likely(i < 6)); i++) | 373 | for (i = 0; i < 6 && devpath->layers[i]; i++) |
374 | out += sprintf(out, "%u ", devpath->layers[i]); | 374 | out += sprintf(out, "%u ", devpath->layers[i]); |
375 | 375 | ||
376 | out += sprintf(out, "\n"); | 376 | out += sprintf(out, "\n"); |
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index b711fb7181e2..1898c7b47907 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c | |||
@@ -100,16 +100,16 @@ int pci_claim_resource(struct pci_dev *dev, int resource) | |||
100 | { | 100 | { |
101 | struct resource *res = &dev->resource[resource]; | 101 | struct resource *res = &dev->resource[resource]; |
102 | struct resource *root; | 102 | struct resource *root; |
103 | char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; | ||
104 | int err; | 103 | int err; |
105 | 104 | ||
106 | root = pci_find_parent_resource(dev, res); | 105 | root = pci_find_parent_resource(dev, res); |
107 | 106 | ||
108 | err = -EINVAL; | 107 | err = -EINVAL; |
109 | if (root != NULL) | 108 | if (root != NULL) |
110 | err = insert_resource(root, res); | 109 | err = request_resource(root, res); |
111 | 110 | ||
112 | if (err) { | 111 | if (err) { |
112 | const char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; | ||
113 | dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", | 113 | dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", |
114 | resource, | 114 | resource, |
115 | root ? "address space collision on" : | 115 | root ? "address space collision on" : |
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 46dad12f952f..77c6097ced80 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig | |||
@@ -277,31 +277,6 @@ config THINKPAD_ACPI_UNSAFE_LEDS | |||
277 | Say N here, unless you are building a kernel for your own | 277 | Say N here, unless you are building a kernel for your own |
278 | use, and need to control the important firmware LEDs. | 278 | use, and need to control the important firmware LEDs. |
279 | 279 | ||
280 | config THINKPAD_ACPI_DOCK | ||
281 | bool "Legacy Docking Station Support" | ||
282 | depends on THINKPAD_ACPI | ||
283 | depends on ACPI_DOCK=n | ||
284 | default n | ||
285 | ---help--- | ||
286 | Allows the thinkpad_acpi driver to handle docking station events. | ||
287 | This support was made obsolete by the generic ACPI docking station | ||
288 | support (CONFIG_ACPI_DOCK). It will allow locking and removing the | ||
289 | laptop from the docking station, but will not properly connect PCI | ||
290 | devices. | ||
291 | |||
292 | If you are not sure, say N here. | ||
293 | |||
294 | config THINKPAD_ACPI_BAY | ||
295 | bool "Legacy Removable Bay Support" | ||
296 | depends on THINKPAD_ACPI | ||
297 | default y | ||
298 | ---help--- | ||
299 | Allows the thinkpad_acpi driver to handle removable bays. It will | ||
300 | electrically disable the device in the bay, and also generate | ||
301 | notifications when the bay lever is ejected or inserted. | ||
302 | |||
303 | If you are not sure, say Y here. | ||
304 | |||
305 | config THINKPAD_ACPI_VIDEO | 280 | config THINKPAD_ACPI_VIDEO |
306 | bool "Video output control support" | 281 | bool "Video output control support" |
307 | depends on THINKPAD_ACPI | 282 | depends on THINKPAD_ACPI |
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index ec560f16d720..222ffb892f22 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c | |||
@@ -143,6 +143,7 @@ struct eeepc_hotk { | |||
143 | struct rfkill *bluetooth_rfkill; | 143 | struct rfkill *bluetooth_rfkill; |
144 | struct rfkill *wwan3g_rfkill; | 144 | struct rfkill *wwan3g_rfkill; |
145 | struct hotplug_slot *hotplug_slot; | 145 | struct hotplug_slot *hotplug_slot; |
146 | struct work_struct hotplug_work; | ||
146 | }; | 147 | }; |
147 | 148 | ||
148 | /* The actual device the driver binds to */ | 149 | /* The actual device the driver binds to */ |
@@ -660,7 +661,7 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot, | |||
660 | return 0; | 661 | return 0; |
661 | } | 662 | } |
662 | 663 | ||
663 | static void eeepc_rfkill_hotplug(void) | 664 | static void eeepc_hotplug_work(struct work_struct *work) |
664 | { | 665 | { |
665 | struct pci_dev *dev; | 666 | struct pci_dev *dev; |
666 | struct pci_bus *bus = pci_find_bus(0, 1); | 667 | struct pci_bus *bus = pci_find_bus(0, 1); |
@@ -701,7 +702,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) | |||
701 | if (event != ACPI_NOTIFY_BUS_CHECK) | 702 | if (event != ACPI_NOTIFY_BUS_CHECK) |
702 | return; | 703 | return; |
703 | 704 | ||
704 | eeepc_rfkill_hotplug(); | 705 | schedule_work(&ehotk->hotplug_work); |
705 | } | 706 | } |
706 | 707 | ||
707 | static void eeepc_hotk_notify(struct acpi_device *device, u32 event) | 708 | static void eeepc_hotk_notify(struct acpi_device *device, u32 event) |
@@ -892,7 +893,7 @@ static int eeepc_hotk_resume(struct acpi_device *device) | |||
892 | 893 | ||
893 | rfkill_set_sw_state(ehotk->wlan_rfkill, wlan != 1); | 894 | rfkill_set_sw_state(ehotk->wlan_rfkill, wlan != 1); |
894 | 895 | ||
895 | eeepc_rfkill_hotplug(); | 896 | schedule_work(&ehotk->hotplug_work); |
896 | } | 897 | } |
897 | 898 | ||
898 | if (ehotk->bluetooth_rfkill) | 899 | if (ehotk->bluetooth_rfkill) |
@@ -1093,6 +1094,8 @@ static int eeepc_rfkill_init(struct device *dev) | |||
1093 | { | 1094 | { |
1094 | int result = 0; | 1095 | int result = 0; |
1095 | 1096 | ||
1097 | INIT_WORK(&ehotk->hotplug_work, eeepc_hotplug_work); | ||
1098 | |||
1096 | eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); | 1099 | eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); |
1097 | eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); | 1100 | eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); |
1098 | 1101 | ||
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index ca508564a181..a2ad53e15874 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c | |||
@@ -520,11 +520,13 @@ static int hp_wmi_resume_handler(struct platform_device *device) | |||
520 | * the input layer will only actually pass it on if the state | 520 | * the input layer will only actually pass it on if the state |
521 | * changed. | 521 | * changed. |
522 | */ | 522 | */ |
523 | 523 | if (hp_wmi_input_dev) { | |
524 | input_report_switch(hp_wmi_input_dev, SW_DOCK, hp_wmi_dock_state()); | 524 | input_report_switch(hp_wmi_input_dev, SW_DOCK, |
525 | input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, | 525 | hp_wmi_dock_state()); |
526 | hp_wmi_tablet_state()); | 526 | input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, |
527 | input_sync(hp_wmi_input_dev); | 527 | hp_wmi_tablet_state()); |
528 | input_sync(hp_wmi_input_dev); | ||
529 | } | ||
528 | 530 | ||
529 | return 0; | 531 | return 0; |
530 | } | 532 | } |
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a463fd72c495..e85600852502 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c | |||
@@ -239,12 +239,6 @@ struct ibm_init_struct { | |||
239 | }; | 239 | }; |
240 | 240 | ||
241 | static struct { | 241 | static struct { |
242 | #ifdef CONFIG_THINKPAD_ACPI_BAY | ||
243 | u32 bay_status:1; | ||
244 | u32 bay_eject:1; | ||
245 | u32 bay_status2:1; | ||
246 | u32 bay_eject2:1; | ||
247 | #endif | ||
248 | u32 bluetooth:1; | 242 | u32 bluetooth:1; |
249 | u32 hotkey:1; | 243 | u32 hotkey:1; |
250 | u32 hotkey_mask:1; | 244 | u32 hotkey_mask:1; |
@@ -589,18 +583,6 @@ static int acpi_ec_write(int i, u8 v) | |||
589 | return 1; | 583 | return 1; |
590 | } | 584 | } |
591 | 585 | ||
592 | #if defined(CONFIG_THINKPAD_ACPI_DOCK) || defined(CONFIG_THINKPAD_ACPI_BAY) | ||
593 | static int _sta(acpi_handle handle) | ||
594 | { | ||
595 | int status; | ||
596 | |||
597 | if (!handle || !acpi_evalf(handle, &status, "_STA", "d")) | ||
598 | status = 0; | ||
599 | |||
600 | return status; | ||
601 | } | ||
602 | #endif | ||
603 | |||
604 | static int issue_thinkpad_cmos_command(int cmos_cmd) | 586 | static int issue_thinkpad_cmos_command(int cmos_cmd) |
605 | { | 587 | { |
606 | if (!cmos_handle) | 588 | if (!cmos_handle) |
@@ -784,6 +766,8 @@ static int dispatch_procfs_write(struct file *file, | |||
784 | 766 | ||
785 | if (!ibm || !ibm->write) | 767 | if (!ibm || !ibm->write) |
786 | return -EINVAL; | 768 | return -EINVAL; |
769 | if (count > PAGE_SIZE - 2) | ||
770 | return -EINVAL; | ||
787 | 771 | ||
788 | kernbuf = kmalloc(count + 2, GFP_KERNEL); | 772 | kernbuf = kmalloc(count + 2, GFP_KERNEL); |
789 | if (!kernbuf) | 773 | if (!kernbuf) |
@@ -4442,293 +4426,6 @@ static struct ibm_struct light_driver_data = { | |||
4442 | }; | 4426 | }; |
4443 | 4427 | ||
4444 | /************************************************************************* | 4428 | /************************************************************************* |
4445 | * Dock subdriver | ||
4446 | */ | ||
4447 | |||
4448 | #ifdef CONFIG_THINKPAD_ACPI_DOCK | ||
4449 | |||
4450 | static void dock_notify(struct ibm_struct *ibm, u32 event); | ||
4451 | static int dock_read(char *p); | ||
4452 | static int dock_write(char *buf); | ||
4453 | |||
4454 | TPACPI_HANDLE(dock, root, "\\_SB.GDCK", /* X30, X31, X40 */ | ||
4455 | "\\_SB.PCI0.DOCK", /* 600e/x,770e,770x,A2xm/p,T20-22,X20-21 */ | ||
4456 | "\\_SB.PCI0.PCI1.DOCK", /* all others */ | ||
4457 | "\\_SB.PCI.ISA.SLCE", /* 570 */ | ||
4458 | ); /* A21e,G4x,R30,R31,R32,R40,R40e,R50e */ | ||
4459 | |||
4460 | /* don't list other alternatives as we install a notify handler on the 570 */ | ||
4461 | TPACPI_HANDLE(pci, root, "\\_SB.PCI"); /* 570 */ | ||
4462 | |||
4463 | static const struct acpi_device_id ibm_pci_device_ids[] = { | ||
4464 | {PCI_ROOT_HID_STRING, 0}, | ||
4465 | {"", 0}, | ||
4466 | }; | ||
4467 | |||
4468 | static struct tp_acpi_drv_struct ibm_dock_acpidriver[2] = { | ||
4469 | { | ||
4470 | .notify = dock_notify, | ||
4471 | .handle = &dock_handle, | ||
4472 | .type = ACPI_SYSTEM_NOTIFY, | ||
4473 | }, | ||
4474 | { | ||
4475 | /* THIS ONE MUST NEVER BE USED FOR DRIVER AUTOLOADING. | ||
4476 | * We just use it to get notifications of dock hotplug | ||
4477 | * in very old thinkpads */ | ||
4478 | .hid = ibm_pci_device_ids, | ||
4479 | .notify = dock_notify, | ||
4480 | .handle = &pci_handle, | ||
4481 | .type = ACPI_SYSTEM_NOTIFY, | ||
4482 | }, | ||
4483 | }; | ||
4484 | |||
4485 | static struct ibm_struct dock_driver_data[2] = { | ||
4486 | { | ||
4487 | .name = "dock", | ||
4488 | .read = dock_read, | ||
4489 | .write = dock_write, | ||
4490 | .acpi = &ibm_dock_acpidriver[0], | ||
4491 | }, | ||
4492 | { | ||
4493 | .name = "dock", | ||
4494 | .acpi = &ibm_dock_acpidriver[1], | ||
4495 | }, | ||
4496 | }; | ||
4497 | |||
4498 | #define dock_docked() (_sta(dock_handle) & 1) | ||
4499 | |||
4500 | static int __init dock_init(struct ibm_init_struct *iibm) | ||
4501 | { | ||
4502 | vdbg_printk(TPACPI_DBG_INIT, "initializing dock subdriver\n"); | ||
4503 | |||
4504 | TPACPI_ACPIHANDLE_INIT(dock); | ||
4505 | |||
4506 | vdbg_printk(TPACPI_DBG_INIT, "dock is %s\n", | ||
4507 | str_supported(dock_handle != NULL)); | ||
4508 | |||
4509 | return (dock_handle)? 0 : 1; | ||
4510 | } | ||
4511 | |||
4512 | static int __init dock_init2(struct ibm_init_struct *iibm) | ||
4513 | { | ||
4514 | int dock2_needed; | ||
4515 | |||
4516 | vdbg_printk(TPACPI_DBG_INIT, "initializing dock subdriver part 2\n"); | ||
4517 | |||
4518 | if (dock_driver_data[0].flags.acpi_driver_registered && | ||
4519 | dock_driver_data[0].flags.acpi_notify_installed) { | ||
4520 | TPACPI_ACPIHANDLE_INIT(pci); | ||
4521 | dock2_needed = (pci_handle != NULL); | ||
4522 | vdbg_printk(TPACPI_DBG_INIT, | ||
4523 | "dock PCI handler for the TP 570 is %s\n", | ||
4524 | str_supported(dock2_needed)); | ||
4525 | } else { | ||
4526 | vdbg_printk(TPACPI_DBG_INIT, | ||
4527 | "dock subdriver part 2 not required\n"); | ||
4528 | dock2_needed = 0; | ||
4529 | } | ||
4530 | |||
4531 | return (dock2_needed)? 0 : 1; | ||
4532 | } | ||
4533 | |||
4534 | static void dock_notify(struct ibm_struct *ibm, u32 event) | ||
4535 | { | ||
4536 | int docked = dock_docked(); | ||
4537 | int pci = ibm->acpi->hid && ibm->acpi->device && | ||
4538 | acpi_match_device_ids(ibm->acpi->device, ibm_pci_device_ids); | ||
4539 | int data; | ||
4540 | |||
4541 | if (event == 1 && !pci) /* 570 */ | ||
4542 | data = 1; /* button */ | ||
4543 | else if (event == 1 && pci) /* 570 */ | ||
4544 | data = 3; /* dock */ | ||
4545 | else if (event == 3 && docked) | ||
4546 | data = 1; /* button */ | ||
4547 | else if (event == 3 && !docked) | ||
4548 | data = 2; /* undock */ | ||
4549 | else if (event == 0 && docked) | ||
4550 | data = 3; /* dock */ | ||
4551 | else { | ||
4552 | printk(TPACPI_ERR "unknown dock event %d, status %d\n", | ||
4553 | event, _sta(dock_handle)); | ||
4554 | data = 0; /* unknown */ | ||
4555 | } | ||
4556 | acpi_bus_generate_proc_event(ibm->acpi->device, event, data); | ||
4557 | acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class, | ||
4558 | dev_name(&ibm->acpi->device->dev), | ||
4559 | event, data); | ||
4560 | } | ||
4561 | |||
4562 | static int dock_read(char *p) | ||
4563 | { | ||
4564 | int len = 0; | ||
4565 | int docked = dock_docked(); | ||
4566 | |||
4567 | if (!dock_handle) | ||
4568 | len += sprintf(p + len, "status:\t\tnot supported\n"); | ||
4569 | else if (!docked) | ||
4570 | len += sprintf(p + len, "status:\t\tundocked\n"); | ||
4571 | else { | ||
4572 | len += sprintf(p + len, "status:\t\tdocked\n"); | ||
4573 | len += sprintf(p + len, "commands:\tdock, undock\n"); | ||
4574 | } | ||
4575 | |||
4576 | return len; | ||
4577 | } | ||
4578 | |||
4579 | static int dock_write(char *buf) | ||
4580 | { | ||
4581 | char *cmd; | ||
4582 | |||
4583 | if (!dock_docked()) | ||
4584 | return -ENODEV; | ||
4585 | |||
4586 | while ((cmd = next_cmd(&buf))) { | ||
4587 | if (strlencmp(cmd, "undock") == 0) { | ||
4588 | if (!acpi_evalf(dock_handle, NULL, "_DCK", "vd", 0) || | ||
4589 | !acpi_evalf(dock_handle, NULL, "_EJ0", "vd", 1)) | ||
4590 | return -EIO; | ||
4591 | } else if (strlencmp(cmd, "dock") == 0) { | ||
4592 | if (!acpi_evalf(dock_handle, NULL, "_DCK", "vd", 1)) | ||
4593 | return -EIO; | ||
4594 | } else | ||
4595 | return -EINVAL; | ||
4596 | } | ||
4597 | |||
4598 | return 0; | ||
4599 | } | ||
4600 | |||
4601 | #endif /* CONFIG_THINKPAD_ACPI_DOCK */ | ||
4602 | |||
4603 | /************************************************************************* | ||
4604 | * Bay subdriver | ||
4605 | */ | ||
4606 | |||
4607 | #ifdef CONFIG_THINKPAD_ACPI_BAY | ||
4608 | |||
4609 | TPACPI_HANDLE(bay, root, "\\_SB.PCI.IDE.SECN.MAST", /* 570 */ | ||
4610 | "\\_SB.PCI0.IDE0.IDES.IDSM", /* 600e/x, 770e, 770x */ | ||
4611 | "\\_SB.PCI0.SATA.SCND.MSTR", /* T60, X60, Z60 */ | ||
4612 | "\\_SB.PCI0.IDE0.SCND.MSTR", /* all others */ | ||
4613 | ); /* A21e, R30, R31 */ | ||
4614 | TPACPI_HANDLE(bay_ej, bay, "_EJ3", /* 600e/x, A2xm/p, A3x */ | ||
4615 | "_EJ0", /* all others */ | ||
4616 | ); /* 570,A21e,G4x,R30,R31,R32,R40e,R50e */ | ||
4617 | TPACPI_HANDLE(bay2, root, "\\_SB.PCI0.IDE0.PRIM.SLAV", /* A3x, R32 */ | ||
4618 | "\\_SB.PCI0.IDE0.IDEP.IDPS", /* 600e/x, 770e, 770x */ | ||
4619 | ); /* all others */ | ||
4620 | TPACPI_HANDLE(bay2_ej, bay2, "_EJ3", /* 600e/x, 770e, A3x */ | ||
4621 | "_EJ0", /* 770x */ | ||
4622 | ); /* all others */ | ||
4623 | |||
4624 | static int __init bay_init(struct ibm_init_struct *iibm) | ||
4625 | { | ||
4626 | vdbg_printk(TPACPI_DBG_INIT, "initializing bay subdriver\n"); | ||
4627 | |||
4628 | TPACPI_ACPIHANDLE_INIT(bay); | ||
4629 | if (bay_handle) | ||
4630 | TPACPI_ACPIHANDLE_INIT(bay_ej); | ||
4631 | TPACPI_ACPIHANDLE_INIT(bay2); | ||
4632 | if (bay2_handle) | ||
4633 | TPACPI_ACPIHANDLE_INIT(bay2_ej); | ||
4634 | |||
4635 | tp_features.bay_status = bay_handle && | ||
4636 | acpi_evalf(bay_handle, NULL, "_STA", "qv"); | ||
4637 | tp_features.bay_status2 = bay2_handle && | ||
4638 | acpi_evalf(bay2_handle, NULL, "_STA", "qv"); | ||
4639 | |||
4640 | tp_features.bay_eject = bay_handle && bay_ej_handle && | ||
4641 | (strlencmp(bay_ej_path, "_EJ0") == 0 || experimental); | ||
4642 | tp_features.bay_eject2 = bay2_handle && bay2_ej_handle && | ||
4643 | (strlencmp(bay2_ej_path, "_EJ0") == 0 || experimental); | ||
4644 | |||
4645 | vdbg_printk(TPACPI_DBG_INIT, | ||
4646 | "bay 1: status %s, eject %s; bay 2: status %s, eject %s\n", | ||
4647 | str_supported(tp_features.bay_status), | ||
4648 | str_supported(tp_features.bay_eject), | ||
4649 | str_supported(tp_features.bay_status2), | ||
4650 | str_supported(tp_features.bay_eject2)); | ||
4651 | |||
4652 | return (tp_features.bay_status || tp_features.bay_eject || | ||
4653 | tp_features.bay_status2 || tp_features.bay_eject2)? 0 : 1; | ||
4654 | } | ||
4655 | |||
4656 | static void bay_notify(struct ibm_struct *ibm, u32 event) | ||
4657 | { | ||
4658 | acpi_bus_generate_proc_event(ibm->acpi->device, event, 0); | ||
4659 | acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class, | ||
4660 | dev_name(&ibm->acpi->device->dev), | ||
4661 | event, 0); | ||
4662 | } | ||
4663 | |||
4664 | #define bay_occupied(b) (_sta(b##_handle) & 1) | ||
4665 | |||
4666 | static int bay_read(char *p) | ||
4667 | { | ||
4668 | int len = 0; | ||
4669 | int occupied = bay_occupied(bay); | ||
4670 | int occupied2 = bay_occupied(bay2); | ||
4671 | int eject, eject2; | ||
4672 | |||
4673 | len += sprintf(p + len, "status:\t\t%s\n", | ||
4674 | tp_features.bay_status ? | ||
4675 | (occupied ? "occupied" : "unoccupied") : | ||
4676 | "not supported"); | ||
4677 | if (tp_features.bay_status2) | ||
4678 | len += sprintf(p + len, "status2:\t%s\n", occupied2 ? | ||
4679 | "occupied" : "unoccupied"); | ||
4680 | |||
4681 | eject = tp_features.bay_eject && occupied; | ||
4682 | eject2 = tp_features.bay_eject2 && occupied2; | ||
4683 | |||
4684 | if (eject && eject2) | ||
4685 | len += sprintf(p + len, "commands:\teject, eject2\n"); | ||
4686 | else if (eject) | ||
4687 | len += sprintf(p + len, "commands:\teject\n"); | ||
4688 | else if (eject2) | ||
4689 | len += sprintf(p + len, "commands:\teject2\n"); | ||
4690 | |||
4691 | return len; | ||
4692 | } | ||
4693 | |||
4694 | static int bay_write(char *buf) | ||
4695 | { | ||
4696 | char *cmd; | ||
4697 | |||
4698 | if (!tp_features.bay_eject && !tp_features.bay_eject2) | ||
4699 | return -ENODEV; | ||
4700 | |||
4701 | while ((cmd = next_cmd(&buf))) { | ||
4702 | if (tp_features.bay_eject && strlencmp(cmd, "eject") == 0) { | ||
4703 | if (!acpi_evalf(bay_ej_handle, NULL, NULL, "vd", 1)) | ||
4704 | return -EIO; | ||
4705 | } else if (tp_features.bay_eject2 && | ||
4706 | strlencmp(cmd, "eject2") == 0) { | ||
4707 | if (!acpi_evalf(bay2_ej_handle, NULL, NULL, "vd", 1)) | ||
4708 | return -EIO; | ||
4709 | } else | ||
4710 | return -EINVAL; | ||
4711 | } | ||
4712 | |||
4713 | return 0; | ||
4714 | } | ||
4715 | |||
4716 | static struct tp_acpi_drv_struct ibm_bay_acpidriver = { | ||
4717 | .notify = bay_notify, | ||
4718 | .handle = &bay_handle, | ||
4719 | .type = ACPI_SYSTEM_NOTIFY, | ||
4720 | }; | ||
4721 | |||
4722 | static struct ibm_struct bay_driver_data = { | ||
4723 | .name = "bay", | ||
4724 | .read = bay_read, | ||
4725 | .write = bay_write, | ||
4726 | .acpi = &ibm_bay_acpidriver, | ||
4727 | }; | ||
4728 | |||
4729 | #endif /* CONFIG_THINKPAD_ACPI_BAY */ | ||
4730 | |||
4731 | /************************************************************************* | ||
4732 | * CMOS subdriver | 4429 | * CMOS subdriver |
4733 | */ | 4430 | */ |
4734 | 4431 | ||
@@ -5945,14 +5642,48 @@ static struct backlight_ops ibm_backlight_data = { | |||
5945 | 5642 | ||
5946 | /* --------------------------------------------------------------------- */ | 5643 | /* --------------------------------------------------------------------- */ |
5947 | 5644 | ||
5645 | /* | ||
5646 | * These are only useful for models that have only one possibility | ||
5647 | * of GPU. If the BIOS model handles both ATI and Intel, don't use | ||
5648 | * these quirks. | ||
5649 | */ | ||
5650 | #define TPACPI_BRGHT_Q_NOEC 0x0001 /* Must NOT use EC HBRV */ | ||
5651 | #define TPACPI_BRGHT_Q_EC 0x0002 /* Should or must use EC HBRV */ | ||
5652 | #define TPACPI_BRGHT_Q_ASK 0x8000 /* Ask for user report */ | ||
5653 | |||
5654 | static const struct tpacpi_quirk brightness_quirk_table[] __initconst = { | ||
5655 | /* Models with ATI GPUs known to require ECNVRAM mode */ | ||
5656 | TPACPI_Q_IBM('1', 'Y', TPACPI_BRGHT_Q_EC), /* T43/p ATI */ | ||
5657 | |||
5658 | /* Models with ATI GPUs (waiting confirmation) */ | ||
5659 | TPACPI_Q_IBM('1', 'R', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), | ||
5660 | TPACPI_Q_IBM('1', 'Q', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), | ||
5661 | TPACPI_Q_IBM('7', '6', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), | ||
5662 | TPACPI_Q_IBM('7', '8', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), | ||
5663 | |||
5664 | /* Models with Intel Extreme Graphics 2 (waiting confirmation) */ | ||
5665 | TPACPI_Q_IBM('1', 'V', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), | ||
5666 | TPACPI_Q_IBM('1', 'W', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), | ||
5667 | TPACPI_Q_IBM('1', 'U', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), | ||
5668 | |||
5669 | /* Models with Intel GMA900 */ | ||
5670 | TPACPI_Q_IBM('7', '0', TPACPI_BRGHT_Q_NOEC), /* T43, R52 */ | ||
5671 | TPACPI_Q_IBM('7', '4', TPACPI_BRGHT_Q_NOEC), /* X41 */ | ||
5672 | TPACPI_Q_IBM('7', '5', TPACPI_BRGHT_Q_NOEC), /* X41 Tablet */ | ||
5673 | }; | ||
5674 | |||
5948 | static int __init brightness_init(struct ibm_init_struct *iibm) | 5675 | static int __init brightness_init(struct ibm_init_struct *iibm) |
5949 | { | 5676 | { |
5950 | int b; | 5677 | int b; |
5678 | unsigned long quirks; | ||
5951 | 5679 | ||
5952 | vdbg_printk(TPACPI_DBG_INIT, "initializing brightness subdriver\n"); | 5680 | vdbg_printk(TPACPI_DBG_INIT, "initializing brightness subdriver\n"); |
5953 | 5681 | ||
5954 | mutex_init(&brightness_mutex); | 5682 | mutex_init(&brightness_mutex); |
5955 | 5683 | ||
5684 | quirks = tpacpi_check_quirks(brightness_quirk_table, | ||
5685 | ARRAY_SIZE(brightness_quirk_table)); | ||
5686 | |||
5956 | /* | 5687 | /* |
5957 | * We always attempt to detect acpi support, so as to switch | 5688 | * We always attempt to detect acpi support, so as to switch |
5958 | * Lenovo Vista BIOS to ACPI brightness mode even if we are not | 5689 | * Lenovo Vista BIOS to ACPI brightness mode even if we are not |
@@ -6009,23 +5740,13 @@ static int __init brightness_init(struct ibm_init_struct *iibm) | |||
6009 | /* TPACPI_BRGHT_MODE_AUTO not implemented yet, just use default */ | 5740 | /* TPACPI_BRGHT_MODE_AUTO not implemented yet, just use default */ |
6010 | if (brightness_mode == TPACPI_BRGHT_MODE_AUTO || | 5741 | if (brightness_mode == TPACPI_BRGHT_MODE_AUTO || |
6011 | brightness_mode == TPACPI_BRGHT_MODE_MAX) { | 5742 | brightness_mode == TPACPI_BRGHT_MODE_MAX) { |
6012 | if (thinkpad_id.vendor == PCI_VENDOR_ID_IBM) { | 5743 | if (quirks & TPACPI_BRGHT_Q_EC) |
6013 | /* | 5744 | brightness_mode = TPACPI_BRGHT_MODE_ECNVRAM; |
6014 | * IBM models that define HBRV probably have | 5745 | else |
6015 | * EC-based backlight level control | ||
6016 | */ | ||
6017 | if (acpi_evalf(ec_handle, NULL, "HBRV", "qd")) | ||
6018 | /* T40-T43, R50-R52, R50e, R51e, X31-X41 */ | ||
6019 | brightness_mode = TPACPI_BRGHT_MODE_ECNVRAM; | ||
6020 | else | ||
6021 | /* all other IBM ThinkPads */ | ||
6022 | brightness_mode = TPACPI_BRGHT_MODE_UCMS_STEP; | ||
6023 | } else | ||
6024 | /* All Lenovo ThinkPads */ | ||
6025 | brightness_mode = TPACPI_BRGHT_MODE_UCMS_STEP; | 5746 | brightness_mode = TPACPI_BRGHT_MODE_UCMS_STEP; |
6026 | 5747 | ||
6027 | dbg_printk(TPACPI_DBG_BRGHT, | 5748 | dbg_printk(TPACPI_DBG_BRGHT, |
6028 | "selected brightness_mode=%d\n", | 5749 | "driver auto-selected brightness_mode=%d\n", |
6029 | brightness_mode); | 5750 | brightness_mode); |
6030 | } | 5751 | } |
6031 | 5752 | ||
@@ -6052,6 +5773,15 @@ static int __init brightness_init(struct ibm_init_struct *iibm) | |||
6052 | vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, | 5773 | vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, |
6053 | "brightness is supported\n"); | 5774 | "brightness is supported\n"); |
6054 | 5775 | ||
5776 | if (quirks & TPACPI_BRGHT_Q_ASK) { | ||
5777 | printk(TPACPI_NOTICE | ||
5778 | "brightness: will use unverified default: " | ||
5779 | "brightness_mode=%d\n", brightness_mode); | ||
5780 | printk(TPACPI_NOTICE | ||
5781 | "brightness: please report to %s whether it works well " | ||
5782 | "or not on your ThinkPad\n", TPACPI_MAIL); | ||
5783 | } | ||
5784 | |||
6055 | ibm_backlight_device->props.max_brightness = | 5785 | ibm_backlight_device->props.max_brightness = |
6056 | (tp_features.bright_16levels)? 15 : 7; | 5786 | (tp_features.bright_16levels)? 15 : 7; |
6057 | ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK; | 5787 | ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK; |
@@ -7854,22 +7584,6 @@ static struct ibm_init_struct ibms_init[] __initdata = { | |||
7854 | .init = light_init, | 7584 | .init = light_init, |
7855 | .data = &light_driver_data, | 7585 | .data = &light_driver_data, |
7856 | }, | 7586 | }, |
7857 | #ifdef CONFIG_THINKPAD_ACPI_DOCK | ||
7858 | { | ||
7859 | .init = dock_init, | ||
7860 | .data = &dock_driver_data[0], | ||
7861 | }, | ||
7862 | { | ||
7863 | .init = dock_init2, | ||
7864 | .data = &dock_driver_data[1], | ||
7865 | }, | ||
7866 | #endif | ||
7867 | #ifdef CONFIG_THINKPAD_ACPI_BAY | ||
7868 | { | ||
7869 | .init = bay_init, | ||
7870 | .data = &bay_driver_data, | ||
7871 | }, | ||
7872 | #endif | ||
7873 | { | 7587 | { |
7874 | .init = cmos_init, | 7588 | .init = cmos_init, |
7875 | .data = &cmos_driver_data, | 7589 | .data = &cmos_driver_data, |
@@ -7968,12 +7682,6 @@ TPACPI_PARAM(hotkey); | |||
7968 | TPACPI_PARAM(bluetooth); | 7682 | TPACPI_PARAM(bluetooth); |
7969 | TPACPI_PARAM(video); | 7683 | TPACPI_PARAM(video); |
7970 | TPACPI_PARAM(light); | 7684 | TPACPI_PARAM(light); |
7971 | #ifdef CONFIG_THINKPAD_ACPI_DOCK | ||
7972 | TPACPI_PARAM(dock); | ||
7973 | #endif | ||
7974 | #ifdef CONFIG_THINKPAD_ACPI_BAY | ||
7975 | TPACPI_PARAM(bay); | ||
7976 | #endif /* CONFIG_THINKPAD_ACPI_BAY */ | ||
7977 | TPACPI_PARAM(cmos); | 7685 | TPACPI_PARAM(cmos); |
7978 | TPACPI_PARAM(led); | 7686 | TPACPI_PARAM(led); |
7979 | TPACPI_PARAM(beep); | 7687 | TPACPI_PARAM(beep); |
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 7eda34838bfe..bdbc4f73fcdc 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig | |||
@@ -43,6 +43,13 @@ config BATTERY_DS2760 | |||
43 | help | 43 | help |
44 | Say Y here to enable support for batteries with ds2760 chip. | 44 | Say Y here to enable support for batteries with ds2760 chip. |
45 | 45 | ||
46 | config BATTERY_DS2782 | ||
47 | tristate "DS2782 standalone gas-gauge" | ||
48 | depends on I2C | ||
49 | help | ||
50 | Say Y here to enable support for the DS2782 standalone battery | ||
51 | gas-gauge. | ||
52 | |||
46 | config BATTERY_PMU | 53 | config BATTERY_PMU |
47 | tristate "Apple PMU battery" | 54 | tristate "Apple PMU battery" |
48 | depends on PPC32 && ADB_PMU | 55 | depends on PPC32 && ADB_PMU |
diff --git a/drivers/power/Makefile b/drivers/power/Makefile index daf3179689aa..380d17c9ae29 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile | |||
@@ -19,6 +19,7 @@ obj-$(CONFIG_APM_POWER) += apm_power.o | |||
19 | obj-$(CONFIG_WM8350_POWER) += wm8350_power.o | 19 | obj-$(CONFIG_WM8350_POWER) += wm8350_power.o |
20 | 20 | ||
21 | obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o | 21 | obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o |
22 | obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o | ||
22 | obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o | 23 | obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o |
23 | obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o | 24 | obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o |
24 | obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o | 25 | obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o |
diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c new file mode 100644 index 000000000000..da14f374cb60 --- /dev/null +++ b/drivers/power/ds2782_battery.c | |||
@@ -0,0 +1,330 @@ | |||
1 | /* | ||
2 | * I2C client/driver for the Maxim/Dallas DS2782 Stand-Alone Fuel Gauge IC | ||
3 | * | ||
4 | * Copyright (C) 2009 Bluewater Systems Ltd | ||
5 | * | ||
6 | * Author: Ryan Mallon <ryan@bluewatersys.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/types.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/swab.h> | ||
19 | #include <linux/i2c.h> | ||
20 | #include <linux/idr.h> | ||
21 | #include <linux/power_supply.h> | ||
22 | |||
23 | #define DS2782_REG_RARC 0x06 /* Remaining active relative capacity */ | ||
24 | |||
25 | #define DS2782_REG_VOLT_MSB 0x0c | ||
26 | #define DS2782_REG_TEMP_MSB 0x0a | ||
27 | #define DS2782_REG_CURRENT_MSB 0x0e | ||
28 | |||
29 | /* EEPROM Block */ | ||
30 | #define DS2782_REG_RSNSP 0x69 /* Sense resistor value */ | ||
31 | |||
32 | /* Current unit measurement in uA for a 1 milli-ohm sense resistor */ | ||
33 | #define DS2782_CURRENT_UNITS 1563 | ||
34 | |||
35 | #define to_ds2782_info(x) container_of(x, struct ds2782_info, battery) | ||
36 | |||
37 | struct ds2782_info { | ||
38 | struct i2c_client *client; | ||
39 | struct power_supply battery; | ||
40 | int id; | ||
41 | }; | ||
42 | |||
43 | static DEFINE_IDR(battery_id); | ||
44 | static DEFINE_MUTEX(battery_lock); | ||
45 | |||
46 | static inline int ds2782_read_reg(struct ds2782_info *info, int reg, u8 *val) | ||
47 | { | ||
48 | int ret; | ||
49 | |||
50 | ret = i2c_smbus_read_byte_data(info->client, reg); | ||
51 | if (ret < 0) { | ||
52 | dev_err(&info->client->dev, "register read failed\n"); | ||
53 | return ret; | ||
54 | } | ||
55 | |||
56 | *val = ret; | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | static inline int ds2782_read_reg16(struct ds2782_info *info, int reg_msb, | ||
61 | s16 *val) | ||
62 | { | ||
63 | int ret; | ||
64 | |||
65 | ret = swab16(i2c_smbus_read_word_data(info->client, reg_msb)); | ||
66 | if (ret < 0) { | ||
67 | dev_err(&info->client->dev, "register read failed\n"); | ||
68 | return ret; | ||
69 | } | ||
70 | |||
71 | *val = ret; | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static int ds2782_get_temp(struct ds2782_info *info, int *temp) | ||
76 | { | ||
77 | s16 raw; | ||
78 | int err; | ||
79 | |||
80 | /* | ||
81 | * Temperature is measured in units of 0.125 degrees celcius, the | ||
82 | * power_supply class measures temperature in tenths of degrees | ||
83 | * celsius. The temperature value is stored as a 10 bit number, plus | ||
84 | * sign in the upper bits of a 16 bit register. | ||
85 | */ | ||
86 | err = ds2782_read_reg16(info, DS2782_REG_TEMP_MSB, &raw); | ||
87 | if (err) | ||
88 | return err; | ||
89 | *temp = ((raw / 32) * 125) / 100; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static int ds2782_get_current(struct ds2782_info *info, int *current_uA) | ||
94 | { | ||
95 | int sense_res; | ||
96 | int err; | ||
97 | u8 sense_res_raw; | ||
98 | s16 raw; | ||
99 | |||
100 | /* | ||
101 | * The units of measurement for current are dependent on the value of | ||
102 | * the sense resistor. | ||
103 | */ | ||
104 | err = ds2782_read_reg(info, DS2782_REG_RSNSP, &sense_res_raw); | ||
105 | if (err) | ||
106 | return err; | ||
107 | if (sense_res_raw == 0) { | ||
108 | dev_err(&info->client->dev, "sense resistor value is 0\n"); | ||
109 | return -ENXIO; | ||
110 | } | ||
111 | sense_res = 1000 / sense_res_raw; | ||
112 | |||
113 | dev_dbg(&info->client->dev, "sense resistor = %d milli-ohms\n", | ||
114 | sense_res); | ||
115 | err = ds2782_read_reg16(info, DS2782_REG_CURRENT_MSB, &raw); | ||
116 | if (err) | ||
117 | return err; | ||
118 | *current_uA = raw * (DS2782_CURRENT_UNITS / sense_res); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static int ds2782_get_voltage(struct ds2782_info *info, int *voltage_uA) | ||
123 | { | ||
124 | s16 raw; | ||
125 | int err; | ||
126 | |||
127 | /* | ||
128 | * Voltage is measured in units of 4.88mV. The voltage is stored as | ||
129 | * a 10-bit number plus sign, in the upper bits of a 16-bit register | ||
130 | */ | ||
131 | err = ds2782_read_reg16(info, DS2782_REG_VOLT_MSB, &raw); | ||
132 | if (err) | ||
133 | return err; | ||
134 | *voltage_uA = (raw / 32) * 4800; | ||
135 | return 0; | ||
136 | } | ||
137 | |||
138 | static int ds2782_get_capacity(struct ds2782_info *info, int *capacity) | ||
139 | { | ||
140 | int err; | ||
141 | u8 raw; | ||
142 | |||
143 | err = ds2782_read_reg(info, DS2782_REG_RARC, &raw); | ||
144 | if (err) | ||
145 | return err; | ||
146 | *capacity = raw; | ||
147 | return raw; | ||
148 | } | ||
149 | |||
150 | static int ds2782_get_status(struct ds2782_info *info, int *status) | ||
151 | { | ||
152 | int err; | ||
153 | int current_uA; | ||
154 | int capacity; | ||
155 | |||
156 | err = ds2782_get_current(info, ¤t_uA); | ||
157 | if (err) | ||
158 | return err; | ||
159 | |||
160 | err = ds2782_get_capacity(info, &capacity); | ||
161 | if (err) | ||
162 | return err; | ||
163 | |||
164 | if (capacity == 100) | ||
165 | *status = POWER_SUPPLY_STATUS_FULL; | ||
166 | else if (current_uA == 0) | ||
167 | *status = POWER_SUPPLY_STATUS_NOT_CHARGING; | ||
168 | else if (current_uA < 0) | ||
169 | *status = POWER_SUPPLY_STATUS_DISCHARGING; | ||
170 | else | ||
171 | *status = POWER_SUPPLY_STATUS_CHARGING; | ||
172 | |||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | static int ds2782_battery_get_property(struct power_supply *psy, | ||
177 | enum power_supply_property prop, | ||
178 | union power_supply_propval *val) | ||
179 | { | ||
180 | struct ds2782_info *info = to_ds2782_info(psy); | ||
181 | int ret; | ||
182 | |||
183 | switch (prop) { | ||
184 | case POWER_SUPPLY_PROP_STATUS: | ||
185 | ret = ds2782_get_status(info, &val->intval); | ||
186 | break; | ||
187 | |||
188 | case POWER_SUPPLY_PROP_CAPACITY: | ||
189 | ret = ds2782_get_capacity(info, &val->intval); | ||
190 | break; | ||
191 | |||
192 | case POWER_SUPPLY_PROP_VOLTAGE_NOW: | ||
193 | ret = ds2782_get_voltage(info, &val->intval); | ||
194 | break; | ||
195 | |||
196 | case POWER_SUPPLY_PROP_CURRENT_NOW: | ||
197 | ret = ds2782_get_current(info, &val->intval); | ||
198 | break; | ||
199 | |||
200 | case POWER_SUPPLY_PROP_TEMP: | ||
201 | ret = ds2782_get_temp(info, &val->intval); | ||
202 | break; | ||
203 | |||
204 | default: | ||
205 | ret = -EINVAL; | ||
206 | } | ||
207 | |||
208 | return ret; | ||
209 | } | ||
210 | |||
211 | static enum power_supply_property ds2782_battery_props[] = { | ||
212 | POWER_SUPPLY_PROP_STATUS, | ||
213 | POWER_SUPPLY_PROP_CAPACITY, | ||
214 | POWER_SUPPLY_PROP_VOLTAGE_NOW, | ||
215 | POWER_SUPPLY_PROP_CURRENT_NOW, | ||
216 | POWER_SUPPLY_PROP_TEMP, | ||
217 | }; | ||
218 | |||
219 | static void ds2782_power_supply_init(struct power_supply *battery) | ||
220 | { | ||
221 | battery->type = POWER_SUPPLY_TYPE_BATTERY; | ||
222 | battery->properties = ds2782_battery_props; | ||
223 | battery->num_properties = ARRAY_SIZE(ds2782_battery_props); | ||
224 | battery->get_property = ds2782_battery_get_property; | ||
225 | battery->external_power_changed = NULL; | ||
226 | } | ||
227 | |||
228 | static int ds2782_battery_remove(struct i2c_client *client) | ||
229 | { | ||
230 | struct ds2782_info *info = i2c_get_clientdata(client); | ||
231 | |||
232 | power_supply_unregister(&info->battery); | ||
233 | kfree(info->battery.name); | ||
234 | |||
235 | mutex_lock(&battery_lock); | ||
236 | idr_remove(&battery_id, info->id); | ||
237 | mutex_unlock(&battery_lock); | ||
238 | |||
239 | i2c_set_clientdata(client, info); | ||
240 | |||
241 | kfree(info); | ||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | static int ds2782_battery_probe(struct i2c_client *client, | ||
246 | const struct i2c_device_id *id) | ||
247 | { | ||
248 | struct ds2782_info *info; | ||
249 | int ret; | ||
250 | int num; | ||
251 | |||
252 | /* Get an ID for this battery */ | ||
253 | ret = idr_pre_get(&battery_id, GFP_KERNEL); | ||
254 | if (ret == 0) { | ||
255 | ret = -ENOMEM; | ||
256 | goto fail_id; | ||
257 | } | ||
258 | |||
259 | mutex_lock(&battery_lock); | ||
260 | ret = idr_get_new(&battery_id, client, &num); | ||
261 | mutex_unlock(&battery_lock); | ||
262 | if (ret < 0) | ||
263 | goto fail_id; | ||
264 | |||
265 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
266 | if (!info) { | ||
267 | ret = -ENOMEM; | ||
268 | goto fail_info; | ||
269 | } | ||
270 | |||
271 | info->battery.name = kasprintf(GFP_KERNEL, "ds2782-%d", num); | ||
272 | if (!info->battery.name) { | ||
273 | ret = -ENOMEM; | ||
274 | goto fail_name; | ||
275 | } | ||
276 | |||
277 | i2c_set_clientdata(client, info); | ||
278 | info->client = client; | ||
279 | ds2782_power_supply_init(&info->battery); | ||
280 | |||
281 | ret = power_supply_register(&client->dev, &info->battery); | ||
282 | if (ret) { | ||
283 | dev_err(&client->dev, "failed to register battery\n"); | ||
284 | goto fail_register; | ||
285 | } | ||
286 | |||
287 | return 0; | ||
288 | |||
289 | fail_register: | ||
290 | kfree(info->battery.name); | ||
291 | fail_name: | ||
292 | i2c_set_clientdata(client, info); | ||
293 | kfree(info); | ||
294 | fail_info: | ||
295 | mutex_lock(&battery_lock); | ||
296 | idr_remove(&battery_id, num); | ||
297 | mutex_unlock(&battery_lock); | ||
298 | fail_id: | ||
299 | return ret; | ||
300 | } | ||
301 | |||
302 | static const struct i2c_device_id ds2782_id[] = { | ||
303 | {"ds2782", 0}, | ||
304 | {}, | ||
305 | }; | ||
306 | |||
307 | static struct i2c_driver ds2782_battery_driver = { | ||
308 | .driver = { | ||
309 | .name = "ds2782-battery", | ||
310 | }, | ||
311 | .probe = ds2782_battery_probe, | ||
312 | .remove = ds2782_battery_remove, | ||
313 | .id_table = ds2782_id, | ||
314 | }; | ||
315 | |||
316 | static int __init ds2782_init(void) | ||
317 | { | ||
318 | return i2c_add_driver(&ds2782_battery_driver); | ||
319 | } | ||
320 | module_init(ds2782_init); | ||
321 | |||
322 | static void __exit ds2782_exit(void) | ||
323 | { | ||
324 | i2c_del_driver(&ds2782_battery_driver); | ||
325 | } | ||
326 | module_exit(ds2782_exit); | ||
327 | |||
328 | MODULE_AUTHOR("Ryan Mallon <ryan@bluewatersys.com>"); | ||
329 | MODULE_DESCRIPTION("Maxim/Dallas DS2782 Stand-Alone Fuel Gauage IC driver"); | ||
330 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 5fbca2681baa..58e419299cd6 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/kernel.h> | ||
11 | #include <linux/module.h> | 12 | #include <linux/module.h> |
12 | #include <linux/err.h> | 13 | #include <linux/err.h> |
13 | #include <linux/platform_device.h> | 14 | #include <linux/platform_device.h> |
@@ -35,6 +36,7 @@ | |||
35 | #define BAT_STAT_AC 0x10 | 36 | #define BAT_STAT_AC 0x10 |
36 | #define BAT_STAT_CHARGING 0x20 | 37 | #define BAT_STAT_CHARGING 0x20 |
37 | #define BAT_STAT_DISCHARGING 0x40 | 38 | #define BAT_STAT_DISCHARGING 0x40 |
39 | #define BAT_STAT_TRICKLE 0x80 | ||
38 | 40 | ||
39 | #define BAT_ERR_INFOFAIL 0x02 | 41 | #define BAT_ERR_INFOFAIL 0x02 |
40 | #define BAT_ERR_OVERVOLTAGE 0x04 | 42 | #define BAT_ERR_OVERVOLTAGE 0x04 |
@@ -89,7 +91,7 @@ static char bat_serial[17]; /* Ick */ | |||
89 | static int olpc_bat_get_status(union power_supply_propval *val, uint8_t ec_byte) | 91 | static int olpc_bat_get_status(union power_supply_propval *val, uint8_t ec_byte) |
90 | { | 92 | { |
91 | if (olpc_platform_info.ecver > 0x44) { | 93 | if (olpc_platform_info.ecver > 0x44) { |
92 | if (ec_byte & BAT_STAT_CHARGING) | 94 | if (ec_byte & (BAT_STAT_CHARGING | BAT_STAT_TRICKLE)) |
93 | val->intval = POWER_SUPPLY_STATUS_CHARGING; | 95 | val->intval = POWER_SUPPLY_STATUS_CHARGING; |
94 | else if (ec_byte & BAT_STAT_DISCHARGING) | 96 | else if (ec_byte & BAT_STAT_DISCHARGING) |
95 | val->intval = POWER_SUPPLY_STATUS_DISCHARGING; | 97 | val->intval = POWER_SUPPLY_STATUS_DISCHARGING; |
@@ -219,7 +221,8 @@ static int olpc_bat_get_property(struct power_supply *psy, | |||
219 | It doesn't matter though -- the EC will return the last-known | 221 | It doesn't matter though -- the EC will return the last-known |
220 | information, and it's as if we just ran that _little_ bit faster | 222 | information, and it's as if we just ran that _little_ bit faster |
221 | and managed to read it out before the battery went away. */ | 223 | and managed to read it out before the battery went away. */ |
222 | if (!(ec_byte & BAT_STAT_PRESENT) && psp != POWER_SUPPLY_PROP_PRESENT) | 224 | if (!(ec_byte & (BAT_STAT_PRESENT | BAT_STAT_TRICKLE)) && |
225 | psp != POWER_SUPPLY_PROP_PRESENT) | ||
223 | return -ENODEV; | 226 | return -ENODEV; |
224 | 227 | ||
225 | switch (psp) { | 228 | switch (psp) { |
@@ -229,7 +232,8 @@ static int olpc_bat_get_property(struct power_supply *psy, | |||
229 | return ret; | 232 | return ret; |
230 | break; | 233 | break; |
231 | case POWER_SUPPLY_PROP_PRESENT: | 234 | case POWER_SUPPLY_PROP_PRESENT: |
232 | val->intval = !!(ec_byte & BAT_STAT_PRESENT); | 235 | val->intval = !!(ec_byte & (BAT_STAT_PRESENT | |
236 | BAT_STAT_TRICKLE)); | ||
233 | break; | 237 | break; |
234 | 238 | ||
235 | case POWER_SUPPLY_PROP_HEALTH: | 239 | case POWER_SUPPLY_PROP_HEALTH: |
@@ -334,21 +338,21 @@ static ssize_t olpc_bat_eeprom_read(struct kobject *kobj, | |||
334 | struct bin_attribute *attr, char *buf, loff_t off, size_t count) | 338 | struct bin_attribute *attr, char *buf, loff_t off, size_t count) |
335 | { | 339 | { |
336 | uint8_t ec_byte; | 340 | uint8_t ec_byte; |
337 | int ret, end; | 341 | int ret; |
342 | int i; | ||
338 | 343 | ||
339 | if (off >= EEPROM_SIZE) | 344 | if (off >= EEPROM_SIZE) |
340 | return 0; | 345 | return 0; |
341 | if (off + count > EEPROM_SIZE) | 346 | if (off + count > EEPROM_SIZE) |
342 | count = EEPROM_SIZE - off; | 347 | count = EEPROM_SIZE - off; |
343 | 348 | ||
344 | end = EEPROM_START + off + count; | 349 | for (i = 0; i < count; i++) { |
345 | for (ec_byte = EEPROM_START + off; ec_byte < end; ec_byte++) { | 350 | ec_byte = EEPROM_START + off + i; |
346 | ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, | 351 | ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &buf[i], 1); |
347 | &buf[ec_byte - EEPROM_START], 1); | ||
348 | if (ret) { | 352 | if (ret) { |
349 | printk(KERN_ERR "olpc-battery: EC command " | 353 | pr_err("olpc-battery: " |
350 | "EC_BAT_EEPROM @ 0x%x failed -" | 354 | "EC_BAT_EEPROM cmd @ 0x%x failed - %d!\n", |
351 | " %d!\n", ec_byte, ret); | 355 | ec_byte, ret); |
352 | return -EIO; | 356 | return -EIO; |
353 | } | 357 | } |
354 | } | 358 | } |
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 8030e25152fb..c75d6f35cb5f 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c | |||
@@ -553,40 +553,35 @@ static void _zfcp_erp_unit_reopen_all(struct zfcp_port *port, int clear, | |||
553 | _zfcp_erp_unit_reopen(unit, clear, id, ref); | 553 | _zfcp_erp_unit_reopen(unit, clear, id, ref); |
554 | } | 554 | } |
555 | 555 | ||
556 | static void zfcp_erp_strategy_followup_actions(struct zfcp_erp_action *act) | 556 | static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) |
557 | { | 557 | { |
558 | struct zfcp_adapter *adapter = act->adapter; | ||
559 | struct zfcp_port *port = act->port; | ||
560 | struct zfcp_unit *unit = act->unit; | ||
561 | u32 status = act->status; | ||
562 | |||
563 | /* initiate follow-up actions depending on success of finished action */ | ||
564 | switch (act->action) { | 558 | switch (act->action) { |
565 | |||
566 | case ZFCP_ERP_ACTION_REOPEN_ADAPTER: | 559 | case ZFCP_ERP_ACTION_REOPEN_ADAPTER: |
567 | if (status == ZFCP_ERP_SUCCEEDED) | 560 | _zfcp_erp_adapter_reopen(act->adapter, 0, "ersff_1", NULL); |
568 | _zfcp_erp_port_reopen_all(adapter, 0, "ersfa_1", NULL); | ||
569 | else | ||
570 | _zfcp_erp_adapter_reopen(adapter, 0, "ersfa_2", NULL); | ||
571 | break; | 561 | break; |
572 | |||
573 | case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: | 562 | case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: |
574 | if (status == ZFCP_ERP_SUCCEEDED) | 563 | _zfcp_erp_port_forced_reopen(act->port, 0, "ersff_2", NULL); |
575 | _zfcp_erp_port_reopen(port, 0, "ersfa_3", NULL); | ||
576 | else | ||
577 | _zfcp_erp_adapter_reopen(adapter, 0, "ersfa_4", NULL); | ||
578 | break; | 564 | break; |
579 | |||
580 | case ZFCP_ERP_ACTION_REOPEN_PORT: | 565 | case ZFCP_ERP_ACTION_REOPEN_PORT: |
581 | if (status == ZFCP_ERP_SUCCEEDED) | 566 | _zfcp_erp_port_reopen(act->port, 0, "ersff_3", NULL); |
582 | _zfcp_erp_unit_reopen_all(port, 0, "ersfa_5", NULL); | ||
583 | else | ||
584 | _zfcp_erp_port_forced_reopen(port, 0, "ersfa_6", NULL); | ||
585 | break; | 567 | break; |
586 | |||
587 | case ZFCP_ERP_ACTION_REOPEN_UNIT: | 568 | case ZFCP_ERP_ACTION_REOPEN_UNIT: |
588 | if (status != ZFCP_ERP_SUCCEEDED) | 569 | _zfcp_erp_unit_reopen(act->unit, 0, "ersff_4", NULL); |
589 | _zfcp_erp_port_reopen(unit->port, 0, "ersfa_7", NULL); | 570 | break; |
571 | } | ||
572 | } | ||
573 | |||
574 | static void zfcp_erp_strategy_followup_success(struct zfcp_erp_action *act) | ||
575 | { | ||
576 | switch (act->action) { | ||
577 | case ZFCP_ERP_ACTION_REOPEN_ADAPTER: | ||
578 | _zfcp_erp_port_reopen_all(act->adapter, 0, "ersfs_1", NULL); | ||
579 | break; | ||
580 | case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: | ||
581 | _zfcp_erp_port_reopen(act->port, 0, "ersfs_2", NULL); | ||
582 | break; | ||
583 | case ZFCP_ERP_ACTION_REOPEN_PORT: | ||
584 | _zfcp_erp_unit_reopen_all(act->port, 0, "ersfs_3", NULL); | ||
590 | break; | 585 | break; |
591 | } | 586 | } |
592 | } | 587 | } |
@@ -801,7 +796,7 @@ static int zfcp_erp_port_forced_strategy(struct zfcp_erp_action *erp_action) | |||
801 | return ZFCP_ERP_FAILED; | 796 | return ZFCP_ERP_FAILED; |
802 | 797 | ||
803 | case ZFCP_ERP_STEP_PHYS_PORT_CLOSING: | 798 | case ZFCP_ERP_STEP_PHYS_PORT_CLOSING: |
804 | if (status & ZFCP_STATUS_PORT_PHYS_OPEN) | 799 | if (!(status & ZFCP_STATUS_PORT_PHYS_OPEN)) |
805 | return ZFCP_ERP_SUCCEEDED; | 800 | return ZFCP_ERP_SUCCEEDED; |
806 | } | 801 | } |
807 | return ZFCP_ERP_FAILED; | 802 | return ZFCP_ERP_FAILED; |
@@ -853,11 +848,17 @@ void zfcp_erp_port_strategy_open_lookup(struct work_struct *work) | |||
853 | gid_pn_work); | 848 | gid_pn_work); |
854 | 849 | ||
855 | retval = zfcp_fc_ns_gid_pn(&port->erp_action); | 850 | retval = zfcp_fc_ns_gid_pn(&port->erp_action); |
856 | if (retval == -ENOMEM) | 851 | if (!retval) { |
857 | zfcp_erp_notify(&port->erp_action, ZFCP_ERP_NOMEM); | 852 | port->erp_action.step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP; |
858 | port->erp_action.step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP; | 853 | goto out; |
859 | if (retval) | 854 | } |
860 | zfcp_erp_notify(&port->erp_action, ZFCP_ERP_FAILED); | 855 | if (retval == -ENOMEM) { |
856 | zfcp_erp_notify(&port->erp_action, ZFCP_STATUS_ERP_LOWMEM); | ||
857 | goto out; | ||
858 | } | ||
859 | /* all other error condtions */ | ||
860 | zfcp_erp_notify(&port->erp_action, 0); | ||
861 | out: | ||
861 | zfcp_port_put(port); | 862 | zfcp_port_put(port); |
862 | } | 863 | } |
863 | 864 | ||
@@ -1289,7 +1290,10 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action) | |||
1289 | retval = zfcp_erp_strategy_statechange(erp_action, retval); | 1290 | retval = zfcp_erp_strategy_statechange(erp_action, retval); |
1290 | if (retval == ZFCP_ERP_EXIT) | 1291 | if (retval == ZFCP_ERP_EXIT) |
1291 | goto unlock; | 1292 | goto unlock; |
1292 | zfcp_erp_strategy_followup_actions(erp_action); | 1293 | if (retval == ZFCP_ERP_SUCCEEDED) |
1294 | zfcp_erp_strategy_followup_success(erp_action); | ||
1295 | if (retval == ZFCP_ERP_FAILED) | ||
1296 | zfcp_erp_strategy_followup_failed(erp_action); | ||
1293 | 1297 | ||
1294 | unlock: | 1298 | unlock: |
1295 | write_unlock(&adapter->erp_lock); | 1299 | write_unlock(&adapter->erp_lock); |
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 2f0705d76b72..47daebfa7e59 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c | |||
@@ -79,11 +79,9 @@ static int zfcp_wka_port_get(struct zfcp_wka_port *wka_port) | |||
79 | 79 | ||
80 | mutex_unlock(&wka_port->mutex); | 80 | mutex_unlock(&wka_port->mutex); |
81 | 81 | ||
82 | wait_event_timeout( | 82 | wait_event(wka_port->completion_wq, |
83 | wka_port->completion_wq, | 83 | wka_port->status == ZFCP_WKA_PORT_ONLINE || |
84 | wka_port->status == ZFCP_WKA_PORT_ONLINE || | 84 | wka_port->status == ZFCP_WKA_PORT_OFFLINE); |
85 | wka_port->status == ZFCP_WKA_PORT_OFFLINE, | ||
86 | HZ >> 1); | ||
87 | 85 | ||
88 | if (wka_port->status == ZFCP_WKA_PORT_ONLINE) { | 86 | if (wka_port->status == ZFCP_WKA_PORT_ONLINE) { |
89 | atomic_inc(&wka_port->refcount); | 87 | atomic_inc(&wka_port->refcount); |
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c57658f3d34f..47795fbf081f 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c | |||
@@ -670,8 +670,11 @@ static int zfcp_fsf_req_sbal_get(struct zfcp_adapter *adapter) | |||
670 | zfcp_fsf_sbal_check(adapter), 5 * HZ); | 670 | zfcp_fsf_sbal_check(adapter), 5 * HZ); |
671 | if (ret > 0) | 671 | if (ret > 0) |
672 | return 0; | 672 | return 0; |
673 | if (!ret) | 673 | if (!ret) { |
674 | atomic_inc(&adapter->qdio_outb_full); | 674 | atomic_inc(&adapter->qdio_outb_full); |
675 | /* assume hanging outbound queue, try queue recovery */ | ||
676 | zfcp_erp_adapter_reopen(adapter, 0, "fsrsg_1", NULL); | ||
677 | } | ||
675 | 678 | ||
676 | spin_lock_bh(&adapter->req_q_lock); | 679 | spin_lock_bh(&adapter->req_q_lock); |
677 | return -EIO; | 680 | return -EIO; |
@@ -722,7 +725,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_adapter *adapter, | |||
722 | req = zfcp_fsf_alloc_qtcb(pool); | 725 | req = zfcp_fsf_alloc_qtcb(pool); |
723 | 726 | ||
724 | if (unlikely(!req)) | 727 | if (unlikely(!req)) |
725 | return ERR_PTR(-EIO); | 728 | return ERR_PTR(-ENOMEM); |
726 | 729 | ||
727 | if (adapter->req_no == 0) | 730 | if (adapter->req_no == 0) |
728 | adapter->req_no++; | 731 | adapter->req_no++; |
@@ -1010,6 +1013,23 @@ skip_fsfstatus: | |||
1010 | send_ct->handler(send_ct->handler_data); | 1013 | send_ct->handler(send_ct->handler_data); |
1011 | } | 1014 | } |
1012 | 1015 | ||
1016 | static void zfcp_fsf_setup_ct_els_unchained(struct qdio_buffer_element *sbale, | ||
1017 | struct scatterlist *sg_req, | ||
1018 | struct scatterlist *sg_resp) | ||
1019 | { | ||
1020 | sbale[0].flags |= SBAL_FLAGS0_TYPE_WRITE_READ; | ||
1021 | sbale[2].addr = sg_virt(sg_req); | ||
1022 | sbale[2].length = sg_req->length; | ||
1023 | sbale[3].addr = sg_virt(sg_resp); | ||
1024 | sbale[3].length = sg_resp->length; | ||
1025 | sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; | ||
1026 | } | ||
1027 | |||
1028 | static int zfcp_fsf_one_sbal(struct scatterlist *sg) | ||
1029 | { | ||
1030 | return sg_is_last(sg) && sg->length <= PAGE_SIZE; | ||
1031 | } | ||
1032 | |||
1013 | static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, | 1033 | static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, |
1014 | struct scatterlist *sg_req, | 1034 | struct scatterlist *sg_req, |
1015 | struct scatterlist *sg_resp, | 1035 | struct scatterlist *sg_resp, |
@@ -1020,30 +1040,30 @@ static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, | |||
1020 | int bytes; | 1040 | int bytes; |
1021 | 1041 | ||
1022 | if (!(feat & FSF_FEATURE_ELS_CT_CHAINED_SBALS)) { | 1042 | if (!(feat & FSF_FEATURE_ELS_CT_CHAINED_SBALS)) { |
1023 | if (sg_req->length > PAGE_SIZE || sg_resp->length > PAGE_SIZE || | 1043 | if (!zfcp_fsf_one_sbal(sg_req) || !zfcp_fsf_one_sbal(sg_resp)) |
1024 | !sg_is_last(sg_req) || !sg_is_last(sg_resp)) | ||
1025 | return -EOPNOTSUPP; | 1044 | return -EOPNOTSUPP; |
1026 | 1045 | ||
1027 | sbale[0].flags |= SBAL_FLAGS0_TYPE_WRITE_READ; | 1046 | zfcp_fsf_setup_ct_els_unchained(sbale, sg_req, sg_resp); |
1028 | sbale[2].addr = sg_virt(sg_req); | 1047 | return 0; |
1029 | sbale[2].length = sg_req->length; | 1048 | } |
1030 | sbale[3].addr = sg_virt(sg_resp); | 1049 | |
1031 | sbale[3].length = sg_resp->length; | 1050 | /* use single, unchained SBAL if it can hold the request */ |
1032 | sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; | 1051 | if (zfcp_fsf_one_sbal(sg_req) && zfcp_fsf_one_sbal(sg_resp)) { |
1052 | zfcp_fsf_setup_ct_els_unchained(sbale, sg_req, sg_resp); | ||
1033 | return 0; | 1053 | return 0; |
1034 | } | 1054 | } |
1035 | 1055 | ||
1036 | bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, | 1056 | bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, |
1037 | sg_req, max_sbals); | 1057 | sg_req, max_sbals); |
1038 | if (bytes <= 0) | 1058 | if (bytes <= 0) |
1039 | return -ENOMEM; | 1059 | return -EIO; |
1040 | req->qtcb->bottom.support.req_buf_length = bytes; | 1060 | req->qtcb->bottom.support.req_buf_length = bytes; |
1041 | req->sbale_curr = ZFCP_LAST_SBALE_PER_SBAL; | 1061 | req->sbale_curr = ZFCP_LAST_SBALE_PER_SBAL; |
1042 | 1062 | ||
1043 | bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, | 1063 | bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, |
1044 | sg_resp, max_sbals); | 1064 | sg_resp, max_sbals); |
1045 | if (bytes <= 0) | 1065 | if (bytes <= 0) |
1046 | return -ENOMEM; | 1066 | return -EIO; |
1047 | req->qtcb->bottom.support.resp_buf_length = bytes; | 1067 | req->qtcb->bottom.support.resp_buf_length = bytes; |
1048 | 1068 | ||
1049 | return 0; | 1069 | return 0; |
@@ -1607,10 +1627,10 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req) | |||
1607 | case FSF_ACCESS_DENIED: | 1627 | case FSF_ACCESS_DENIED: |
1608 | wka_port->status = ZFCP_WKA_PORT_OFFLINE; | 1628 | wka_port->status = ZFCP_WKA_PORT_OFFLINE; |
1609 | break; | 1629 | break; |
1610 | case FSF_PORT_ALREADY_OPEN: | ||
1611 | break; | ||
1612 | case FSF_GOOD: | 1630 | case FSF_GOOD: |
1613 | wka_port->handle = header->port_handle; | 1631 | wka_port->handle = header->port_handle; |
1632 | /* fall through */ | ||
1633 | case FSF_PORT_ALREADY_OPEN: | ||
1614 | wka_port->status = ZFCP_WKA_PORT_ONLINE; | 1634 | wka_port->status = ZFCP_WKA_PORT_ONLINE; |
1615 | } | 1635 | } |
1616 | out: | 1636 | out: |
@@ -1731,15 +1751,16 @@ static void zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *req) | |||
1731 | zfcp_fsf_access_denied_port(req, port); | 1751 | zfcp_fsf_access_denied_port(req, port); |
1732 | break; | 1752 | break; |
1733 | case FSF_PORT_BOXED: | 1753 | case FSF_PORT_BOXED: |
1734 | zfcp_erp_port_boxed(port, "fscpph2", req); | ||
1735 | req->status |= ZFCP_STATUS_FSFREQ_ERROR | | ||
1736 | ZFCP_STATUS_FSFREQ_RETRY; | ||
1737 | /* can't use generic zfcp_erp_modify_port_status because | 1754 | /* can't use generic zfcp_erp_modify_port_status because |
1738 | * ZFCP_STATUS_COMMON_OPEN must not be reset for the port */ | 1755 | * ZFCP_STATUS_COMMON_OPEN must not be reset for the port */ |
1739 | atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status); | 1756 | atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status); |
1740 | list_for_each_entry(unit, &port->unit_list_head, list) | 1757 | list_for_each_entry(unit, &port->unit_list_head, list) |
1741 | atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, | 1758 | atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, |
1742 | &unit->status); | 1759 | &unit->status); |
1760 | zfcp_erp_port_boxed(port, "fscpph2", req); | ||
1761 | req->status |= ZFCP_STATUS_FSFREQ_ERROR | | ||
1762 | ZFCP_STATUS_FSFREQ_RETRY; | ||
1763 | |||
1743 | break; | 1764 | break; |
1744 | case FSF_ADAPTER_STATUS_AVAILABLE: | 1765 | case FSF_ADAPTER_STATUS_AVAILABLE: |
1745 | switch (header->fsf_status_qual.word[0]) { | 1766 | switch (header->fsf_status_qual.word[0]) { |
@@ -2541,7 +2562,6 @@ struct zfcp_fsf_req *zfcp_fsf_control_file(struct zfcp_adapter *adapter, | |||
2541 | bytes = zfcp_qdio_sbals_from_sg(req, direction, fsf_cfdc->sg, | 2562 | bytes = zfcp_qdio_sbals_from_sg(req, direction, fsf_cfdc->sg, |
2542 | FSF_MAX_SBALS_PER_REQ); | 2563 | FSF_MAX_SBALS_PER_REQ); |
2543 | if (bytes != ZFCP_CFDC_MAX_SIZE) { | 2564 | if (bytes != ZFCP_CFDC_MAX_SIZE) { |
2544 | retval = -ENOMEM; | ||
2545 | zfcp_fsf_req_free(req); | 2565 | zfcp_fsf_req_free(req); |
2546 | goto out; | 2566 | goto out; |
2547 | } | 2567 | } |
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 967ede73f4c5..6925a1784682 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c | |||
@@ -167,20 +167,21 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |||
167 | struct zfcp_unit *unit = scpnt->device->hostdata; | 167 | struct zfcp_unit *unit = scpnt->device->hostdata; |
168 | struct zfcp_fsf_req *old_req, *abrt_req; | 168 | struct zfcp_fsf_req *old_req, *abrt_req; |
169 | unsigned long flags; | 169 | unsigned long flags; |
170 | unsigned long old_req_id = (unsigned long) scpnt->host_scribble; | 170 | unsigned long old_reqid = (unsigned long) scpnt->host_scribble; |
171 | int retval = SUCCESS; | 171 | int retval = SUCCESS; |
172 | int retry = 3; | 172 | int retry = 3; |
173 | char *dbf_tag; | ||
173 | 174 | ||
174 | /* avoid race condition between late normal completion and abort */ | 175 | /* avoid race condition between late normal completion and abort */ |
175 | write_lock_irqsave(&adapter->abort_lock, flags); | 176 | write_lock_irqsave(&adapter->abort_lock, flags); |
176 | 177 | ||
177 | spin_lock(&adapter->req_list_lock); | 178 | spin_lock(&adapter->req_list_lock); |
178 | old_req = zfcp_reqlist_find(adapter, old_req_id); | 179 | old_req = zfcp_reqlist_find(adapter, old_reqid); |
179 | spin_unlock(&adapter->req_list_lock); | 180 | spin_unlock(&adapter->req_list_lock); |
180 | if (!old_req) { | 181 | if (!old_req) { |
181 | write_unlock_irqrestore(&adapter->abort_lock, flags); | 182 | write_unlock_irqrestore(&adapter->abort_lock, flags); |
182 | zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, | 183 | zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, |
183 | old_req_id); | 184 | old_reqid); |
184 | return FAILED; /* completion could be in progress */ | 185 | return FAILED; /* completion could be in progress */ |
185 | } | 186 | } |
186 | old_req->data = NULL; | 187 | old_req->data = NULL; |
@@ -189,7 +190,7 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |||
189 | write_unlock_irqrestore(&adapter->abort_lock, flags); | 190 | write_unlock_irqrestore(&adapter->abort_lock, flags); |
190 | 191 | ||
191 | while (retry--) { | 192 | while (retry--) { |
192 | abrt_req = zfcp_fsf_abort_fcp_command(old_req_id, unit); | 193 | abrt_req = zfcp_fsf_abort_fcp_command(old_reqid, unit); |
193 | if (abrt_req) | 194 | if (abrt_req) |
194 | break; | 195 | break; |
195 | 196 | ||
@@ -197,7 +198,7 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |||
197 | if (!(atomic_read(&adapter->status) & | 198 | if (!(atomic_read(&adapter->status) & |
198 | ZFCP_STATUS_COMMON_RUNNING)) { | 199 | ZFCP_STATUS_COMMON_RUNNING)) { |
199 | zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL, | 200 | zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL, |
200 | old_req_id); | 201 | old_reqid); |
201 | return SUCCESS; | 202 | return SUCCESS; |
202 | } | 203 | } |
203 | } | 204 | } |
@@ -208,13 +209,14 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |||
208 | abrt_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); | 209 | abrt_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); |
209 | 210 | ||
210 | if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) | 211 | if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) |
211 | zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, abrt_req, 0); | 212 | dbf_tag = "okay"; |
212 | else if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) | 213 | else if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) |
213 | zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, abrt_req, 0); | 214 | dbf_tag = "lte2"; |
214 | else { | 215 | else { |
215 | zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, abrt_req, 0); | 216 | dbf_tag = "fail"; |
216 | retval = FAILED; | 217 | retval = FAILED; |
217 | } | 218 | } |
219 | zfcp_scsi_dbf_event_abort(dbf_tag, adapter, scpnt, abrt_req, old_reqid); | ||
218 | zfcp_fsf_req_free(abrt_req); | 220 | zfcp_fsf_req_free(abrt_req); |
219 | return retval; | 221 | return retval; |
220 | } | 222 | } |
@@ -534,6 +536,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port) | |||
534 | struct fc_rport_identifiers ids; | 536 | struct fc_rport_identifiers ids; |
535 | struct fc_rport *rport; | 537 | struct fc_rport *rport; |
536 | 538 | ||
539 | if (port->rport) | ||
540 | return; | ||
541 | |||
537 | ids.node_name = port->wwnn; | 542 | ids.node_name = port->wwnn; |
538 | ids.port_name = port->wwpn; | 543 | ids.port_name = port->wwpn; |
539 | ids.port_id = port->d_id; | 544 | ids.port_id = port->d_id; |
@@ -557,8 +562,10 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port) | |||
557 | { | 562 | { |
558 | struct fc_rport *rport = port->rport; | 563 | struct fc_rport *rport = port->rport; |
559 | 564 | ||
560 | if (rport) | 565 | if (rport) { |
561 | fc_remote_port_delete(rport); | 566 | fc_remote_port_delete(rport); |
567 | port->rport = NULL; | ||
568 | } | ||
562 | } | 569 | } |
563 | 570 | ||
564 | void zfcp_scsi_schedule_rport_register(struct zfcp_port *port) | 571 | void zfcp_scsi_schedule_rport_register(struct zfcp_port *port) |
diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 3e51e64d1108..0fe5cce818cb 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c | |||
@@ -494,9 +494,14 @@ static ssize_t zfcp_sysfs_adapter_q_full_show(struct device *dev, | |||
494 | struct Scsi_Host *scsi_host = class_to_shost(dev); | 494 | struct Scsi_Host *scsi_host = class_to_shost(dev); |
495 | struct zfcp_adapter *adapter = | 495 | struct zfcp_adapter *adapter = |
496 | (struct zfcp_adapter *) scsi_host->hostdata[0]; | 496 | (struct zfcp_adapter *) scsi_host->hostdata[0]; |
497 | u64 util; | ||
498 | |||
499 | spin_lock_bh(&adapter->qdio_stat_lock); | ||
500 | util = adapter->req_q_util; | ||
501 | spin_unlock_bh(&adapter->qdio_stat_lock); | ||
497 | 502 | ||
498 | return sprintf(buf, "%d %llu\n", atomic_read(&adapter->qdio_outb_full), | 503 | return sprintf(buf, "%d %llu\n", atomic_read(&adapter->qdio_outb_full), |
499 | (unsigned long long)adapter->req_q_util); | 504 | (unsigned long long)util); |
500 | } | 505 | } |
501 | static DEVICE_ATTR(queue_full, S_IRUGO, zfcp_sysfs_adapter_q_full_show, NULL); | 506 | static DEVICE_ATTR(queue_full, S_IRUGO, zfcp_sysfs_adapter_q_full_show, NULL); |
502 | 507 | ||
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 2bc22be5f849..145ab9ba55ea 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c | |||
@@ -415,9 +415,9 @@ static void fc_exch_timeout(struct work_struct *work) | |||
415 | e_stat = ep->esb_stat; | 415 | e_stat = ep->esb_stat; |
416 | if (e_stat & ESB_ST_COMPLETE) { | 416 | if (e_stat & ESB_ST_COMPLETE) { |
417 | ep->esb_stat = e_stat & ~ESB_ST_REC_QUAL; | 417 | ep->esb_stat = e_stat & ~ESB_ST_REC_QUAL; |
418 | spin_unlock_bh(&ep->ex_lock); | ||
418 | if (e_stat & ESB_ST_REC_QUAL) | 419 | if (e_stat & ESB_ST_REC_QUAL) |
419 | fc_exch_rrq(ep); | 420 | fc_exch_rrq(ep); |
420 | spin_unlock_bh(&ep->ex_lock); | ||
421 | goto done; | 421 | goto done; |
422 | } else { | 422 | } else { |
423 | resp = ep->resp; | 423 | resp = ep->resp; |
@@ -1624,14 +1624,14 @@ static void fc_exch_rrq(struct fc_exch *ep) | |||
1624 | struct fc_lport *lp; | 1624 | struct fc_lport *lp; |
1625 | struct fc_els_rrq *rrq; | 1625 | struct fc_els_rrq *rrq; |
1626 | struct fc_frame *fp; | 1626 | struct fc_frame *fp; |
1627 | struct fc_seq *rrq_sp; | ||
1628 | u32 did; | 1627 | u32 did; |
1629 | 1628 | ||
1630 | lp = ep->lp; | 1629 | lp = ep->lp; |
1631 | 1630 | ||
1632 | fp = fc_frame_alloc(lp, sizeof(*rrq)); | 1631 | fp = fc_frame_alloc(lp, sizeof(*rrq)); |
1633 | if (!fp) | 1632 | if (!fp) |
1634 | return; | 1633 | goto retry; |
1634 | |||
1635 | rrq = fc_frame_payload_get(fp, sizeof(*rrq)); | 1635 | rrq = fc_frame_payload_get(fp, sizeof(*rrq)); |
1636 | memset(rrq, 0, sizeof(*rrq)); | 1636 | memset(rrq, 0, sizeof(*rrq)); |
1637 | rrq->rrq_cmd = ELS_RRQ; | 1637 | rrq->rrq_cmd = ELS_RRQ; |
@@ -1647,13 +1647,20 @@ static void fc_exch_rrq(struct fc_exch *ep) | |||
1647 | fc_host_port_id(lp->host), FC_TYPE_ELS, | 1647 | fc_host_port_id(lp->host), FC_TYPE_ELS, |
1648 | FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); | 1648 | FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); |
1649 | 1649 | ||
1650 | rrq_sp = fc_exch_seq_send(lp, fp, fc_exch_rrq_resp, NULL, ep, | 1650 | if (fc_exch_seq_send(lp, fp, fc_exch_rrq_resp, NULL, ep, lp->e_d_tov)) |
1651 | lp->e_d_tov); | 1651 | return; |
1652 | if (!rrq_sp) { | 1652 | |
1653 | ep->esb_stat |= ESB_ST_REC_QUAL; | 1653 | retry: |
1654 | fc_exch_timer_set_locked(ep, ep->r_a_tov); | 1654 | spin_lock_bh(&ep->ex_lock); |
1655 | if (ep->state & (FC_EX_RST_CLEANUP | FC_EX_DONE)) { | ||
1656 | spin_unlock_bh(&ep->ex_lock); | ||
1657 | /* drop hold for rec qual */ | ||
1658 | fc_exch_release(ep); | ||
1655 | return; | 1659 | return; |
1656 | } | 1660 | } |
1661 | ep->esb_stat |= ESB_ST_REC_QUAL; | ||
1662 | fc_exch_timer_set_locked(ep, ep->r_a_tov); | ||
1663 | spin_unlock_bh(&ep->ex_lock); | ||
1657 | } | 1664 | } |
1658 | 1665 | ||
1659 | 1666 | ||
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 716cc344c5df..a751f6230c22 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c | |||
@@ -1974,10 +1974,10 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) | |||
1974 | * good and have never sent us a successful tmf response | 1974 | * good and have never sent us a successful tmf response |
1975 | * then sent more data for the cmd. | 1975 | * then sent more data for the cmd. |
1976 | */ | 1976 | */ |
1977 | spin_lock(&session->lock); | 1977 | spin_lock_bh(&session->lock); |
1978 | fail_scsi_task(task, DID_ABORT); | 1978 | fail_scsi_task(task, DID_ABORT); |
1979 | conn->tmf_state = TMF_INITIAL; | 1979 | conn->tmf_state = TMF_INITIAL; |
1980 | spin_unlock(&session->lock); | 1980 | spin_unlock_bh(&session->lock); |
1981 | iscsi_start_tx(conn); | 1981 | iscsi_start_tx(conn); |
1982 | goto success_unlocked; | 1982 | goto success_unlocked; |
1983 | case TMF_TIMEDOUT: | 1983 | case TMF_TIMEDOUT: |
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 54fa1e42dc4d..b3381959acce 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c | |||
@@ -766,6 +766,7 @@ static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id) | |||
766 | if (!memcmp(phy->attached_sas_addr, ephy->attached_sas_addr, | 766 | if (!memcmp(phy->attached_sas_addr, ephy->attached_sas_addr, |
767 | SAS_ADDR_SIZE) && ephy->port) { | 767 | SAS_ADDR_SIZE) && ephy->port) { |
768 | sas_port_add_phy(ephy->port, phy->phy); | 768 | sas_port_add_phy(ephy->port, phy->phy); |
769 | phy->port = ephy->port; | ||
769 | phy->phy_state = PHY_DEVICE_DISCOVERED; | 770 | phy->phy_state = PHY_DEVICE_DISCOVERED; |
770 | return 0; | 771 | return 0; |
771 | } | 772 | } |
@@ -945,11 +946,21 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) | |||
945 | if (ex->ex_phy[i].phy_state == PHY_VACANT || | 946 | if (ex->ex_phy[i].phy_state == PHY_VACANT || |
946 | ex->ex_phy[i].phy_state == PHY_NOT_PRESENT) | 947 | ex->ex_phy[i].phy_state == PHY_NOT_PRESENT) |
947 | continue; | 948 | continue; |
948 | 949 | /* | |
950 | * Due to races, the phy might not get added to the | ||
951 | * wide port, so we add the phy to the wide port here. | ||
952 | */ | ||
949 | if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == | 953 | if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == |
950 | SAS_ADDR(child->sas_addr)) | 954 | SAS_ADDR(child->sas_addr)) { |
951 | ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; | 955 | ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; |
956 | res = sas_ex_join_wide_port(dev, i); | ||
957 | if (!res) | ||
958 | SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", | ||
959 | i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr)); | ||
960 | |||
961 | } | ||
952 | } | 962 | } |
963 | res = 0; | ||
953 | } | 964 | } |
954 | 965 | ||
955 | return res; | 966 | return res; |
@@ -1598,7 +1609,7 @@ static int sas_get_phy_attached_sas_addr(struct domain_device *dev, | |||
1598 | } | 1609 | } |
1599 | 1610 | ||
1600 | static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, | 1611 | static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, |
1601 | int from_phy) | 1612 | int from_phy, bool update) |
1602 | { | 1613 | { |
1603 | struct expander_device *ex = &dev->ex_dev; | 1614 | struct expander_device *ex = &dev->ex_dev; |
1604 | int res = 0; | 1615 | int res = 0; |
@@ -1611,7 +1622,9 @@ static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, | |||
1611 | if (res) | 1622 | if (res) |
1612 | goto out; | 1623 | goto out; |
1613 | else if (phy_change_count != ex->ex_phy[i].phy_change_count) { | 1624 | else if (phy_change_count != ex->ex_phy[i].phy_change_count) { |
1614 | ex->ex_phy[i].phy_change_count = phy_change_count; | 1625 | if (update) |
1626 | ex->ex_phy[i].phy_change_count = | ||
1627 | phy_change_count; | ||
1615 | *phy_id = i; | 1628 | *phy_id = i; |
1616 | return 0; | 1629 | return 0; |
1617 | } | 1630 | } |
@@ -1653,31 +1666,52 @@ out: | |||
1653 | kfree(rg_req); | 1666 | kfree(rg_req); |
1654 | return res; | 1667 | return res; |
1655 | } | 1668 | } |
1669 | /** | ||
1670 | * sas_find_bcast_dev - find the device issue BROADCAST(CHANGE). | ||
1671 | * @dev:domain device to be detect. | ||
1672 | * @src_dev: the device which originated BROADCAST(CHANGE). | ||
1673 | * | ||
1674 | * Add self-configuration expander suport. Suppose two expander cascading, | ||
1675 | * when the first level expander is self-configuring, hotplug the disks in | ||
1676 | * second level expander, BROADCAST(CHANGE) will not only be originated | ||
1677 | * in the second level expander, but also be originated in the first level | ||
1678 | * expander (see SAS protocol SAS 2r-14, 7.11 for detail), it is to say, | ||
1679 | * expander changed count in two level expanders will all increment at least | ||
1680 | * once, but the phy which chang count has changed is the source device which | ||
1681 | * we concerned. | ||
1682 | */ | ||
1656 | 1683 | ||
1657 | static int sas_find_bcast_dev(struct domain_device *dev, | 1684 | static int sas_find_bcast_dev(struct domain_device *dev, |
1658 | struct domain_device **src_dev) | 1685 | struct domain_device **src_dev) |
1659 | { | 1686 | { |
1660 | struct expander_device *ex = &dev->ex_dev; | 1687 | struct expander_device *ex = &dev->ex_dev; |
1661 | int ex_change_count = -1; | 1688 | int ex_change_count = -1; |
1689 | int phy_id = -1; | ||
1662 | int res; | 1690 | int res; |
1691 | struct domain_device *ch; | ||
1663 | 1692 | ||
1664 | res = sas_get_ex_change_count(dev, &ex_change_count); | 1693 | res = sas_get_ex_change_count(dev, &ex_change_count); |
1665 | if (res) | 1694 | if (res) |
1666 | goto out; | 1695 | goto out; |
1667 | if (ex_change_count != -1 && | 1696 | if (ex_change_count != -1 && ex_change_count != ex->ex_change_count) { |
1668 | ex_change_count != ex->ex_change_count) { | 1697 | /* Just detect if this expander phys phy change count changed, |
1669 | *src_dev = dev; | 1698 | * in order to determine if this expander originate BROADCAST, |
1670 | ex->ex_change_count = ex_change_count; | 1699 | * and do not update phy change count field in our structure. |
1671 | } else { | 1700 | */ |
1672 | struct domain_device *ch; | 1701 | res = sas_find_bcast_phy(dev, &phy_id, 0, false); |
1673 | 1702 | if (phy_id != -1) { | |
1674 | list_for_each_entry(ch, &ex->children, siblings) { | 1703 | *src_dev = dev; |
1675 | if (ch->dev_type == EDGE_DEV || | 1704 | ex->ex_change_count = ex_change_count; |
1676 | ch->dev_type == FANOUT_DEV) { | 1705 | SAS_DPRINTK("Expander phy change count has changed\n"); |
1677 | res = sas_find_bcast_dev(ch, src_dev); | 1706 | return res; |
1678 | if (src_dev) | 1707 | } else |
1679 | return res; | 1708 | SAS_DPRINTK("Expander phys DID NOT change\n"); |
1680 | } | 1709 | } |
1710 | list_for_each_entry(ch, &ex->children, siblings) { | ||
1711 | if (ch->dev_type == EDGE_DEV || ch->dev_type == FANOUT_DEV) { | ||
1712 | res = sas_find_bcast_dev(ch, src_dev); | ||
1713 | if (src_dev) | ||
1714 | return res; | ||
1681 | } | 1715 | } |
1682 | } | 1716 | } |
1683 | out: | 1717 | out: |
@@ -1700,24 +1734,26 @@ static void sas_unregister_ex_tree(struct domain_device *dev) | |||
1700 | } | 1734 | } |
1701 | 1735 | ||
1702 | static void sas_unregister_devs_sas_addr(struct domain_device *parent, | 1736 | static void sas_unregister_devs_sas_addr(struct domain_device *parent, |
1703 | int phy_id) | 1737 | int phy_id, bool last) |
1704 | { | 1738 | { |
1705 | struct expander_device *ex_dev = &parent->ex_dev; | 1739 | struct expander_device *ex_dev = &parent->ex_dev; |
1706 | struct ex_phy *phy = &ex_dev->ex_phy[phy_id]; | 1740 | struct ex_phy *phy = &ex_dev->ex_phy[phy_id]; |
1707 | struct domain_device *child, *n; | 1741 | struct domain_device *child, *n; |
1708 | 1742 | if (last) { | |
1709 | list_for_each_entry_safe(child, n, &ex_dev->children, siblings) { | 1743 | list_for_each_entry_safe(child, n, |
1710 | if (SAS_ADDR(child->sas_addr) == | 1744 | &ex_dev->children, siblings) { |
1711 | SAS_ADDR(phy->attached_sas_addr)) { | 1745 | if (SAS_ADDR(child->sas_addr) == |
1712 | if (child->dev_type == EDGE_DEV || | 1746 | SAS_ADDR(phy->attached_sas_addr)) { |
1713 | child->dev_type == FANOUT_DEV) | 1747 | if (child->dev_type == EDGE_DEV || |
1714 | sas_unregister_ex_tree(child); | 1748 | child->dev_type == FANOUT_DEV) |
1715 | else | 1749 | sas_unregister_ex_tree(child); |
1716 | sas_unregister_dev(child); | 1750 | else |
1717 | break; | 1751 | sas_unregister_dev(child); |
1752 | break; | ||
1753 | } | ||
1718 | } | 1754 | } |
1755 | sas_disable_routing(parent, phy->attached_sas_addr); | ||
1719 | } | 1756 | } |
1720 | sas_disable_routing(parent, phy->attached_sas_addr); | ||
1721 | memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); | 1757 | memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); |
1722 | sas_port_delete_phy(phy->port, phy->phy); | 1758 | sas_port_delete_phy(phy->port, phy->phy); |
1723 | if (phy->port->num_phys == 0) | 1759 | if (phy->port->num_phys == 0) |
@@ -1770,15 +1806,31 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) | |||
1770 | { | 1806 | { |
1771 | struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id]; | 1807 | struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id]; |
1772 | struct domain_device *child; | 1808 | struct domain_device *child; |
1773 | int res; | 1809 | bool found = false; |
1810 | int res, i; | ||
1774 | 1811 | ||
1775 | SAS_DPRINTK("ex %016llx phy%d new device attached\n", | 1812 | SAS_DPRINTK("ex %016llx phy%d new device attached\n", |
1776 | SAS_ADDR(dev->sas_addr), phy_id); | 1813 | SAS_ADDR(dev->sas_addr), phy_id); |
1777 | res = sas_ex_phy_discover(dev, phy_id); | 1814 | res = sas_ex_phy_discover(dev, phy_id); |
1778 | if (res) | 1815 | if (res) |
1779 | goto out; | 1816 | goto out; |
1817 | /* to support the wide port inserted */ | ||
1818 | for (i = 0; i < dev->ex_dev.num_phys; i++) { | ||
1819 | struct ex_phy *ex_phy_temp = &dev->ex_dev.ex_phy[i]; | ||
1820 | if (i == phy_id) | ||
1821 | continue; | ||
1822 | if (SAS_ADDR(ex_phy_temp->attached_sas_addr) == | ||
1823 | SAS_ADDR(ex_phy->attached_sas_addr)) { | ||
1824 | found = true; | ||
1825 | break; | ||
1826 | } | ||
1827 | } | ||
1828 | if (found) { | ||
1829 | sas_ex_join_wide_port(dev, phy_id); | ||
1830 | return 0; | ||
1831 | } | ||
1780 | res = sas_ex_discover_devices(dev, phy_id); | 1832 | res = sas_ex_discover_devices(dev, phy_id); |
1781 | if (res) | 1833 | if (!res) |
1782 | goto out; | 1834 | goto out; |
1783 | list_for_each_entry(child, &dev->ex_dev.children, siblings) { | 1835 | list_for_each_entry(child, &dev->ex_dev.children, siblings) { |
1784 | if (SAS_ADDR(child->sas_addr) == | 1836 | if (SAS_ADDR(child->sas_addr) == |
@@ -1793,7 +1845,7 @@ out: | |||
1793 | return res; | 1845 | return res; |
1794 | } | 1846 | } |
1795 | 1847 | ||
1796 | static int sas_rediscover_dev(struct domain_device *dev, int phy_id) | 1848 | static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last) |
1797 | { | 1849 | { |
1798 | struct expander_device *ex = &dev->ex_dev; | 1850 | struct expander_device *ex = &dev->ex_dev; |
1799 | struct ex_phy *phy = &ex->ex_phy[phy_id]; | 1851 | struct ex_phy *phy = &ex->ex_phy[phy_id]; |
@@ -1804,11 +1856,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id) | |||
1804 | switch (res) { | 1856 | switch (res) { |
1805 | case SMP_RESP_NO_PHY: | 1857 | case SMP_RESP_NO_PHY: |
1806 | phy->phy_state = PHY_NOT_PRESENT; | 1858 | phy->phy_state = PHY_NOT_PRESENT; |
1807 | sas_unregister_devs_sas_addr(dev, phy_id); | 1859 | sas_unregister_devs_sas_addr(dev, phy_id, last); |
1808 | goto out; break; | 1860 | goto out; break; |
1809 | case SMP_RESP_PHY_VACANT: | 1861 | case SMP_RESP_PHY_VACANT: |
1810 | phy->phy_state = PHY_VACANT; | 1862 | phy->phy_state = PHY_VACANT; |
1811 | sas_unregister_devs_sas_addr(dev, phy_id); | 1863 | sas_unregister_devs_sas_addr(dev, phy_id, last); |
1812 | goto out; break; | 1864 | goto out; break; |
1813 | case SMP_RESP_FUNC_ACC: | 1865 | case SMP_RESP_FUNC_ACC: |
1814 | break; | 1866 | break; |
@@ -1816,7 +1868,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id) | |||
1816 | 1868 | ||
1817 | if (SAS_ADDR(attached_sas_addr) == 0) { | 1869 | if (SAS_ADDR(attached_sas_addr) == 0) { |
1818 | phy->phy_state = PHY_EMPTY; | 1870 | phy->phy_state = PHY_EMPTY; |
1819 | sas_unregister_devs_sas_addr(dev, phy_id); | 1871 | sas_unregister_devs_sas_addr(dev, phy_id, last); |
1820 | } else if (SAS_ADDR(attached_sas_addr) == | 1872 | } else if (SAS_ADDR(attached_sas_addr) == |
1821 | SAS_ADDR(phy->attached_sas_addr)) { | 1873 | SAS_ADDR(phy->attached_sas_addr)) { |
1822 | SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n", | 1874 | SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n", |
@@ -1828,12 +1880,27 @@ out: | |||
1828 | return res; | 1880 | return res; |
1829 | } | 1881 | } |
1830 | 1882 | ||
1883 | /** | ||
1884 | * sas_rediscover - revalidate the domain. | ||
1885 | * @dev:domain device to be detect. | ||
1886 | * @phy_id: the phy id will be detected. | ||
1887 | * | ||
1888 | * NOTE: this process _must_ quit (return) as soon as any connection | ||
1889 | * errors are encountered. Connection recovery is done elsewhere. | ||
1890 | * Discover process only interrogates devices in order to discover the | ||
1891 | * domain.For plugging out, we un-register the device only when it is | ||
1892 | * the last phy in the port, for other phys in this port, we just delete it | ||
1893 | * from the port.For inserting, we do discovery when it is the | ||
1894 | * first phy,for other phys in this port, we add it to the port to | ||
1895 | * forming the wide-port. | ||
1896 | */ | ||
1831 | static int sas_rediscover(struct domain_device *dev, const int phy_id) | 1897 | static int sas_rediscover(struct domain_device *dev, const int phy_id) |
1832 | { | 1898 | { |
1833 | struct expander_device *ex = &dev->ex_dev; | 1899 | struct expander_device *ex = &dev->ex_dev; |
1834 | struct ex_phy *changed_phy = &ex->ex_phy[phy_id]; | 1900 | struct ex_phy *changed_phy = &ex->ex_phy[phy_id]; |
1835 | int res = 0; | 1901 | int res = 0; |
1836 | int i; | 1902 | int i; |
1903 | bool last = true; /* is this the last phy of the port */ | ||
1837 | 1904 | ||
1838 | SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n", | 1905 | SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n", |
1839 | SAS_ADDR(dev->sas_addr), phy_id); | 1906 | SAS_ADDR(dev->sas_addr), phy_id); |
@@ -1848,13 +1915,13 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id) | |||
1848 | SAS_ADDR(changed_phy->attached_sas_addr)) { | 1915 | SAS_ADDR(changed_phy->attached_sas_addr)) { |
1849 | SAS_DPRINTK("phy%d part of wide port with " | 1916 | SAS_DPRINTK("phy%d part of wide port with " |
1850 | "phy%d\n", phy_id, i); | 1917 | "phy%d\n", phy_id, i); |
1851 | goto out; | 1918 | last = false; |
1919 | break; | ||
1852 | } | 1920 | } |
1853 | } | 1921 | } |
1854 | res = sas_rediscover_dev(dev, phy_id); | 1922 | res = sas_rediscover_dev(dev, phy_id, last); |
1855 | } else | 1923 | } else |
1856 | res = sas_discover_new(dev, phy_id); | 1924 | res = sas_discover_new(dev, phy_id); |
1857 | out: | ||
1858 | return res; | 1925 | return res; |
1859 | } | 1926 | } |
1860 | 1927 | ||
@@ -1881,7 +1948,7 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) | |||
1881 | 1948 | ||
1882 | do { | 1949 | do { |
1883 | phy_id = -1; | 1950 | phy_id = -1; |
1884 | res = sas_find_bcast_phy(dev, &phy_id, i); | 1951 | res = sas_find_bcast_phy(dev, &phy_id, i, true); |
1885 | if (phy_id == -1) | 1952 | if (phy_id == -1) |
1886 | break; | 1953 | break; |
1887 | res = sas_rediscover(dev, phy_id); | 1954 | res = sas_rediscover(dev, phy_id); |
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index e6ac59c023f1..fe8b74c706d2 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c | |||
@@ -56,7 +56,7 @@ static void sas_form_port(struct asd_sas_phy *phy) | |||
56 | } | 56 | } |
57 | } | 57 | } |
58 | 58 | ||
59 | /* find a port */ | 59 | /* see if the phy should be part of a wide port */ |
60 | spin_lock_irqsave(&sas_ha->phy_port_lock, flags); | 60 | spin_lock_irqsave(&sas_ha->phy_port_lock, flags); |
61 | for (i = 0; i < sas_ha->num_phys; i++) { | 61 | for (i = 0; i < sas_ha->num_phys; i++) { |
62 | port = sas_ha->sas_port[i]; | 62 | port = sas_ha->sas_port[i]; |
@@ -69,12 +69,23 @@ static void sas_form_port(struct asd_sas_phy *phy) | |||
69 | SAS_DPRINTK("phy%d matched wide port%d\n", phy->id, | 69 | SAS_DPRINTK("phy%d matched wide port%d\n", phy->id, |
70 | port->id); | 70 | port->id); |
71 | break; | 71 | break; |
72 | } else if (*(u64 *) port->sas_addr == 0 && port->num_phys==0) { | ||
73 | memcpy(port->sas_addr, phy->sas_addr, SAS_ADDR_SIZE); | ||
74 | break; | ||
75 | } | 72 | } |
76 | spin_unlock(&port->phy_list_lock); | 73 | spin_unlock(&port->phy_list_lock); |
77 | } | 74 | } |
75 | /* The phy does not match any existing port, create a new one */ | ||
76 | if (i == sas_ha->num_phys) { | ||
77 | for (i = 0; i < sas_ha->num_phys; i++) { | ||
78 | port = sas_ha->sas_port[i]; | ||
79 | spin_lock(&port->phy_list_lock); | ||
80 | if (*(u64 *)port->sas_addr == 0 | ||
81 | && port->num_phys == 0) { | ||
82 | memcpy(port->sas_addr, phy->sas_addr, | ||
83 | SAS_ADDR_SIZE); | ||
84 | break; | ||
85 | } | ||
86 | spin_unlock(&port->phy_list_lock); | ||
87 | } | ||
88 | } | ||
78 | 89 | ||
79 | if (i >= sas_ha->num_phys) { | 90 | if (i >= sas_ha->num_phys) { |
80 | printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n", | 91 | printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n", |
diff --git a/drivers/scsi/qla4xxx/ql4_dbg.c b/drivers/scsi/qla4xxx/ql4_dbg.c index fcc184cd066d..cbceb0ebabf7 100644 --- a/drivers/scsi/qla4xxx/ql4_dbg.c +++ b/drivers/scsi/qla4xxx/ql4_dbg.c | |||
@@ -15,19 +15,18 @@ void qla4xxx_dump_buffer(void *b, uint32_t size) | |||
15 | uint32_t cnt; | 15 | uint32_t cnt; |
16 | uint8_t *c = b; | 16 | uint8_t *c = b; |
17 | 17 | ||
18 | printk(" 0 1 2 3 4 5 6 7 8 9 Ah Bh Ch Dh Eh " | 18 | printk(" 0 1 2 3 4 5 6 7 8 9 Ah Bh Ch Dh Eh " |
19 | "Fh\n"); | 19 | "Fh\n"); |
20 | printk("------------------------------------------------------------" | 20 | printk("------------------------------------------------------------" |
21 | "--\n"); | 21 | "--\n"); |
22 | for (cnt = 0; cnt < size; cnt++, c++) { | 22 | for (cnt = 0; cnt < size; c++) { |
23 | printk(KERN_DEBUG "%02x", *c); | 23 | printk(KERN_INFO "%02x", *c); |
24 | if (!(cnt % 16)) | 24 | if (!(++cnt % 16)) |
25 | printk(KERN_DEBUG "\n"); | 25 | printk(KERN_INFO "\n"); |
26 | 26 | ||
27 | else | 27 | else |
28 | printk(KERN_DEBUG " "); | 28 | printk(KERN_INFO " "); |
29 | } | 29 | } |
30 | if (cnt % 16) | 30 | printk(KERN_INFO "\n"); |
31 | printk(KERN_DEBUG "\n"); | ||
32 | } | 31 | } |
33 | 32 | ||
diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index b586f27c3bd4..81b5f29254e2 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h | |||
@@ -100,7 +100,6 @@ | |||
100 | #define MAX_SRBS MAX_CMDS_TO_RISC | 100 | #define MAX_SRBS MAX_CMDS_TO_RISC |
101 | #define MBOX_AEN_REG_COUNT 5 | 101 | #define MBOX_AEN_REG_COUNT 5 |
102 | #define MAX_INIT_RETRIES 5 | 102 | #define MAX_INIT_RETRIES 5 |
103 | #define IOCB_HIWAT_CUSHION 16 | ||
104 | 103 | ||
105 | /* | 104 | /* |
106 | * Buffer sizes | 105 | * Buffer sizes |
@@ -184,6 +183,11 @@ struct srb { | |||
184 | uint16_t cc_stat; | 183 | uint16_t cc_stat; |
185 | u_long r_start; /* Time we recieve a cmd from OS */ | 184 | u_long r_start; /* Time we recieve a cmd from OS */ |
186 | u_long u_start; /* Time when we handed the cmd to F/W */ | 185 | u_long u_start; /* Time when we handed the cmd to F/W */ |
186 | |||
187 | /* Used for extended sense / status continuation */ | ||
188 | uint8_t *req_sense_ptr; | ||
189 | uint16_t req_sense_len; | ||
190 | uint16_t reserved2; | ||
187 | }; | 191 | }; |
188 | 192 | ||
189 | /* | 193 | /* |
@@ -302,7 +306,6 @@ struct scsi_qla_host { | |||
302 | uint32_t tot_ddbs; | 306 | uint32_t tot_ddbs; |
303 | 307 | ||
304 | uint16_t iocb_cnt; | 308 | uint16_t iocb_cnt; |
305 | uint16_t iocb_hiwat; | ||
306 | 309 | ||
307 | /* SRB cache. */ | 310 | /* SRB cache. */ |
308 | #define SRB_MIN_REQ 128 | 311 | #define SRB_MIN_REQ 128 |
@@ -436,6 +439,8 @@ struct scsi_qla_host { | |||
436 | /* Map ddb_list entry by FW ddb index */ | 439 | /* Map ddb_list entry by FW ddb index */ |
437 | struct ddb_entry *fw_ddb_index_map[MAX_DDB_ENTRIES]; | 440 | struct ddb_entry *fw_ddb_index_map[MAX_DDB_ENTRIES]; |
438 | 441 | ||
442 | /* Saved srb for status continuation entry processing */ | ||
443 | struct srb *status_srb; | ||
439 | }; | 444 | }; |
440 | 445 | ||
441 | static inline int is_qla4010(struct scsi_qla_host *ha) | 446 | static inline int is_qla4010(struct scsi_qla_host *ha) |
diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h index 1b667a70cffa..9cd7a608df38 100644 --- a/drivers/scsi/qla4xxx/ql4_fw.h +++ b/drivers/scsi/qla4xxx/ql4_fw.h | |||
@@ -572,6 +572,7 @@ struct conn_event_log_entry { | |||
572 | *************************************************************************/ | 572 | *************************************************************************/ |
573 | #define IOCB_MAX_CDB_LEN 16 /* Bytes in a CBD */ | 573 | #define IOCB_MAX_CDB_LEN 16 /* Bytes in a CBD */ |
574 | #define IOCB_MAX_SENSEDATA_LEN 32 /* Bytes of sense data */ | 574 | #define IOCB_MAX_SENSEDATA_LEN 32 /* Bytes of sense data */ |
575 | #define IOCB_MAX_EXT_SENSEDATA_LEN 60 /* Bytes of extended sense data */ | ||
575 | 576 | ||
576 | /* IOCB header structure */ | 577 | /* IOCB header structure */ |
577 | struct qla4_header { | 578 | struct qla4_header { |
@@ -733,6 +734,12 @@ struct status_entry { | |||
733 | 734 | ||
734 | }; | 735 | }; |
735 | 736 | ||
737 | /* Status Continuation entry */ | ||
738 | struct status_cont_entry { | ||
739 | struct qla4_header hdr; /* 00-03 */ | ||
740 | uint8_t ext_sense_data[IOCB_MAX_EXT_SENSEDATA_LEN]; /* 04-63 */ | ||
741 | }; | ||
742 | |||
736 | struct passthru0 { | 743 | struct passthru0 { |
737 | struct qla4_header hdr; /* 00-03 */ | 744 | struct qla4_header hdr; /* 00-03 */ |
738 | uint32_t handle; /* 04-07 */ | 745 | uint32_t handle; /* 04-07 */ |
diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c index 912a67494adf..e0c32159749c 100644 --- a/drivers/scsi/qla4xxx/ql4_iocb.c +++ b/drivers/scsi/qla4xxx/ql4_iocb.c | |||
@@ -10,9 +10,42 @@ | |||
10 | #include "ql4_dbg.h" | 10 | #include "ql4_dbg.h" |
11 | #include "ql4_inline.h" | 11 | #include "ql4_inline.h" |
12 | 12 | ||
13 | |||
14 | #include <scsi/scsi_tcq.h> | 13 | #include <scsi/scsi_tcq.h> |
15 | 14 | ||
15 | static int | ||
16 | qla4xxx_space_in_req_ring(struct scsi_qla_host *ha, uint16_t req_cnt) | ||
17 | { | ||
18 | uint16_t cnt; | ||
19 | |||
20 | /* Calculate number of free request entries. */ | ||
21 | if ((req_cnt + 2) >= ha->req_q_count) { | ||
22 | cnt = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); | ||
23 | if (ha->request_in < cnt) | ||
24 | ha->req_q_count = cnt - ha->request_in; | ||
25 | else | ||
26 | ha->req_q_count = REQUEST_QUEUE_DEPTH - | ||
27 | (ha->request_in - cnt); | ||
28 | } | ||
29 | |||
30 | /* Check if room for request in request ring. */ | ||
31 | if ((req_cnt + 2) < ha->req_q_count) | ||
32 | return 1; | ||
33 | else | ||
34 | return 0; | ||
35 | } | ||
36 | |||
37 | static void qla4xxx_advance_req_ring_ptr(struct scsi_qla_host *ha) | ||
38 | { | ||
39 | /* Advance request queue pointer */ | ||
40 | if (ha->request_in == (REQUEST_QUEUE_DEPTH - 1)) { | ||
41 | ha->request_in = 0; | ||
42 | ha->request_ptr = ha->request_ring; | ||
43 | } else { | ||
44 | ha->request_in++; | ||
45 | ha->request_ptr++; | ||
46 | } | ||
47 | } | ||
48 | |||
16 | /** | 49 | /** |
17 | * qla4xxx_get_req_pkt - returns a valid entry in request queue. | 50 | * qla4xxx_get_req_pkt - returns a valid entry in request queue. |
18 | * @ha: Pointer to host adapter structure. | 51 | * @ha: Pointer to host adapter structure. |
@@ -26,35 +59,18 @@ | |||
26 | static int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, | 59 | static int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, |
27 | struct queue_entry **queue_entry) | 60 | struct queue_entry **queue_entry) |
28 | { | 61 | { |
29 | uint16_t request_in; | 62 | uint16_t req_cnt = 1; |
30 | uint8_t status = QLA_SUCCESS; | ||
31 | |||
32 | *queue_entry = ha->request_ptr; | ||
33 | 63 | ||
34 | /* get the latest request_in and request_out index */ | 64 | if (qla4xxx_space_in_req_ring(ha, req_cnt)) { |
35 | request_in = ha->request_in; | 65 | *queue_entry = ha->request_ptr; |
36 | ha->request_out = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); | ||
37 | |||
38 | /* Advance request queue pointer and check for queue full */ | ||
39 | if (request_in == (REQUEST_QUEUE_DEPTH - 1)) { | ||
40 | request_in = 0; | ||
41 | ha->request_ptr = ha->request_ring; | ||
42 | } else { | ||
43 | request_in++; | ||
44 | ha->request_ptr++; | ||
45 | } | ||
46 | |||
47 | /* request queue is full, try again later */ | ||
48 | if ((ha->iocb_cnt + 1) >= ha->iocb_hiwat) { | ||
49 | /* restore request pointer */ | ||
50 | ha->request_ptr = *queue_entry; | ||
51 | status = QLA_ERROR; | ||
52 | } else { | ||
53 | ha->request_in = request_in; | ||
54 | memset(*queue_entry, 0, sizeof(**queue_entry)); | 66 | memset(*queue_entry, 0, sizeof(**queue_entry)); |
67 | |||
68 | qla4xxx_advance_req_ring_ptr(ha); | ||
69 | ha->req_q_count -= req_cnt; | ||
70 | return QLA_SUCCESS; | ||
55 | } | 71 | } |
56 | 72 | ||
57 | return status; | 73 | return QLA_ERROR; |
58 | } | 74 | } |
59 | 75 | ||
60 | /** | 76 | /** |
@@ -100,21 +116,14 @@ exit_send_marker: | |||
100 | return status; | 116 | return status; |
101 | } | 117 | } |
102 | 118 | ||
103 | static struct continuation_t1_entry* qla4xxx_alloc_cont_entry( | 119 | static struct continuation_t1_entry * |
104 | struct scsi_qla_host *ha) | 120 | qla4xxx_alloc_cont_entry(struct scsi_qla_host *ha) |
105 | { | 121 | { |
106 | struct continuation_t1_entry *cont_entry; | 122 | struct continuation_t1_entry *cont_entry; |
107 | 123 | ||
108 | cont_entry = (struct continuation_t1_entry *)ha->request_ptr; | 124 | cont_entry = (struct continuation_t1_entry *)ha->request_ptr; |
109 | 125 | ||
110 | /* Advance request queue pointer */ | 126 | qla4xxx_advance_req_ring_ptr(ha); |
111 | if (ha->request_in == (REQUEST_QUEUE_DEPTH - 1)) { | ||
112 | ha->request_in = 0; | ||
113 | ha->request_ptr = ha->request_ring; | ||
114 | } else { | ||
115 | ha->request_in++; | ||
116 | ha->request_ptr++; | ||
117 | } | ||
118 | 127 | ||
119 | /* Load packet defaults */ | 128 | /* Load packet defaults */ |
120 | cont_entry->hdr.entryType = ET_CONTINUE; | 129 | cont_entry->hdr.entryType = ET_CONTINUE; |
@@ -197,13 +206,10 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) | |||
197 | struct scsi_cmnd *cmd = srb->cmd; | 206 | struct scsi_cmnd *cmd = srb->cmd; |
198 | struct ddb_entry *ddb_entry; | 207 | struct ddb_entry *ddb_entry; |
199 | struct command_t3_entry *cmd_entry; | 208 | struct command_t3_entry *cmd_entry; |
200 | |||
201 | int nseg; | 209 | int nseg; |
202 | uint16_t tot_dsds; | 210 | uint16_t tot_dsds; |
203 | uint16_t req_cnt; | 211 | uint16_t req_cnt; |
204 | |||
205 | unsigned long flags; | 212 | unsigned long flags; |
206 | uint16_t cnt; | ||
207 | uint32_t index; | 213 | uint32_t index; |
208 | char tag[2]; | 214 | char tag[2]; |
209 | 215 | ||
@@ -217,6 +223,19 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) | |||
217 | 223 | ||
218 | index = (uint32_t)cmd->request->tag; | 224 | index = (uint32_t)cmd->request->tag; |
219 | 225 | ||
226 | /* | ||
227 | * Check to see if adapter is online before placing request on | ||
228 | * request queue. If a reset occurs and a request is in the queue, | ||
229 | * the firmware will still attempt to process the request, retrieving | ||
230 | * garbage for pointers. | ||
231 | */ | ||
232 | if (!test_bit(AF_ONLINE, &ha->flags)) { | ||
233 | DEBUG2(printk("scsi%ld: %s: Adapter OFFLINE! " | ||
234 | "Do not issue command.\n", | ||
235 | ha->host_no, __func__)); | ||
236 | goto queuing_error; | ||
237 | } | ||
238 | |||
220 | /* Calculate the number of request entries needed. */ | 239 | /* Calculate the number of request entries needed. */ |
221 | nseg = scsi_dma_map(cmd); | 240 | nseg = scsi_dma_map(cmd); |
222 | if (nseg < 0) | 241 | if (nseg < 0) |
@@ -224,17 +243,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) | |||
224 | tot_dsds = nseg; | 243 | tot_dsds = nseg; |
225 | 244 | ||
226 | req_cnt = qla4xxx_calc_request_entries(tot_dsds); | 245 | req_cnt = qla4xxx_calc_request_entries(tot_dsds); |
227 | 246 | if (!qla4xxx_space_in_req_ring(ha, req_cnt)) | |
228 | if (ha->req_q_count < (req_cnt + 2)) { | ||
229 | cnt = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); | ||
230 | if (ha->request_in < cnt) | ||
231 | ha->req_q_count = cnt - ha->request_in; | ||
232 | else | ||
233 | ha->req_q_count = REQUEST_QUEUE_DEPTH - | ||
234 | (ha->request_in - cnt); | ||
235 | } | ||
236 | |||
237 | if (ha->req_q_count < (req_cnt + 2)) | ||
238 | goto queuing_error; | 247 | goto queuing_error; |
239 | 248 | ||
240 | /* total iocbs active */ | 249 | /* total iocbs active */ |
@@ -286,32 +295,10 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) | |||
286 | break; | 295 | break; |
287 | } | 296 | } |
288 | 297 | ||
289 | 298 | qla4xxx_advance_req_ring_ptr(ha); | |
290 | /* Advance request queue pointer */ | ||
291 | ha->request_in++; | ||
292 | if (ha->request_in == REQUEST_QUEUE_DEPTH) { | ||
293 | ha->request_in = 0; | ||
294 | ha->request_ptr = ha->request_ring; | ||
295 | } else | ||
296 | ha->request_ptr++; | ||
297 | |||
298 | |||
299 | qla4xxx_build_scsi_iocbs(srb, cmd_entry, tot_dsds); | 299 | qla4xxx_build_scsi_iocbs(srb, cmd_entry, tot_dsds); |
300 | wmb(); | 300 | wmb(); |
301 | 301 | ||
302 | /* | ||
303 | * Check to see if adapter is online before placing request on | ||
304 | * request queue. If a reset occurs and a request is in the queue, | ||
305 | * the firmware will still attempt to process the request, retrieving | ||
306 | * garbage for pointers. | ||
307 | */ | ||
308 | if (!test_bit(AF_ONLINE, &ha->flags)) { | ||
309 | DEBUG2(printk("scsi%ld: %s: Adapter OFFLINE! " | ||
310 | "Do not issue command.\n", | ||
311 | ha->host_no, __func__)); | ||
312 | goto queuing_error; | ||
313 | } | ||
314 | |||
315 | srb->cmd->host_scribble = (unsigned char *)srb; | 302 | srb->cmd->host_scribble = (unsigned char *)srb; |
316 | 303 | ||
317 | /* update counters */ | 304 | /* update counters */ |
diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c index 799120fcb9be..8025ee16588e 100644 --- a/drivers/scsi/qla4xxx/ql4_isr.c +++ b/drivers/scsi/qla4xxx/ql4_isr.c | |||
@@ -11,6 +11,98 @@ | |||
11 | #include "ql4_inline.h" | 11 | #include "ql4_inline.h" |
12 | 12 | ||
13 | /** | 13 | /** |
14 | * qla4xxx_copy_sense - copy sense data into cmd sense buffer | ||
15 | * @ha: Pointer to host adapter structure. | ||
16 | * @sts_entry: Pointer to status entry structure. | ||
17 | * @srb: Pointer to srb structure. | ||
18 | **/ | ||
19 | static void qla4xxx_copy_sense(struct scsi_qla_host *ha, | ||
20 | struct status_entry *sts_entry, | ||
21 | struct srb *srb) | ||
22 | { | ||
23 | struct scsi_cmnd *cmd = srb->cmd; | ||
24 | uint16_t sense_len; | ||
25 | |||
26 | memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); | ||
27 | sense_len = le16_to_cpu(sts_entry->senseDataByteCnt); | ||
28 | if (sense_len == 0) | ||
29 | return; | ||
30 | |||
31 | /* Save total available sense length, | ||
32 | * not to exceed cmd's sense buffer size */ | ||
33 | sense_len = min_t(uint16_t, sense_len, SCSI_SENSE_BUFFERSIZE); | ||
34 | srb->req_sense_ptr = cmd->sense_buffer; | ||
35 | srb->req_sense_len = sense_len; | ||
36 | |||
37 | /* Copy sense from sts_entry pkt */ | ||
38 | sense_len = min_t(uint16_t, sense_len, IOCB_MAX_SENSEDATA_LEN); | ||
39 | memcpy(cmd->sense_buffer, sts_entry->senseData, sense_len); | ||
40 | |||
41 | DEBUG2(printk(KERN_INFO "scsi%ld:%d:%d:%d: %s: sense key = %x, " | ||
42 | "ASL= %02x, ASC/ASCQ = %02x/%02x\n", ha->host_no, | ||
43 | cmd->device->channel, cmd->device->id, | ||
44 | cmd->device->lun, __func__, | ||
45 | sts_entry->senseData[2] & 0x0f, | ||
46 | sts_entry->senseData[7], | ||
47 | sts_entry->senseData[12], | ||
48 | sts_entry->senseData[13])); | ||
49 | |||
50 | DEBUG5(qla4xxx_dump_buffer(cmd->sense_buffer, sense_len)); | ||
51 | srb->flags |= SRB_GOT_SENSE; | ||
52 | |||
53 | /* Update srb, in case a sts_cont pkt follows */ | ||
54 | srb->req_sense_ptr += sense_len; | ||
55 | srb->req_sense_len -= sense_len; | ||
56 | if (srb->req_sense_len != 0) | ||
57 | ha->status_srb = srb; | ||
58 | else | ||
59 | ha->status_srb = NULL; | ||
60 | } | ||
61 | |||
62 | /** | ||
63 | * qla4xxx_status_cont_entry - Process a Status Continuations entry. | ||
64 | * @ha: SCSI driver HA context | ||
65 | * @sts_cont: Entry pointer | ||
66 | * | ||
67 | * Extended sense data. | ||
68 | */ | ||
69 | static void | ||
70 | qla4xxx_status_cont_entry(struct scsi_qla_host *ha, | ||
71 | struct status_cont_entry *sts_cont) | ||
72 | { | ||
73 | struct srb *srb = ha->status_srb; | ||
74 | struct scsi_cmnd *cmd; | ||
75 | uint8_t sense_len; | ||
76 | |||
77 | if (srb == NULL) | ||
78 | return; | ||
79 | |||
80 | cmd = srb->cmd; | ||
81 | if (cmd == NULL) { | ||
82 | DEBUG2(printk(KERN_INFO "scsi%ld: %s: Cmd already returned " | ||
83 | "back to OS srb=%p srb->state:%d\n", ha->host_no, | ||
84 | __func__, srb, srb->state)); | ||
85 | ha->status_srb = NULL; | ||
86 | return; | ||
87 | } | ||
88 | |||
89 | /* Copy sense data. */ | ||
90 | sense_len = min_t(uint16_t, srb->req_sense_len, | ||
91 | IOCB_MAX_EXT_SENSEDATA_LEN); | ||
92 | memcpy(srb->req_sense_ptr, sts_cont->ext_sense_data, sense_len); | ||
93 | DEBUG5(qla4xxx_dump_buffer(srb->req_sense_ptr, sense_len)); | ||
94 | |||
95 | srb->req_sense_ptr += sense_len; | ||
96 | srb->req_sense_len -= sense_len; | ||
97 | |||
98 | /* Place command on done queue. */ | ||
99 | if (srb->req_sense_len == 0) { | ||
100 | qla4xxx_srb_compl(ha, srb); | ||
101 | ha->status_srb = NULL; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | /** | ||
14 | * qla4xxx_status_entry - processes status IOCBs | 106 | * qla4xxx_status_entry - processes status IOCBs |
15 | * @ha: Pointer to host adapter structure. | 107 | * @ha: Pointer to host adapter structure. |
16 | * @sts_entry: Pointer to status entry structure. | 108 | * @sts_entry: Pointer to status entry structure. |
@@ -23,7 +115,6 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, | |||
23 | struct srb *srb; | 115 | struct srb *srb; |
24 | struct ddb_entry *ddb_entry; | 116 | struct ddb_entry *ddb_entry; |
25 | uint32_t residual; | 117 | uint32_t residual; |
26 | uint16_t sensebytecnt; | ||
27 | 118 | ||
28 | srb = qla4xxx_del_from_active_array(ha, le32_to_cpu(sts_entry->handle)); | 119 | srb = qla4xxx_del_from_active_array(ha, le32_to_cpu(sts_entry->handle)); |
29 | if (!srb) { | 120 | if (!srb) { |
@@ -92,24 +183,7 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, | |||
92 | break; | 183 | break; |
93 | 184 | ||
94 | /* Copy Sense Data into sense buffer. */ | 185 | /* Copy Sense Data into sense buffer. */ |
95 | memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); | 186 | qla4xxx_copy_sense(ha, sts_entry, srb); |
96 | |||
97 | sensebytecnt = le16_to_cpu(sts_entry->senseDataByteCnt); | ||
98 | if (sensebytecnt == 0) | ||
99 | break; | ||
100 | |||
101 | memcpy(cmd->sense_buffer, sts_entry->senseData, | ||
102 | min_t(uint16_t, sensebytecnt, SCSI_SENSE_BUFFERSIZE)); | ||
103 | |||
104 | DEBUG2(printk("scsi%ld:%d:%d:%d: %s: sense key = %x, " | ||
105 | "ASC/ASCQ = %02x/%02x\n", ha->host_no, | ||
106 | cmd->device->channel, cmd->device->id, | ||
107 | cmd->device->lun, __func__, | ||
108 | sts_entry->senseData[2] & 0x0f, | ||
109 | sts_entry->senseData[12], | ||
110 | sts_entry->senseData[13])); | ||
111 | |||
112 | srb->flags |= SRB_GOT_SENSE; | ||
113 | break; | 187 | break; |
114 | 188 | ||
115 | case SCS_INCOMPLETE: | 189 | case SCS_INCOMPLETE: |
@@ -176,23 +250,7 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, | |||
176 | break; | 250 | break; |
177 | 251 | ||
178 | /* Copy Sense Data into sense buffer. */ | 252 | /* Copy Sense Data into sense buffer. */ |
179 | memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); | 253 | qla4xxx_copy_sense(ha, sts_entry, srb); |
180 | |||
181 | sensebytecnt = | ||
182 | le16_to_cpu(sts_entry->senseDataByteCnt); | ||
183 | if (sensebytecnt == 0) | ||
184 | break; | ||
185 | |||
186 | memcpy(cmd->sense_buffer, sts_entry->senseData, | ||
187 | min_t(uint16_t, sensebytecnt, SCSI_SENSE_BUFFERSIZE)); | ||
188 | |||
189 | DEBUG2(printk("scsi%ld:%d:%d:%d: %s: sense key = %x, " | ||
190 | "ASC/ASCQ = %02x/%02x\n", ha->host_no, | ||
191 | cmd->device->channel, cmd->device->id, | ||
192 | cmd->device->lun, __func__, | ||
193 | sts_entry->senseData[2] & 0x0f, | ||
194 | sts_entry->senseData[12], | ||
195 | sts_entry->senseData[13])); | ||
196 | } else { | 254 | } else { |
197 | /* | 255 | /* |
198 | * If RISC reports underrun and target does not | 256 | * If RISC reports underrun and target does not |
@@ -268,9 +326,10 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, | |||
268 | 326 | ||
269 | status_entry_exit: | 327 | status_entry_exit: |
270 | 328 | ||
271 | /* complete the request */ | 329 | /* complete the request, if not waiting for status_continuation pkt */ |
272 | srb->cc_stat = sts_entry->completionStatus; | 330 | srb->cc_stat = sts_entry->completionStatus; |
273 | qla4xxx_srb_compl(ha, srb); | 331 | if (ha->status_srb == NULL) |
332 | qla4xxx_srb_compl(ha, srb); | ||
274 | } | 333 | } |
275 | 334 | ||
276 | /** | 335 | /** |
@@ -305,10 +364,7 @@ static void qla4xxx_process_response_queue(struct scsi_qla_host * ha) | |||
305 | /* process entry */ | 364 | /* process entry */ |
306 | switch (sts_entry->hdr.entryType) { | 365 | switch (sts_entry->hdr.entryType) { |
307 | case ET_STATUS: | 366 | case ET_STATUS: |
308 | /* | 367 | /* Common status */ |
309 | * Common status - Single completion posted in single | ||
310 | * IOSB. | ||
311 | */ | ||
312 | qla4xxx_status_entry(ha, sts_entry); | 368 | qla4xxx_status_entry(ha, sts_entry); |
313 | break; | 369 | break; |
314 | 370 | ||
@@ -316,9 +372,8 @@ static void qla4xxx_process_response_queue(struct scsi_qla_host * ha) | |||
316 | break; | 372 | break; |
317 | 373 | ||
318 | case ET_STATUS_CONTINUATION: | 374 | case ET_STATUS_CONTINUATION: |
319 | /* Just throw away the status continuation entries */ | 375 | qla4xxx_status_cont_entry(ha, |
320 | DEBUG2(printk("scsi%ld: %s: Status Continuation entry " | 376 | (struct status_cont_entry *) sts_entry); |
321 | "- ignoring\n", ha->host_no, __func__)); | ||
322 | break; | 377 | break; |
323 | 378 | ||
324 | case ET_COMMAND: | 379 | case ET_COMMAND: |
diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 051b0f5e8c8e..09d6d4b76f39 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c | |||
@@ -385,16 +385,6 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha) | |||
385 | mbox_sts[0])); | 385 | mbox_sts[0])); |
386 | return QLA_ERROR; | 386 | return QLA_ERROR; |
387 | } | 387 | } |
388 | |||
389 | /* High-water mark of IOCBs */ | ||
390 | ha->iocb_hiwat = mbox_sts[2]; | ||
391 | if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION) | ||
392 | ha->iocb_hiwat -= IOCB_HIWAT_CUSHION; | ||
393 | else | ||
394 | dev_info(&ha->pdev->dev, "WARNING!!! You have less than %d " | ||
395 | "firmware IOCBs available (%d).\n", | ||
396 | IOCB_HIWAT_CUSHION, ha->iocb_hiwat); | ||
397 | |||
398 | return QLA_SUCCESS; | 388 | return QLA_SUCCESS; |
399 | } | 389 | } |
400 | 390 | ||
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index ec9da6ce8489..40e3cafb3a9c 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c | |||
@@ -66,6 +66,7 @@ static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess, | |||
66 | static int qla4xxx_host_get_param(struct Scsi_Host *shost, | 66 | static int qla4xxx_host_get_param(struct Scsi_Host *shost, |
67 | enum iscsi_host_param param, char *buf); | 67 | enum iscsi_host_param param, char *buf); |
68 | static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session); | 68 | static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session); |
69 | static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc); | ||
69 | 70 | ||
70 | /* | 71 | /* |
71 | * SCSI host template entry points | 72 | * SCSI host template entry points |
@@ -89,6 +90,7 @@ static struct scsi_host_template qla4xxx_driver_template = { | |||
89 | .eh_device_reset_handler = qla4xxx_eh_device_reset, | 90 | .eh_device_reset_handler = qla4xxx_eh_device_reset, |
90 | .eh_target_reset_handler = qla4xxx_eh_target_reset, | 91 | .eh_target_reset_handler = qla4xxx_eh_target_reset, |
91 | .eh_host_reset_handler = qla4xxx_eh_host_reset, | 92 | .eh_host_reset_handler = qla4xxx_eh_host_reset, |
93 | .eh_timed_out = qla4xxx_eh_cmd_timed_out, | ||
92 | 94 | ||
93 | .slave_configure = qla4xxx_slave_configure, | 95 | .slave_configure = qla4xxx_slave_configure, |
94 | .slave_alloc = qla4xxx_slave_alloc, | 96 | .slave_alloc = qla4xxx_slave_alloc, |
@@ -124,6 +126,21 @@ static struct iscsi_transport qla4xxx_iscsi_transport = { | |||
124 | 126 | ||
125 | static struct scsi_transport_template *qla4xxx_scsi_transport; | 127 | static struct scsi_transport_template *qla4xxx_scsi_transport; |
126 | 128 | ||
129 | static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc) | ||
130 | { | ||
131 | struct iscsi_cls_session *session; | ||
132 | struct ddb_entry *ddb_entry; | ||
133 | |||
134 | session = starget_to_session(scsi_target(sc->device)); | ||
135 | ddb_entry = session->dd_data; | ||
136 | |||
137 | /* if we are not logged in then the LLD is going to clean up the cmd */ | ||
138 | if (atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) | ||
139 | return BLK_EH_RESET_TIMER; | ||
140 | else | ||
141 | return BLK_EH_NOT_HANDLED; | ||
142 | } | ||
143 | |||
127 | static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session) | 144 | static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session) |
128 | { | 145 | { |
129 | struct ddb_entry *ddb_entry = session->dd_data; | 146 | struct ddb_entry *ddb_entry = session->dd_data; |
@@ -904,18 +921,17 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha, | |||
904 | /* Flush any pending ddb changed AENs */ | 921 | /* Flush any pending ddb changed AENs */ |
905 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); | 922 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); |
906 | 923 | ||
924 | qla4xxx_flush_active_srbs(ha); | ||
925 | |||
907 | /* Reset the firmware. If successful, function | 926 | /* Reset the firmware. If successful, function |
908 | * returns with ISP interrupts enabled. | 927 | * returns with ISP interrupts enabled. |
909 | */ | 928 | */ |
910 | if (status == QLA_SUCCESS) { | 929 | DEBUG2(printk("scsi%ld: %s - Performing soft reset..\n", |
911 | DEBUG2(printk("scsi%ld: %s - Performing soft reset..\n", | 930 | ha->host_no, __func__)); |
912 | ha->host_no, __func__)); | 931 | if (ql4xxx_lock_drvr_wait(ha) == QLA_SUCCESS) |
913 | qla4xxx_flush_active_srbs(ha); | 932 | status = qla4xxx_soft_reset(ha); |
914 | if (ql4xxx_lock_drvr_wait(ha) == QLA_SUCCESS) | 933 | else |
915 | status = qla4xxx_soft_reset(ha); | 934 | status = QLA_ERROR; |
916 | else | ||
917 | status = QLA_ERROR; | ||
918 | } | ||
919 | 935 | ||
920 | /* Flush any pending ddb changed AENs */ | 936 | /* Flush any pending ddb changed AENs */ |
921 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); | 937 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); |
@@ -1527,11 +1543,9 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) | |||
1527 | { | 1543 | { |
1528 | struct scsi_qla_host *ha = to_qla_host(cmd->device->host); | 1544 | struct scsi_qla_host *ha = to_qla_host(cmd->device->host); |
1529 | struct ddb_entry *ddb_entry = cmd->device->hostdata; | 1545 | struct ddb_entry *ddb_entry = cmd->device->hostdata; |
1530 | struct srb *sp; | ||
1531 | int ret = FAILED, stat; | 1546 | int ret = FAILED, stat; |
1532 | 1547 | ||
1533 | sp = (struct srb *) cmd->SCp.ptr; | 1548 | if (!ddb_entry) |
1534 | if (!sp || !ddb_entry) | ||
1535 | return ret; | 1549 | return ret; |
1536 | 1550 | ||
1537 | dev_info(&ha->pdev->dev, | 1551 | dev_info(&ha->pdev->dev, |
@@ -1644,7 +1658,7 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd) | |||
1644 | ha = (struct scsi_qla_host *) cmd->device->host->hostdata; | 1658 | ha = (struct scsi_qla_host *) cmd->device->host->hostdata; |
1645 | 1659 | ||
1646 | dev_info(&ha->pdev->dev, | 1660 | dev_info(&ha->pdev->dev, |
1647 | "scsi(%ld:%d:%d:%d): ADAPTER RESET ISSUED.\n", ha->host_no, | 1661 | "scsi(%ld:%d:%d:%d): HOST RESET ISSUED.\n", ha->host_no, |
1648 | cmd->device->channel, cmd->device->id, cmd->device->lun); | 1662 | cmd->device->channel, cmd->device->id, cmd->device->lun); |
1649 | 1663 | ||
1650 | if (qla4xxx_wait_for_hba_online(ha) != QLA_SUCCESS) { | 1664 | if (qla4xxx_wait_for_hba_online(ha) != QLA_SUCCESS) { |
diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h index ab984cb89cea..6980cb279c81 100644 --- a/drivers/scsi/qla4xxx/ql4_version.h +++ b/drivers/scsi/qla4xxx/ql4_version.h | |||
@@ -5,5 +5,5 @@ | |||
5 | * See LICENSE.qla4xxx for copyright and licensing details. | 5 | * See LICENSE.qla4xxx for copyright and licensing details. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define QLA4XXX_DRIVER_VERSION "5.01.00-k8" | 8 | #define QLA4XXX_DRIVER_VERSION "5.01.00-k9" |
9 | 9 | ||
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 783e33c65eb7..b47240ca4b19 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c | |||
@@ -990,7 +990,7 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, | |||
990 | struct iscsi_uevent *ev; | 990 | struct iscsi_uevent *ev; |
991 | int len = NLMSG_SPACE(sizeof(*ev) + data_size); | 991 | int len = NLMSG_SPACE(sizeof(*ev) + data_size); |
992 | 992 | ||
993 | skb = alloc_skb(len, GFP_NOIO); | 993 | skb = alloc_skb(len, GFP_ATOMIC); |
994 | if (!skb) { | 994 | if (!skb) { |
995 | printk(KERN_ERR "can not deliver iscsi offload message:OOM\n"); | 995 | printk(KERN_ERR "can not deliver iscsi offload message:OOM\n"); |
996 | return -ENOMEM; | 996 | return -ENOMEM; |
@@ -1012,7 +1012,7 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, | |||
1012 | 1012 | ||
1013 | memcpy((char *)ev + sizeof(*ev), data, data_size); | 1013 | memcpy((char *)ev + sizeof(*ev), data, data_size); |
1014 | 1014 | ||
1015 | return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_NOIO); | 1015 | return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_ATOMIC); |
1016 | } | 1016 | } |
1017 | EXPORT_SYMBOL_GPL(iscsi_offload_mesg); | 1017 | EXPORT_SYMBOL_GPL(iscsi_offload_mesg); |
1018 | 1018 | ||
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5616cd780ff3..b7b9fec67a98 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c | |||
@@ -1840,6 +1840,18 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) | |||
1840 | kfree(buffer); | 1840 | kfree(buffer); |
1841 | } | 1841 | } |
1842 | 1842 | ||
1843 | static int sd_try_extended_inquiry(struct scsi_device *sdp) | ||
1844 | { | ||
1845 | /* | ||
1846 | * Although VPD inquiries can go to SCSI-2 type devices, | ||
1847 | * some USB ones crash on receiving them, and the pages | ||
1848 | * we currently ask for are for SPC-3 and beyond | ||
1849 | */ | ||
1850 | if (sdp->scsi_level > SCSI_SPC_2) | ||
1851 | return 1; | ||
1852 | return 0; | ||
1853 | } | ||
1854 | |||
1843 | /** | 1855 | /** |
1844 | * sd_revalidate_disk - called the first time a new disk is seen, | 1856 | * sd_revalidate_disk - called the first time a new disk is seen, |
1845 | * performs disk spin up, read_capacity, etc. | 1857 | * performs disk spin up, read_capacity, etc. |
@@ -1877,8 +1889,12 @@ static int sd_revalidate_disk(struct gendisk *disk) | |||
1877 | */ | 1889 | */ |
1878 | if (sdkp->media_present) { | 1890 | if (sdkp->media_present) { |
1879 | sd_read_capacity(sdkp, buffer); | 1891 | sd_read_capacity(sdkp, buffer); |
1880 | sd_read_block_limits(sdkp); | 1892 | |
1881 | sd_read_block_characteristics(sdkp); | 1893 | if (sd_try_extended_inquiry(sdp)) { |
1894 | sd_read_block_limits(sdkp); | ||
1895 | sd_read_block_characteristics(sdkp); | ||
1896 | } | ||
1897 | |||
1882 | sd_read_write_protect_flag(sdkp, buffer); | 1898 | sd_read_write_protect_flag(sdkp, buffer); |
1883 | sd_read_cache_type(sdkp, buffer); | 1899 | sd_read_cache_type(sdkp, buffer); |
1884 | sd_read_app_tag_own(sdkp, buffer); | 1900 | sd_read_app_tag_own(sdkp, buffer); |
diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/serial/cpm_uart/cpm_uart_cpm2.c index 141c0a3333ad..a9802e76b5fa 100644 --- a/drivers/serial/cpm_uart/cpm_uart_cpm2.c +++ b/drivers/serial/cpm_uart/cpm_uart_cpm2.c | |||
@@ -132,7 +132,7 @@ int cpm_uart_allocbuf(struct uart_cpm_port *pinfo, unsigned int is_con) | |||
132 | memsz = L1_CACHE_ALIGN(pinfo->rx_nrfifos * pinfo->rx_fifosize) + | 132 | memsz = L1_CACHE_ALIGN(pinfo->rx_nrfifos * pinfo->rx_fifosize) + |
133 | L1_CACHE_ALIGN(pinfo->tx_nrfifos * pinfo->tx_fifosize); | 133 | L1_CACHE_ALIGN(pinfo->tx_nrfifos * pinfo->tx_fifosize); |
134 | if (is_con) { | 134 | if (is_con) { |
135 | mem_addr = alloc_bootmem(memsz); | 135 | mem_addr = kzalloc(memsz, GFP_NOWAIT); |
136 | dma_addr = virt_to_bus(mem_addr); | 136 | dma_addr = virt_to_bus(mem_addr); |
137 | } | 137 | } |
138 | else | 138 | else |
diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index ef7870f5ea08..857b3668b3ba 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c | |||
@@ -957,9 +957,14 @@ static int __devinit sticore_pci_init(struct pci_dev *pd, | |||
957 | #ifdef CONFIG_PCI | 957 | #ifdef CONFIG_PCI |
958 | unsigned long fb_base, rom_base; | 958 | unsigned long fb_base, rom_base; |
959 | unsigned int fb_len, rom_len; | 959 | unsigned int fb_len, rom_len; |
960 | int err; | ||
960 | struct sti_struct *sti; | 961 | struct sti_struct *sti; |
961 | 962 | ||
962 | pci_enable_device(pd); | 963 | err = pci_enable_device(pd); |
964 | if (err < 0) { | ||
965 | dev_err(&pd->dev, "Cannot enable PCI device\n"); | ||
966 | return err; | ||
967 | } | ||
963 | 968 | ||
964 | fb_base = pci_resource_start(pd, 0); | 969 | fb_base = pci_resource_start(pd, 0); |
965 | fb_len = pci_resource_len(pd, 0); | 970 | fb_len = pci_resource_len(pd, 0); |
@@ -1048,7 +1053,7 @@ static void __devinit sti_init_roms(void) | |||
1048 | 1053 | ||
1049 | /* Register drivers for native & PCI cards */ | 1054 | /* Register drivers for native & PCI cards */ |
1050 | register_parisc_driver(&pa_sti_driver); | 1055 | register_parisc_driver(&pa_sti_driver); |
1051 | pci_register_driver(&pci_sti_driver); | 1056 | WARN_ON(pci_register_driver(&pci_sti_driver)); |
1052 | 1057 | ||
1053 | /* if we didn't find the given default sti, take the first one */ | 1058 | /* if we didn't find the given default sti, take the first one */ |
1054 | if (!default_sti) | 1059 | if (!default_sti) |
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index bcec78ffc765..248e00ec4dc1 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c | |||
@@ -52,8 +52,10 @@ struct virtio_pci_device | |||
52 | char (*msix_names)[256]; | 52 | char (*msix_names)[256]; |
53 | /* Number of available vectors */ | 53 | /* Number of available vectors */ |
54 | unsigned msix_vectors; | 54 | unsigned msix_vectors; |
55 | /* Vectors allocated */ | 55 | /* Vectors allocated, excluding per-vq vectors if any */ |
56 | unsigned msix_used_vectors; | 56 | unsigned msix_used_vectors; |
57 | /* Whether we have vector per vq */ | ||
58 | bool per_vq_vectors; | ||
57 | }; | 59 | }; |
58 | 60 | ||
59 | /* Constants for MSI-X */ | 61 | /* Constants for MSI-X */ |
@@ -258,7 +260,6 @@ static void vp_free_vectors(struct virtio_device *vdev) | |||
258 | 260 | ||
259 | for (i = 0; i < vp_dev->msix_used_vectors; ++i) | 261 | for (i = 0; i < vp_dev->msix_used_vectors; ++i) |
260 | free_irq(vp_dev->msix_entries[i].vector, vp_dev); | 262 | free_irq(vp_dev->msix_entries[i].vector, vp_dev); |
261 | vp_dev->msix_used_vectors = 0; | ||
262 | 263 | ||
263 | if (vp_dev->msix_enabled) { | 264 | if (vp_dev->msix_enabled) { |
264 | /* Disable the vector used for configuration */ | 265 | /* Disable the vector used for configuration */ |
@@ -267,80 +268,77 @@ static void vp_free_vectors(struct virtio_device *vdev) | |||
267 | /* Flush the write out to device */ | 268 | /* Flush the write out to device */ |
268 | ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); | 269 | ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); |
269 | 270 | ||
270 | vp_dev->msix_enabled = 0; | ||
271 | pci_disable_msix(vp_dev->pci_dev); | 271 | pci_disable_msix(vp_dev->pci_dev); |
272 | vp_dev->msix_enabled = 0; | ||
273 | vp_dev->msix_vectors = 0; | ||
272 | } | 274 | } |
273 | } | ||
274 | 275 | ||
275 | static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, | 276 | vp_dev->msix_used_vectors = 0; |
276 | int *options, int noptions) | 277 | kfree(vp_dev->msix_names); |
277 | { | 278 | vp_dev->msix_names = NULL; |
278 | int i; | 279 | kfree(vp_dev->msix_entries); |
279 | for (i = 0; i < noptions; ++i) | 280 | vp_dev->msix_entries = NULL; |
280 | if (!pci_enable_msix(dev, entries, options[i])) | ||
281 | return options[i]; | ||
282 | return -EBUSY; | ||
283 | } | 281 | } |
284 | 282 | ||
285 | static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) | 283 | static int vp_request_vectors(struct virtio_device *vdev, int nvectors, |
284 | bool per_vq_vectors) | ||
286 | { | 285 | { |
287 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | 286 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
288 | const char *name = dev_name(&vp_dev->vdev.dev); | 287 | const char *name = dev_name(&vp_dev->vdev.dev); |
289 | unsigned i, v; | 288 | unsigned i, v; |
290 | int err = -ENOMEM; | 289 | int err = -ENOMEM; |
291 | /* We want at most one vector per queue and one for config changes. | 290 | |
292 | * Fallback to separate vectors for config and a shared for queues. | 291 | if (!nvectors) { |
293 | * Finally fall back to regular interrupts. */ | 292 | /* Can't allocate MSI-X vectors, use regular interrupt */ |
294 | int options[] = { max_vqs + 1, 2 }; | 293 | vp_dev->msix_vectors = 0; |
295 | int nvectors = max(options[0], options[1]); | 294 | err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, |
295 | IRQF_SHARED, name, vp_dev); | ||
296 | if (err) | ||
297 | return err; | ||
298 | vp_dev->intx_enabled = 1; | ||
299 | return 0; | ||
300 | } | ||
296 | 301 | ||
297 | vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, | 302 | vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, |
298 | GFP_KERNEL); | 303 | GFP_KERNEL); |
299 | if (!vp_dev->msix_entries) | 304 | if (!vp_dev->msix_entries) |
300 | goto error_entries; | 305 | goto error; |
301 | vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, | 306 | vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, |
302 | GFP_KERNEL); | 307 | GFP_KERNEL); |
303 | if (!vp_dev->msix_names) | 308 | if (!vp_dev->msix_names) |
304 | goto error_names; | 309 | goto error; |
305 | 310 | ||
306 | for (i = 0; i < nvectors; ++i) | 311 | for (i = 0; i < nvectors; ++i) |
307 | vp_dev->msix_entries[i].entry = i; | 312 | vp_dev->msix_entries[i].entry = i; |
308 | 313 | ||
309 | err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, | 314 | err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); |
310 | options, ARRAY_SIZE(options)); | 315 | if (err > 0) |
311 | if (err < 0) { | 316 | err = -ENOSPC; |
312 | /* Can't allocate enough MSI-X vectors, use regular interrupt */ | 317 | if (err) |
313 | vp_dev->msix_vectors = 0; | 318 | goto error; |
314 | err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, | 319 | vp_dev->msix_vectors = nvectors; |
315 | IRQF_SHARED, name, vp_dev); | 320 | vp_dev->msix_enabled = 1; |
316 | if (err) | 321 | |
317 | goto error_irq; | 322 | /* Set the vector used for configuration */ |
318 | vp_dev->intx_enabled = 1; | 323 | v = vp_dev->msix_used_vectors; |
319 | } else { | 324 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, |
320 | vp_dev->msix_vectors = err; | 325 | "%s-config", name); |
321 | vp_dev->msix_enabled = 1; | 326 | err = request_irq(vp_dev->msix_entries[v].vector, |
322 | 327 | vp_config_changed, 0, vp_dev->msix_names[v], | |
323 | /* Set the vector used for configuration */ | 328 | vp_dev); |
324 | v = vp_dev->msix_used_vectors; | 329 | if (err) |
325 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, | 330 | goto error; |
326 | "%s-config", name); | 331 | ++vp_dev->msix_used_vectors; |
327 | err = request_irq(vp_dev->msix_entries[v].vector, | 332 | |
328 | vp_config_changed, 0, vp_dev->msix_names[v], | 333 | iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); |
329 | vp_dev); | 334 | /* Verify we had enough resources to assign the vector */ |
330 | if (err) | 335 | v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); |
331 | goto error_irq; | 336 | if (v == VIRTIO_MSI_NO_VECTOR) { |
332 | ++vp_dev->msix_used_vectors; | 337 | err = -EBUSY; |
333 | 338 | goto error; | |
334 | iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); | ||
335 | /* Verify we had enough resources to assign the vector */ | ||
336 | v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); | ||
337 | if (v == VIRTIO_MSI_NO_VECTOR) { | ||
338 | err = -EBUSY; | ||
339 | goto error_irq; | ||
340 | } | ||
341 | } | 339 | } |
342 | 340 | ||
343 | if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) { | 341 | if (!per_vq_vectors) { |
344 | /* Shared vector for all VQs */ | 342 | /* Shared vector for all VQs */ |
345 | v = vp_dev->msix_used_vectors; | 343 | v = vp_dev->msix_used_vectors; |
346 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, | 344 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, |
@@ -349,28 +347,25 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) | |||
349 | vp_vring_interrupt, 0, vp_dev->msix_names[v], | 347 | vp_vring_interrupt, 0, vp_dev->msix_names[v], |
350 | vp_dev); | 348 | vp_dev); |
351 | if (err) | 349 | if (err) |
352 | goto error_irq; | 350 | goto error; |
353 | ++vp_dev->msix_used_vectors; | 351 | ++vp_dev->msix_used_vectors; |
354 | } | 352 | } |
355 | return 0; | 353 | return 0; |
356 | error_irq: | 354 | error: |
357 | vp_free_vectors(vdev); | 355 | vp_free_vectors(vdev); |
358 | kfree(vp_dev->msix_names); | ||
359 | error_names: | ||
360 | kfree(vp_dev->msix_entries); | ||
361 | error_entries: | ||
362 | return err; | 356 | return err; |
363 | } | 357 | } |
364 | 358 | ||
365 | static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, | 359 | static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, |
366 | void (*callback)(struct virtqueue *vq), | 360 | void (*callback)(struct virtqueue *vq), |
367 | const char *name) | 361 | const char *name, |
362 | u16 vector) | ||
368 | { | 363 | { |
369 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | 364 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
370 | struct virtio_pci_vq_info *info; | 365 | struct virtio_pci_vq_info *info; |
371 | struct virtqueue *vq; | 366 | struct virtqueue *vq; |
372 | unsigned long flags, size; | 367 | unsigned long flags, size; |
373 | u16 num, vector; | 368 | u16 num; |
374 | int err; | 369 | int err; |
375 | 370 | ||
376 | /* Select the queue we're interested in */ | 371 | /* Select the queue we're interested in */ |
@@ -389,7 +384,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, | |||
389 | 384 | ||
390 | info->queue_index = index; | 385 | info->queue_index = index; |
391 | info->num = num; | 386 | info->num = num; |
392 | info->vector = VIRTIO_MSI_NO_VECTOR; | 387 | info->vector = vector; |
393 | 388 | ||
394 | size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); | 389 | size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); |
395 | info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); | 390 | info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); |
@@ -413,22 +408,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, | |||
413 | vq->priv = info; | 408 | vq->priv = info; |
414 | info->vq = vq; | 409 | info->vq = vq; |
415 | 410 | ||
416 | /* allocate per-vq vector if available and necessary */ | 411 | if (vector != VIRTIO_MSI_NO_VECTOR) { |
417 | if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) { | ||
418 | vector = vp_dev->msix_used_vectors; | ||
419 | snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, | ||
420 | "%s-%s", dev_name(&vp_dev->vdev.dev), name); | ||
421 | err = request_irq(vp_dev->msix_entries[vector].vector, | ||
422 | vring_interrupt, 0, | ||
423 | vp_dev->msix_names[vector], vq); | ||
424 | if (err) | ||
425 | goto out_request_irq; | ||
426 | info->vector = vector; | ||
427 | ++vp_dev->msix_used_vectors; | ||
428 | } else | ||
429 | vector = VP_MSIX_VQ_VECTOR; | ||
430 | |||
431 | if (callback && vp_dev->msix_enabled) { | ||
432 | iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); | 412 | iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); |
433 | vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); | 413 | vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); |
434 | if (vector == VIRTIO_MSI_NO_VECTOR) { | 414 | if (vector == VIRTIO_MSI_NO_VECTOR) { |
@@ -444,11 +424,6 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, | |||
444 | return vq; | 424 | return vq; |
445 | 425 | ||
446 | out_assign: | 426 | out_assign: |
447 | if (info->vector != VIRTIO_MSI_NO_VECTOR) { | ||
448 | free_irq(vp_dev->msix_entries[info->vector].vector, vq); | ||
449 | --vp_dev->msix_used_vectors; | ||
450 | } | ||
451 | out_request_irq: | ||
452 | vring_del_virtqueue(vq); | 427 | vring_del_virtqueue(vq); |
453 | out_activate_queue: | 428 | out_activate_queue: |
454 | iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); | 429 | iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); |
@@ -462,12 +437,13 @@ static void vp_del_vq(struct virtqueue *vq) | |||
462 | { | 437 | { |
463 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); | 438 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); |
464 | struct virtio_pci_vq_info *info = vq->priv; | 439 | struct virtio_pci_vq_info *info = vq->priv; |
465 | unsigned long size; | 440 | unsigned long flags, size; |
466 | 441 | ||
467 | iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); | 442 | spin_lock_irqsave(&vp_dev->lock, flags); |
443 | list_del(&info->node); | ||
444 | spin_unlock_irqrestore(&vp_dev->lock, flags); | ||
468 | 445 | ||
469 | if (info->vector != VIRTIO_MSI_NO_VECTOR) | 446 | iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); |
470 | free_irq(vp_dev->msix_entries[info->vector].vector, vq); | ||
471 | 447 | ||
472 | if (vp_dev->msix_enabled) { | 448 | if (vp_dev->msix_enabled) { |
473 | iowrite16(VIRTIO_MSI_NO_VECTOR, | 449 | iowrite16(VIRTIO_MSI_NO_VECTOR, |
@@ -489,36 +465,62 @@ static void vp_del_vq(struct virtqueue *vq) | |||
489 | /* the config->del_vqs() implementation */ | 465 | /* the config->del_vqs() implementation */ |
490 | static void vp_del_vqs(struct virtio_device *vdev) | 466 | static void vp_del_vqs(struct virtio_device *vdev) |
491 | { | 467 | { |
468 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | ||
492 | struct virtqueue *vq, *n; | 469 | struct virtqueue *vq, *n; |
470 | struct virtio_pci_vq_info *info; | ||
493 | 471 | ||
494 | list_for_each_entry_safe(vq, n, &vdev->vqs, list) | 472 | list_for_each_entry_safe(vq, n, &vdev->vqs, list) { |
473 | info = vq->priv; | ||
474 | if (vp_dev->per_vq_vectors) | ||
475 | free_irq(vp_dev->msix_entries[info->vector].vector, vq); | ||
495 | vp_del_vq(vq); | 476 | vp_del_vq(vq); |
477 | } | ||
478 | vp_dev->per_vq_vectors = false; | ||
496 | 479 | ||
497 | vp_free_vectors(vdev); | 480 | vp_free_vectors(vdev); |
498 | } | 481 | } |
499 | 482 | ||
500 | /* the config->find_vqs() implementation */ | 483 | static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, |
501 | static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, | 484 | struct virtqueue *vqs[], |
502 | struct virtqueue *vqs[], | 485 | vq_callback_t *callbacks[], |
503 | vq_callback_t *callbacks[], | 486 | const char *names[], |
504 | const char *names[]) | 487 | int nvectors, |
488 | bool per_vq_vectors) | ||
505 | { | 489 | { |
506 | int vectors = 0; | 490 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
507 | int i, err; | 491 | u16 vector; |
508 | 492 | int i, err, allocated_vectors; | |
509 | /* How many vectors would we like? */ | ||
510 | for (i = 0; i < nvqs; ++i) | ||
511 | if (callbacks[i]) | ||
512 | ++vectors; | ||
513 | 493 | ||
514 | err = vp_request_vectors(vdev, vectors); | 494 | err = vp_request_vectors(vdev, nvectors, per_vq_vectors); |
515 | if (err) | 495 | if (err) |
516 | goto error_request; | 496 | goto error_request; |
517 | 497 | ||
498 | vp_dev->per_vq_vectors = per_vq_vectors; | ||
499 | allocated_vectors = vp_dev->msix_used_vectors; | ||
518 | for (i = 0; i < nvqs; ++i) { | 500 | for (i = 0; i < nvqs; ++i) { |
519 | vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]); | 501 | if (!callbacks[i] || !vp_dev->msix_enabled) |
520 | if (IS_ERR(vqs[i])) | 502 | vector = VIRTIO_MSI_NO_VECTOR; |
503 | else if (vp_dev->per_vq_vectors) | ||
504 | vector = allocated_vectors++; | ||
505 | else | ||
506 | vector = VP_MSIX_VQ_VECTOR; | ||
507 | vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i], vector); | ||
508 | if (IS_ERR(vqs[i])) { | ||
509 | err = PTR_ERR(vqs[i]); | ||
521 | goto error_find; | 510 | goto error_find; |
511 | } | ||
512 | /* allocate per-vq irq if available and necessary */ | ||
513 | if (vp_dev->per_vq_vectors && vector != VIRTIO_MSI_NO_VECTOR) { | ||
514 | snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, | ||
515 | "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); | ||
516 | err = request_irq(vp_dev->msix_entries[vector].vector, | ||
517 | vring_interrupt, 0, | ||
518 | vp_dev->msix_names[vector], vqs[i]); | ||
519 | if (err) { | ||
520 | vp_del_vq(vqs[i]); | ||
521 | goto error_find; | ||
522 | } | ||
523 | } | ||
522 | } | 524 | } |
523 | return 0; | 525 | return 0; |
524 | 526 | ||
@@ -526,7 +528,37 @@ error_find: | |||
526 | vp_del_vqs(vdev); | 528 | vp_del_vqs(vdev); |
527 | 529 | ||
528 | error_request: | 530 | error_request: |
529 | return PTR_ERR(vqs[i]); | 531 | return err; |
532 | } | ||
533 | |||
534 | /* the config->find_vqs() implementation */ | ||
535 | static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, | ||
536 | struct virtqueue *vqs[], | ||
537 | vq_callback_t *callbacks[], | ||
538 | const char *names[]) | ||
539 | { | ||
540 | int vectors = 0; | ||
541 | int i, uninitialized_var(err); | ||
542 | |||
543 | /* How many vectors would we like? */ | ||
544 | for (i = 0; i < nvqs; ++i) | ||
545 | if (callbacks[i]) | ||
546 | ++vectors; | ||
547 | |||
548 | /* We want at most one vector per queue and one for config changes. */ | ||
549 | err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, | ||
550 | vectors + 1, true); | ||
551 | if (!err) | ||
552 | return 0; | ||
553 | /* Fallback to separate vectors for config and a shared for queues. */ | ||
554 | err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, | ||
555 | 2, false); | ||
556 | if (!err) | ||
557 | return 0; | ||
558 | /* Finally fall back to regular interrupts. */ | ||
559 | err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, | ||
560 | 0, false); | ||
561 | return err; | ||
530 | } | 562 | } |
531 | 563 | ||
532 | static struct virtio_config_ops virtio_pci_config_ops = { | 564 | static struct virtio_config_ops virtio_pci_config_ops = { |
diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c index fecb307d28e9..aec7cefdef21 100644 --- a/drivers/watchdog/coh901327_wdt.c +++ b/drivers/watchdog/coh901327_wdt.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
19 | #include <linux/uaccess.h> | 19 | #include <linux/uaccess.h> |
20 | #include <linux/clk.h> | 20 | #include <linux/clk.h> |
21 | #include <linux/delay.h> | ||
21 | 22 | ||
22 | #define DRV_NAME "WDOG COH 901 327" | 23 | #define DRV_NAME "WDOG COH 901 327" |
23 | 24 | ||
@@ -92,6 +93,8 @@ static struct clk *clk; | |||
92 | static void coh901327_enable(u16 timeout) | 93 | static void coh901327_enable(u16 timeout) |
93 | { | 94 | { |
94 | u16 val; | 95 | u16 val; |
96 | unsigned long freq; | ||
97 | unsigned long delay_ns; | ||
95 | 98 | ||
96 | clk_enable(clk); | 99 | clk_enable(clk); |
97 | /* Restart timer if it is disabled */ | 100 | /* Restart timer if it is disabled */ |
@@ -102,6 +105,14 @@ static void coh901327_enable(u16 timeout) | |||
102 | /* Acknowledge any pending interrupt so it doesn't just fire off */ | 105 | /* Acknowledge any pending interrupt so it doesn't just fire off */ |
103 | writew(U300_WDOG_IER_WILL_BARK_IRQ_ACK_ENABLE, | 106 | writew(U300_WDOG_IER_WILL_BARK_IRQ_ACK_ENABLE, |
104 | virtbase + U300_WDOG_IER); | 107 | virtbase + U300_WDOG_IER); |
108 | /* | ||
109 | * The interrupt is cleared in the 32 kHz clock domain. | ||
110 | * Wait 3 32 kHz cycles for it to take effect | ||
111 | */ | ||
112 | freq = clk_get_rate(clk); | ||
113 | delay_ns = (1000000000 + freq - 1) / freq; /* Freq to ns and round up */ | ||
114 | delay_ns = 3 * delay_ns; /* Wait 3 cycles */ | ||
115 | ndelay(delay_ns); | ||
105 | /* Enable the watchdog interrupt */ | 116 | /* Enable the watchdog interrupt */ |
106 | writew(U300_WDOG_IMR_WILL_BARK_IRQ_ENABLE, virtbase + U300_WDOG_IMR); | 117 | writew(U300_WDOG_IMR_WILL_BARK_IRQ_ENABLE, virtbase + U300_WDOG_IMR); |
107 | /* Activate the watchdog timer */ | 118 | /* Activate the watchdog timer */ |