diff options
author | Kristian Høgsberg <krh@redhat.com> | 2007-02-06 14:49:34 -0500 |
---|---|---|
committer | Stefan Richter <stefanr@s5r6.in-berlin.de> | 2007-03-09 16:02:52 -0500 |
commit | 7f37c426c68d34aa221ce29262c1eba39034ac97 (patch) | |
tree | 782294e973ef409c13c758890d6c4cb309eac2ce | |
parent | 1d3d52c5367e0ca352aff6d6986533787bcf36d0 (diff) |
firewire: Introduce a retry mechanism for reconnects and logins.
Sometimes we reconnect too soon, sometimes too late. Adding a retry
mechanism makes the reconnect step much more robust.
Signed-off-by: Kristian Høgsberg <krh@redhat.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
-rw-r--r-- | drivers/firewire/fw-sbp2.c | 180 |
1 files changed, 109 insertions, 71 deletions
diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index c196333e1de9..432c35aefc12 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c | |||
@@ -62,7 +62,8 @@ struct sbp2_device { | |||
62 | /* Timer for flushing ORBs. */ | 62 | /* Timer for flushing ORBs. */ |
63 | struct timer_list orb_timer; | 63 | struct timer_list orb_timer; |
64 | 64 | ||
65 | struct work_struct work; | 65 | int retries; |
66 | struct delayed_work work; | ||
66 | struct Scsi_Host *scsi_host; | 67 | struct Scsi_Host *scsi_host; |
67 | }; | 68 | }; |
68 | 69 | ||
@@ -511,6 +512,75 @@ static int sbp2_agent_reset(struct fw_unit *unit) | |||
511 | 512 | ||
512 | static int add_scsi_devices(struct fw_unit *unit); | 513 | static int add_scsi_devices(struct fw_unit *unit); |
513 | static void remove_scsi_devices(struct fw_unit *unit); | 514 | static void remove_scsi_devices(struct fw_unit *unit); |
515 | static void sbp2_reconnect(struct work_struct *work); | ||
516 | |||
517 | static void sbp2_login(struct work_struct *work) | ||
518 | { | ||
519 | struct sbp2_device *sd = | ||
520 | container_of(work, struct sbp2_device, work.work); | ||
521 | struct fw_unit *unit = sd->unit; | ||
522 | struct fw_device *device = fw_device(unit->device.parent); | ||
523 | struct sbp2_login_response response; | ||
524 | int generation, node_id, local_node_id, lun, retval; | ||
525 | |||
526 | /* FIXME: Make this work for multi-lun devices. */ | ||
527 | lun = 0; | ||
528 | |||
529 | generation = device->card->generation; | ||
530 | node_id = device->node->node_id; | ||
531 | local_node_id = device->card->local_node->node_id; | ||
532 | |||
533 | if (sbp2_send_management_orb(unit, node_id, generation, | ||
534 | SBP2_LOGIN_REQUEST, lun, &response) < 0) { | ||
535 | if (sd->retries++ < 5) { | ||
536 | fw_error("login attempt %d for %s failed, " | ||
537 | "rescheduling\n", | ||
538 | sd->retries, unit->device.bus_id); | ||
539 | schedule_delayed_work(&sd->work, DIV_ROUND_UP(HZ, 5)); | ||
540 | } else { | ||
541 | fw_error("failed to login to %s\n", | ||
542 | unit->device.bus_id); | ||
543 | remove_scsi_devices(unit); | ||
544 | } | ||
545 | return; | ||
546 | } | ||
547 | |||
548 | sd->generation = generation; | ||
549 | sd->node_id = node_id; | ||
550 | sd->address_high = local_node_id << 16; | ||
551 | |||
552 | /* Get command block agent offset and login id. */ | ||
553 | sd->command_block_agent_address = | ||
554 | ((u64) response.command_block_agent.high << 32) | | ||
555 | response.command_block_agent.low; | ||
556 | sd->login_id = login_response_get_login_id(response); | ||
557 | |||
558 | fw_notify("logged in to sbp2 unit %s\n", unit->device.bus_id); | ||
559 | fw_notify(" - management_agent_address: 0x%012llx\n", | ||
560 | (unsigned long long) sd->management_agent_address); | ||
561 | fw_notify(" - command_block_agent_address: 0x%012llx\n", | ||
562 | (unsigned long long) sd->command_block_agent_address); | ||
563 | fw_notify(" - status write address: 0x%012llx\n", | ||
564 | (unsigned long long) sd->address_handler.offset); | ||
565 | |||
566 | #if 0 | ||
567 | /* FIXME: The linux1394 sbp2 does this last step. */ | ||
568 | sbp2_set_busy_timeout(scsi_id); | ||
569 | #endif | ||
570 | |||
571 | INIT_DELAYED_WORK(&sd->work, sbp2_reconnect); | ||
572 | sbp2_agent_reset(unit); | ||
573 | |||
574 | retval = add_scsi_devices(unit); | ||
575 | if (retval < 0) { | ||
576 | sbp2_send_management_orb(unit, sd->node_id, sd->generation, | ||
577 | SBP2_LOGOUT_REQUEST, sd->login_id, | ||
578 | NULL); | ||
579 | /* Set this back to sbp2_login so we fall back and | ||
580 | * retry login on bus reset. */ | ||
581 | INIT_DELAYED_WORK(&sd->work, sbp2_login); | ||
582 | } | ||
583 | } | ||
514 | 584 | ||
515 | static int sbp2_probe(struct device *dev) | 585 | static int sbp2_probe(struct device *dev) |
516 | { | 586 | { |
@@ -518,9 +588,7 @@ static int sbp2_probe(struct device *dev) | |||
518 | struct fw_device *device = fw_device(unit->device.parent); | 588 | struct fw_device *device = fw_device(unit->device.parent); |
519 | struct sbp2_device *sd; | 589 | struct sbp2_device *sd; |
520 | struct fw_csr_iterator ci; | 590 | struct fw_csr_iterator ci; |
521 | int i, key, value, lun, retval; | 591 | int i, key, value; |
522 | int node_id, generation, local_node_id; | ||
523 | struct sbp2_login_response response; | ||
524 | u32 model, firmware_revision; | 592 | u32 model, firmware_revision; |
525 | 593 | ||
526 | sd = kzalloc(sizeof *sd, GFP_KERNEL); | 594 | sd = kzalloc(sizeof *sd, GFP_KERNEL); |
@@ -586,58 +654,10 @@ static int sbp2_probe(struct device *dev) | |||
586 | unit->device.bus_id, | 654 | unit->device.bus_id, |
587 | sd->workarounds, firmware_revision, model); | 655 | sd->workarounds, firmware_revision, model); |
588 | 656 | ||
589 | /* FIXME: Make this work for multi-lun devices. */ | 657 | /* We schedule work to do the login so we can easily |
590 | lun = 0; | 658 | * reschedule retries. */ |
591 | 659 | INIT_DELAYED_WORK(&sd->work, sbp2_login); | |
592 | generation = device->card->generation; | 660 | schedule_delayed_work(&sd->work, 0); |
593 | node_id = device->node->node_id; | ||
594 | local_node_id = device->card->local_node->node_id; | ||
595 | |||
596 | /* FIXME: We should probably do this from a keventd callback | ||
597 | * and handle retries by rescheduling the work. */ | ||
598 | if (sbp2_send_management_orb(unit, node_id, generation, | ||
599 | SBP2_LOGIN_REQUEST, lun, &response) < 0) { | ||
600 | fw_core_remove_address_handler(&sd->address_handler); | ||
601 | del_timer_sync(&sd->orb_timer); | ||
602 | kfree(sd); | ||
603 | return -EBUSY; | ||
604 | } | ||
605 | |||
606 | sd->generation = generation; | ||
607 | sd->node_id = node_id; | ||
608 | sd->address_high = local_node_id << 16; | ||
609 | |||
610 | /* Get command block agent offset and login id. */ | ||
611 | sd->command_block_agent_address = | ||
612 | ((u64) response.command_block_agent.high << 32) | | ||
613 | response.command_block_agent.low; | ||
614 | sd->login_id = login_response_get_login_id(response); | ||
615 | |||
616 | fw_notify("logged in to sbp2 unit %s\n", unit->device.bus_id); | ||
617 | fw_notify(" - management_agent_address: 0x%012llx\n", | ||
618 | (unsigned long long) sd->management_agent_address); | ||
619 | fw_notify(" - command_block_agent_address: 0x%012llx\n", | ||
620 | (unsigned long long) sd->command_block_agent_address); | ||
621 | fw_notify(" - status write address: 0x%012llx\n", | ||
622 | (unsigned long long) sd->address_handler.offset); | ||
623 | |||
624 | #if 0 | ||
625 | /* FIXME: The linux1394 sbp2 does this last step. */ | ||
626 | sbp2_set_busy_timeout(scsi_id); | ||
627 | #endif | ||
628 | |||
629 | sbp2_agent_reset(unit); | ||
630 | |||
631 | retval = add_scsi_devices(unit); | ||
632 | if (retval < 0) { | ||
633 | sbp2_send_management_orb(unit, sd->node_id, sd->generation, | ||
634 | SBP2_LOGOUT_REQUEST, sd->login_id, | ||
635 | NULL); | ||
636 | fw_core_remove_address_handler(&sd->address_handler); | ||
637 | del_timer_sync(&sd->orb_timer); | ||
638 | kfree(sd); | ||
639 | return retval; | ||
640 | } | ||
641 | 661 | ||
642 | return 0; | 662 | return 0; |
643 | } | 663 | } |
@@ -663,28 +683,41 @@ static int sbp2_remove(struct device *dev) | |||
663 | 683 | ||
664 | static void sbp2_reconnect(struct work_struct *work) | 684 | static void sbp2_reconnect(struct work_struct *work) |
665 | { | 685 | { |
666 | struct sbp2_device *sd = container_of(work, struct sbp2_device, work); | 686 | struct sbp2_device *sd = |
687 | container_of(work, struct sbp2_device, work.work); | ||
667 | struct fw_unit *unit = sd->unit; | 688 | struct fw_unit *unit = sd->unit; |
668 | struct fw_device *device = fw_device(unit->device.parent); | 689 | struct fw_device *device = fw_device(unit->device.parent); |
669 | int generation, node_id, local_node_id; | 690 | int generation, node_id, local_node_id; |
670 | 691 | ||
671 | fw_notify("in sbp2_reconnect, reconnecting to unit %s\n", | ||
672 | unit->device.bus_id); | ||
673 | |||
674 | generation = device->card->generation; | 692 | generation = device->card->generation; |
675 | node_id = device->node->node_id; | 693 | node_id = device->node->node_id; |
676 | local_node_id = device->card->local_node->node_id; | 694 | local_node_id = device->card->local_node->node_id; |
677 | 695 | ||
678 | sbp2_send_management_orb(unit, node_id, generation, | 696 | if (sbp2_send_management_orb(unit, node_id, generation, |
679 | SBP2_RECONNECT_REQUEST, sd->login_id, NULL); | 697 | SBP2_RECONNECT_REQUEST, |
680 | 698 | sd->login_id, NULL) < 0) { | |
681 | /* FIXME: handle reconnect failures. */ | 699 | if (sd->retries++ < 5) { |
682 | 700 | fw_error("reconnect attempt %d for %s failed, " | |
683 | sbp2_cancel_orbs(unit); | 701 | "rescheduling\n", |
702 | sd->retries, unit->device.bus_id); | ||
703 | } else { | ||
704 | fw_error("failed to reconnect to %s\n", | ||
705 | unit->device.bus_id); | ||
706 | /* Fall back and try to log in again. */ | ||
707 | sd->retries = 0; | ||
708 | INIT_DELAYED_WORK(&sd->work, sbp2_login); | ||
709 | } | ||
710 | schedule_delayed_work(&sd->work, DIV_ROUND_UP(HZ, 5)); | ||
711 | return; | ||
712 | } | ||
684 | 713 | ||
685 | sd->generation = generation; | 714 | sd->generation = generation; |
686 | sd->node_id = node_id; | 715 | sd->node_id = node_id; |
687 | sd->address_high = local_node_id << 16; | 716 | sd->address_high = local_node_id << 16; |
717 | |||
718 | fw_notify("reconnected to unit %s\n", unit->device.bus_id); | ||
719 | sbp2_agent_reset(unit); | ||
720 | sbp2_cancel_orbs(unit); | ||
688 | } | 721 | } |
689 | 722 | ||
690 | static void sbp2_update(struct fw_unit *unit) | 723 | static void sbp2_update(struct fw_unit *unit) |
@@ -692,10 +725,9 @@ static void sbp2_update(struct fw_unit *unit) | |||
692 | struct fw_device *device = fw_device(unit->device.parent); | 725 | struct fw_device *device = fw_device(unit->device.parent); |
693 | struct sbp2_device *sd = unit->device.driver_data; | 726 | struct sbp2_device *sd = unit->device.driver_data; |
694 | 727 | ||
728 | sd->retries = 0; | ||
695 | fw_device_enable_phys_dma(device); | 729 | fw_device_enable_phys_dma(device); |
696 | 730 | schedule_delayed_work(&sd->work, 0); | |
697 | INIT_WORK(&sd->work, sbp2_reconnect); | ||
698 | schedule_work(&sd->work); | ||
699 | } | 731 | } |
700 | 732 | ||
701 | #define SBP2_UNIT_SPEC_ID_ENTRY 0x0000609e | 733 | #define SBP2_UNIT_SPEC_ID_ENTRY 0x0000609e |
@@ -1056,6 +1088,9 @@ static int add_scsi_devices(struct fw_unit *unit) | |||
1056 | struct sbp2_device *sd = unit->device.driver_data; | 1088 | struct sbp2_device *sd = unit->device.driver_data; |
1057 | int retval, lun; | 1089 | int retval, lun; |
1058 | 1090 | ||
1091 | if (sd->scsi_host != NULL) | ||
1092 | return 0; | ||
1093 | |||
1059 | sd->scsi_host = scsi_host_alloc(&scsi_driver_template, | 1094 | sd->scsi_host = scsi_host_alloc(&scsi_driver_template, |
1060 | sizeof(unsigned long)); | 1095 | sizeof(unsigned long)); |
1061 | if (sd->scsi_host == NULL) { | 1096 | if (sd->scsi_host == NULL) { |
@@ -1088,8 +1123,11 @@ static void remove_scsi_devices(struct fw_unit *unit) | |||
1088 | { | 1123 | { |
1089 | struct sbp2_device *sd = unit->device.driver_data; | 1124 | struct sbp2_device *sd = unit->device.driver_data; |
1090 | 1125 | ||
1091 | scsi_remove_host(sd->scsi_host); | 1126 | if (sd->scsi_host != NULL) { |
1092 | scsi_host_put(sd->scsi_host); | 1127 | scsi_remove_host(sd->scsi_host); |
1128 | scsi_host_put(sd->scsi_host); | ||
1129 | } | ||
1130 | sd->scsi_host = NULL; | ||
1093 | } | 1131 | } |
1094 | 1132 | ||
1095 | MODULE_AUTHOR("Kristian Hoegsberg <krh@bitplanet.net>"); | 1133 | MODULE_AUTHOR("Kristian Hoegsberg <krh@bitplanet.net>"); |