aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jay Vosburgh <fubar@us.ibm.com> 2008-05-18 00:10:13 -0400
committer Jeff Garzik <jgarzik@redhat.com> 2008-05-22 06:34:28 -0400
commit b2220cad583c9b63e085476df448fa2aff5ea906 (patch)
tree 73d2322a9e5edd2ef7f2e97f523162b57f3e8ebb
parent 7893b2491a2d5f716540ac5643d78d37a7f6628b (diff)
bonding: refactor ARP active-backup monitor
Refactor ARP monitor for active-backup mode. The motivation for this is to take care of locking issues in a clear manner (particularly to correctly handle RTNL vs. the bonding locks). Currently, the a-b ARP monitor does not hold RTNL at all, but future changes will require RTNL during ARP monitor failovers.

Rather than using conditional locking, this patch instead breaks up the ARP monitor into three discrete steps: inspection, commit changes, and probe. The inspection phase marks slaves that require link state changes. The commit phase is only called if inspection detects that changes are needed, and is called with RTNL. Lastly, the probe phase issues the ARP probes that the inspection phase uses to determine link state.

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
-rw-r--r-- drivers/net/bonding/bond_main.c 427
-rw-r--r-- drivers/net/bonding/bonding.h 6
2 files changed, 248 insertions, 185 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index fa3c2101fe75..51e0f2de42c6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1051,6 +1051,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1051 } 1051 }
1052 1052
1053 if (new_active) { 1053 if (new_active) {
1054 new_active->jiffies = jiffies;
1055
1054 if (new_active->link == BOND_LINK_BACK) { 1056 if (new_active->link == BOND_LINK_BACK) {
1055 if (USES_PRIMARY(bond->params.mode)) { 1057 if (USES_PRIMARY(bond->params.mode)) {
1056 printk(KERN_INFO DRV_NAME 1058 printk(KERN_INFO DRV_NAME
@@ -1062,7 +1064,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1062 1064
1063 new_active->delay = 0; 1065 new_active->delay = 0;
1064 new_active->link = BOND_LINK_UP; 1066 new_active->link = BOND_LINK_UP;
1065 new_active->jiffies = jiffies;
1066 1067
1067 if (bond->params.mode == BOND_MODE_8023AD) { 1068 if (bond->params.mode == BOND_MODE_8023AD) {
1068 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1069 bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
@@ -2795,243 +2796,299 @@ out:
2795} 2796}
2796 2797
2797/* 2798/*
2798 * When using arp monitoring in active-backup mode, this function is 2799 * Called to inspect slaves for active-backup mode ARP monitor link state
2799 * called to determine if any backup slaves have went down or a new 2800 * changes. Sets new_link in slaves to specify what action should take
2800 * current slave needs to be found. 2801 * place for the slave. Returns 0 if no changes are found, >0 if changes
2801 * The backup slaves never generate traffic, they are considered up by merely 2802 * to link states must be committed.
2802 * receiving traffic. If the current slave goes down, each backup slave will 2803 *
2803 * be given the opportunity to tx/rx an arp before being taken down - this 2804 * Called with bond->lock held for read.
2804 * prevents all slaves from being taken down due to the current slave not
2805 * sending any traffic for the backups to receive. The arps are not necessarily
2806 * necessary, any tx and rx traffic will keep the current slave up. While any
2807 * rx traffic will keep the backup slaves up, the current slave is responsible
2808 * for generating traffic to keep them up regardless of any other traffic they
2809 * may have received.
2810 * see loadbalance_arp_monitor for arp monitoring in load balancing mode
2811 */ 2805 */
2812void bond_activebackup_arp_mon(struct work_struct *work) 2806static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
2813{ 2807{
2814 struct bonding *bond = container_of(work, struct bonding,
2815 arp_work.work);
2816 struct slave *slave; 2808 struct slave *slave;
2817 int delta_in_ticks; 2809 int i, commit = 0;
2818 int i;
2819 2810
2820 read_lock(&bond->lock); 2811 bond_for_each_slave(bond, slave, i) {
2812 slave->new_link = BOND_LINK_NOCHANGE;
2821 2813
2822 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 2814 if (slave->link != BOND_LINK_UP) {
2815 if (time_before_eq(jiffies, slave_last_rx(bond, slave) +
2816 delta_in_ticks)) {
2817 slave->new_link = BOND_LINK_UP;
2818 commit++;
2819 }
2823 2820
2824 if (bond->kill_timers) { 2821 continue;
2825 goto out; 2822 }
2826 }
2827 2823
2828 if (bond->slave_cnt == 0) { 2824 /*
2829 goto re_arm; 2825 * Give slaves 2*delta after being enslaved or made
2826 * active. This avoids bouncing, as the last receive
2827 * times need a full ARP monitor cycle to be updated.
2828 */
2829 if (!time_after_eq(jiffies, slave->jiffies +
2830 2 * delta_in_ticks))
2831 continue;
2832
2833 /*
2834 * Backup slave is down if:
2835 * - No current_arp_slave AND
2836 * - more than 3*delta since last receive AND
2837 * - the bond has an IP address
2838 *
2839 * Note: a non-null current_arp_slave indicates
2840 * the curr_active_slave went down and we are
2841 * searching for a new one; under this condition
2842 * we only take the curr_active_slave down - this
2843 * gives each slave a chance to tx/rx traffic
2844 * before being taken out
2845 */
2846 if (slave->state == BOND_STATE_BACKUP &&
2847 !bond->current_arp_slave &&
2848 time_after(jiffies, slave_last_rx(bond, slave) +
2849 3 * delta_in_ticks)) {
2850 slave->new_link = BOND_LINK_DOWN;
2851 commit++;
2852 }
2853
2854 /*
2855 * Active slave is down if:
2856 * - more than 2*delta since transmitting OR
2857 * - (more than 2*delta since receive AND
2858 * the bond has an IP address)
2859 */
2860 if ((slave->state == BOND_STATE_ACTIVE) &&
2861 (time_after_eq(jiffies, slave->dev->trans_start +
2862 2 * delta_in_ticks) ||
2863 (time_after_eq(jiffies, slave_last_rx(bond, slave)
2864 + 2 * delta_in_ticks)))) {
2865 slave->new_link = BOND_LINK_DOWN;
2866 commit++;
2867 }
2830 } 2868 }
2831 2869
2832 /* determine if any slave has come up or any backup slave has 2870 read_lock(&bond->curr_slave_lock);
2833 * gone down 2871
2834 * TODO: what about up/down delay in arp mode? it wasn't here before 2872 /*
2835 * so it can wait 2873 * Trigger a commit if the primary option setting has changed.
2836 */ 2874 */
2837 bond_for_each_slave(bond, slave, i) { 2875 if (bond->primary_slave &&
2838 if (slave->link != BOND_LINK_UP) { 2876 (bond->primary_slave != bond->curr_active_slave) &&
2839 if (time_before_eq(jiffies, 2877 (bond->primary_slave->link == BOND_LINK_UP))
2840 slave_last_rx(bond, slave) + delta_in_ticks)) { 2878 commit++;
2841 2879
2842 slave->link = BOND_LINK_UP; 2880 read_unlock(&bond->curr_slave_lock);
2843 2881
2844 write_lock_bh(&bond->curr_slave_lock); 2882 return commit;
2883}
2845 2884
2846 if ((!bond->curr_active_slave) && 2885/*
2847 time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks)) { 2886 * Called to commit link state changes noted by inspection step of
2848 bond_change_active_slave(bond, slave); 2887 * active-backup mode ARP monitor.
2849 bond->current_arp_slave = NULL; 2888 *
2850 } else if (bond->curr_active_slave != slave) { 2889 * Called with RTNL and bond->lock for read.
2851 /* this slave has just come up but we 2890 */
2852 * already have a current slave; this 2891static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
2853 * can also happen if bond_enslave adds 2892{
2854 * a new slave that is up while we are 2893 struct slave *slave;
2855 * searching for a new slave 2894 int i;
2856 */
2857 bond_set_slave_inactive_flags(slave);
2858 bond->current_arp_slave = NULL;
2859 }
2860 2895
2861 bond_set_carrier(bond); 2896 bond_for_each_slave(bond, slave, i) {
2897 switch (slave->new_link) {
2898 case BOND_LINK_NOCHANGE:
2899 continue;
2862 2900
2863 if (slave == bond->curr_active_slave) { 2901 case BOND_LINK_UP:
2864 printk(KERN_INFO DRV_NAME 2902 write_lock_bh(&bond->curr_slave_lock);
2865 ": %s: %s is up and now the "
2866 "active interface\n",
2867 bond->dev->name,
2868 slave->dev->name);
2869 netif_carrier_on(bond->dev);
2870 } else {
2871 printk(KERN_INFO DRV_NAME
2872 ": %s: backup interface %s is "
2873 "now up\n",
2874 bond->dev->name,
2875 slave->dev->name);
2876 }
2877 2903
2878 write_unlock_bh(&bond->curr_slave_lock); 2904 if (!bond->curr_active_slave &&
2879 } 2905 time_before_eq(jiffies, slave->dev->trans_start +
2880 } else { 2906 delta_in_ticks)) {
2881 read_lock(&bond->curr_slave_lock); 2907 slave->link = BOND_LINK_UP;
2908 bond_change_active_slave(bond, slave);
2909 bond->current_arp_slave = NULL;
2882 2910
2883 if ((slave != bond->curr_active_slave) && 2911 printk(KERN_INFO DRV_NAME
2884 (!bond->current_arp_slave) && 2912 ": %s: %s is up and now the "
2885 (time_after_eq(jiffies, slave_last_rx(bond, slave) + 3*delta_in_ticks))) { 2913 "active interface\n",
2886 /* a backup slave has gone down; three times 2914 bond->dev->name, slave->dev->name);
2887 * the delta allows the current slave to be 2915
2888 * taken out before the backup slave. 2916 } else if (bond->curr_active_slave != slave) {
2889 * note: a non-null current_arp_slave indicates 2917 /* this slave has just come up but we
2890 * the curr_active_slave went down and we are 2918 * already have a current slave; this can
2891 * searching for a new one; under this 2919 * also happen if bond_enslave adds a new
2892 * condition we only take the curr_active_slave 2920 * slave that is up while we are searching
2893 * down - this gives each slave a chance to 2921 * for a new slave
2894 * tx/rx traffic before being taken out
2895 */ 2922 */
2923 slave->link = BOND_LINK_UP;
2924 bond_set_slave_inactive_flags(slave);
2925 bond->current_arp_slave = NULL;
2926
2927 printk(KERN_INFO DRV_NAME
2928 ": %s: backup interface %s is now up\n",
2929 bond->dev->name, slave->dev->name);
2930 }
2896 2931
2897 read_unlock(&bond->curr_slave_lock); 2932 write_unlock_bh(&bond->curr_slave_lock);
2898 2933
2899 slave->link = BOND_LINK_DOWN; 2934 break;
2900 2935
2901 if (slave->link_failure_count < UINT_MAX) { 2936 case BOND_LINK_DOWN:
2902 slave->link_failure_count++; 2937 if (slave->link_failure_count < UINT_MAX)
2903 } 2938 slave->link_failure_count++;
2939
2940 slave->link = BOND_LINK_DOWN;
2941
2942 if (slave == bond->curr_active_slave) {
2943 printk(KERN_INFO DRV_NAME
2944 ": %s: link status down for active "
2945 "interface %s, disabling it\n",
2946 bond->dev->name, slave->dev->name);
2904 2947
2905 bond_set_slave_inactive_flags(slave); 2948 bond_set_slave_inactive_flags(slave);
2906 2949
2950 write_lock_bh(&bond->curr_slave_lock);
2951
2952 bond_select_active_slave(bond);
2953 if (bond->curr_active_slave)
2954 bond->curr_active_slave->jiffies =
2955 jiffies;
2956
2957 write_unlock_bh(&bond->curr_slave_lock);
2958
2959 bond->current_arp_slave = NULL;
2960
2961 } else if (slave->state == BOND_STATE_BACKUP) {
2907 printk(KERN_INFO DRV_NAME 2962 printk(KERN_INFO DRV_NAME
2908 ": %s: backup interface %s is now down\n", 2963 ": %s: backup interface %s is now down\n",
2909 bond->dev->name, 2964 bond->dev->name, slave->dev->name);
2910 slave->dev->name); 2965
2911 } else { 2966 bond_set_slave_inactive_flags(slave);
2912 read_unlock(&bond->curr_slave_lock);
2913 } 2967 }
2968 break;
2969
2970 default:
2971 printk(KERN_ERR DRV_NAME
2972 ": %s: impossible: new_link %d on slave %s\n",
2973 bond->dev->name, slave->new_link,
2974 slave->dev->name);
2914 } 2975 }
2915 } 2976 }
2916 2977
2917 read_lock(&bond->curr_slave_lock); 2978 /*
2918 slave = bond->curr_active_slave; 2979 * No race with changes to primary via sysfs, as we hold rtnl.
2919 read_unlock(&bond->curr_slave_lock); 2980 */
2920 2981 if (bond->primary_slave &&
2921 if (slave) { 2982 (bond->primary_slave != bond->curr_active_slave) &&
2922 /* if we have sent traffic in the past 2*arp_intervals but 2983 (bond->primary_slave->link == BOND_LINK_UP)) {
2923 * haven't xmit and rx traffic in that time interval, select 2984 write_lock_bh(&bond->curr_slave_lock);
2924 * a different slave. slave->jiffies is only updated when 2985 bond_change_active_slave(bond, bond->primary_slave);
2925 * a slave first becomes the curr_active_slave - not necessarily 2986 write_unlock_bh(&bond->curr_slave_lock);
2926 * after every arp; this ensures the slave has a full 2*delta 2987 }
2927 * before being taken out. if a primary is being used, check
2928 * if it is up and needs to take over as the curr_active_slave
2929 */
2930 if ((time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
2931 (time_after_eq(jiffies, slave_last_rx(bond, slave) + 2*delta_in_ticks))) &&
2932 time_after_eq(jiffies, slave->jiffies + 2*delta_in_ticks)) {
2933 2988
2934 slave->link = BOND_LINK_DOWN; 2989 bond_set_carrier(bond);
2990}
2935 2991
2936 if (slave->link_failure_count < UINT_MAX) { 2992/*
2937 slave->link_failure_count++; 2993 * Send ARP probes for active-backup mode ARP monitor.
2938 } 2994 *
2995 * Called with bond->lock held for read.
2996 */
2997static void bond_ab_arp_probe(struct bonding *bond)
2998{
2999 struct slave *slave;
3000 int i;
2939 3001
2940 printk(KERN_INFO DRV_NAME 3002 read_lock(&bond->curr_slave_lock);
2941 ": %s: link status down for active interface "
2942 "%s, disabling it\n",
2943 bond->dev->name,
2944 slave->dev->name);
2945 3003
2946 write_lock_bh(&bond->curr_slave_lock); 3004 if (bond->current_arp_slave && bond->curr_active_slave)
3005 printk("PROBE: c_arp %s && cas %s BAD\n",
3006 bond->current_arp_slave->dev->name,
3007 bond->curr_active_slave->dev->name);
2947 3008
2948 bond_select_active_slave(bond); 3009 if (bond->curr_active_slave) {
2949 slave = bond->curr_active_slave; 3010 bond_arp_send_all(bond, bond->curr_active_slave);
3011 read_unlock(&bond->curr_slave_lock);
3012 return;
3013 }
2950 3014
2951 write_unlock_bh(&bond->curr_slave_lock); 3015 read_unlock(&bond->curr_slave_lock);
2952 3016
2953 bond->current_arp_slave = slave; 3017 /* if we don't have a curr_active_slave, search for the next available
3018 * backup slave from the current_arp_slave and make it the candidate
3019 * for becoming the curr_active_slave
3020 */
2954 3021
2955 if (slave) { 3022 if (!bond->current_arp_slave) {
2956 slave->jiffies = jiffies; 3023 bond->current_arp_slave = bond->first_slave;
2957 } 3024 if (!bond->current_arp_slave)
2958 } else if ((bond->primary_slave) && 3025 return;
2959 (bond->primary_slave != slave) && 3026 }
2960 (bond->primary_slave->link == BOND_LINK_UP)) {
2961 /* at this point, slave is the curr_active_slave */
2962 printk(KERN_INFO DRV_NAME
2963 ": %s: changing from interface %s to primary "
2964 "interface %s\n",
2965 bond->dev->name,
2966 slave->dev->name,
2967 bond->primary_slave->dev->name);
2968 3027
2969 /* primary is up so switch to it */ 3028 bond_set_slave_inactive_flags(bond->current_arp_slave);
2970 write_lock_bh(&bond->curr_slave_lock);
2971 bond_change_active_slave(bond, bond->primary_slave);
2972 write_unlock_bh(&bond->curr_slave_lock);
2973 3029
2974 slave = bond->primary_slave; 3030 /* search for next candidate */
3031 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
3032 if (IS_UP(slave->dev)) {
3033 slave->link = BOND_LINK_BACK;
3034 bond_set_slave_active_flags(slave);
3035 bond_arp_send_all(bond, slave);
2975 slave->jiffies = jiffies; 3036 slave->jiffies = jiffies;
2976 } else { 3037 bond->current_arp_slave = slave;
2977 bond->current_arp_slave = NULL; 3038 break;
2978 } 3039 }
2979 3040
2980 /* the current slave must tx an arp to ensure backup slaves 3041 /* if the link state is up at this point, we
2981 * rx traffic 3042 * mark it down - this can happen if we have
3043 * simultaneous link failures and
3044 * reselect_active_interface doesn't make this
3045 * one the current slave so it is still marked
3046 * up when it is actually down
2982 */ 3047 */
2983 if (slave && IS_UP(slave->dev)) 3048 if (slave->link == BOND_LINK_UP) {
2984 bond_arp_send_all(bond, slave); 3049 slave->link = BOND_LINK_DOWN;
2985 } 3050 if (slave->link_failure_count < UINT_MAX)
3051 slave->link_failure_count++;
2986 3052
2987 /* if we don't have a curr_active_slave, search for the next available 3053 bond_set_slave_inactive_flags(slave);
2988 * backup slave from the current_arp_slave and make it the candidate 3054
2989 * for becoming the curr_active_slave 3055 printk(KERN_INFO DRV_NAME
2990 */ 3056 ": %s: backup interface %s is now down.\n",
2991 if (!slave) { 3057 bond->dev->name, slave->dev->name);
2992 if (!bond->current_arp_slave) {
2993 bond->current_arp_slave = bond->first_slave;
2994 } 3058 }
3059 }
3060}
2995 3061
2996 if (bond->current_arp_slave) { 3062void bond_activebackup_arp_mon(struct work_struct *work)
2997 bond_set_slave_inactive_flags(bond->current_arp_slave); 3063{
3064 struct bonding *bond = container_of(work, struct bonding,
3065 arp_work.work);
3066 int delta_in_ticks;
2998 3067
2999 /* search for next candidate */ 3068 read_lock(&bond->lock);
3000 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
3001 if (IS_UP(slave->dev)) {
3002 slave->link = BOND_LINK_BACK;
3003 bond_set_slave_active_flags(slave);
3004 bond_arp_send_all(bond, slave);
3005 slave->jiffies = jiffies;
3006 bond->current_arp_slave = slave;
3007 break;
3008 }
3009 3069
3010 /* if the link state is up at this point, we 3070 if (bond->kill_timers)
3011 * mark it down - this can happen if we have 3071 goto out;
3012 * simultaneous link failures and
3013 * reselect_active_interface doesn't make this
3014 * one the current slave so it is still marked
3015 * up when it is actually down
3016 */
3017 if (slave->link == BOND_LINK_UP) {
3018 slave->link = BOND_LINK_DOWN;
3019 if (slave->link_failure_count < UINT_MAX) {
3020 slave->link_failure_count++;
3021 }
3022 3072
3023 bond_set_slave_inactive_flags(slave); 3073 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
3024 3074
3025 printk(KERN_INFO DRV_NAME 3075 if (bond->slave_cnt == 0)
3026 ": %s: backup interface %s is " 3076 goto re_arm;
3027 "now down.\n", 3077
3028 bond->dev->name, 3078 if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
3029 slave->dev->name); 3079 read_unlock(&bond->lock);
3030 } 3080 rtnl_lock();
3031 } 3081 read_lock(&bond->lock);
3032 } 3082
3083 bond_ab_arp_commit(bond, delta_in_ticks);
3084
3085 read_unlock(&bond->lock);
3086 rtnl_unlock();
3087 read_lock(&bond->lock);
3033 } 3088 }
3034 3089
3090 bond_ab_arp_probe(bond);
3091
3035re_arm: 3092re_arm:
3036 if (bond->params.arp_interval) { 3093 if (bond->params.arp_interval) {
3037 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 3094 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 46a2ed507b33..8766b1213690 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -158,6 +158,7 @@ struct slave {
158 unsigned long jiffies; 158 unsigned long jiffies;
159 unsigned long last_arp_rx; 159 unsigned long last_arp_rx;
160 s8 link; /* one of BOND_LINK_XXXX */ 160 s8 link; /* one of BOND_LINK_XXXX */
161 s8 new_link;
161 s8 state; /* one of BOND_STATE_XXXX */ 162 s8 state; /* one of BOND_STATE_XXXX */
162 u32 original_flags; 163 u32 original_flags;
163 u32 original_mtu; 164 u32 original_mtu;
@@ -170,6 +171,11 @@ struct slave {
170}; 171};
171 172
172/* 173/*
174 * Link pseudo-state only used internally by monitors
175 */
176#define BOND_LINK_NOCHANGE -1
177
178/*
173 * Here are the locking policies for the two bonding locks: 179 * Here are the locking policies for the two bonding locks:
174 * 180 *
175 * 1) Get bond->lock when reading/writing slave list. 181 * 1) Get bond->lock when reading/writing slave list.