From 572a103ded0ad880f75ce83e99f0512fbb80b5b0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 8 May 2007 18:34:17 -0700 Subject: [NET] link_watch: Move link watch list into net_device These days the link watch mechanism is an integral part of the network subsystem as it manages the carrier status. So it now makes sense to allocate some memory for it in net_device rather than allocating it on demand. In fact, this is necessary because we can't tolerate a memory allocation failure since that means we'd have to potentially throw a link up event away. It also simplifies the code greatly. In doing so I discovered a subtle race condition in the use of singleevent. This race condition still exists (and is somewhat magnified) without singleevent but it's now plugged thanks to an smp_mb__before_clear_bit. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/link_watch.c | 50 +++++++++++++++----------------------------------- 1 file changed, 15 insertions(+), 35 deletions(-) (limited to 'net/core') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index e3c26a9cca..71a35da275 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -28,7 +27,6 @@ enum lw_bits { LW_RUNNING = 0, - LW_SE_USED }; static unsigned long linkwatch_flags; @@ -37,17 +35,9 @@ static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); -static LIST_HEAD(lweventlist); +static struct net_device *lweventlist; static DEFINE_SPINLOCK(lweventlist_lock); -struct lw_event { - struct list_head list; - struct net_device *dev; -}; - -/* Avoid kmalloc() for most systems */ -static struct lw_event singleevent; - static unsigned char default_operstate(const struct net_device *dev) { if (!netif_carrier_ok(dev)) @@ -90,21 +80,23 @@ static void rfc2863_policy(struct net_device *dev) /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { - struct list_head head, *n, *next; + struct net_device *next; spin_lock_irq(&lweventlist_lock); - list_replace_init(&lweventlist, &head); + next = lweventlist; + lweventlist = NULL; spin_unlock_irq(&lweventlist_lock); - list_for_each_safe(n, next, &head) { - struct lw_event *event = list_entry(n, struct lw_event, list); - struct net_device *dev = event->dev; + while (next) { + struct net_device *dev = next; - if (event == &singleevent) { - clear_bit(LW_SE_USED, &linkwatch_flags); - } else { - kfree(event); - } + next = dev->link_watch_next; + + /* + * Make sure the above read is complete since it can be + * rewritten as soon as we clear the bit below. + */ + smp_mb__before_clear_bit(); /* We are about to handle this device, * so new events can be accepted @@ -147,24 +139,12 @@ void linkwatch_fire_event(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { unsigned long flags; - struct lw_event *event; - - if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) { - event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC); - - if (unlikely(event == NULL)) { - clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); - return; - } - } else { - event = &singleevent; - } dev_hold(dev); - event->dev = dev; spin_lock_irqsave(&lweventlist_lock, flags); - list_add_tail(&event->list, &lweventlist); + dev->link_watch_next = lweventlist; + lweventlist = dev; spin_unlock_irqrestore(&lweventlist_lock, flags); if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) { -- cgit v1.2.2 From 294cc44b7e48a6e7732499eebcf409b231460d8e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 8 May 2007 18:36:28 -0700 Subject: [NET]: Remove link_watch delay for up even when we're down Currently all link carrier events are delayed by up to a second before they're processed to prevent link storms. This causes unnecessary packet loss during that interval. In fact, we can achieve the same effect in preventing storms by only delaying down events and unnecssary up events. The latter is defined as up events when we're already up. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/link_watch.c | 90 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 23 deletions(-) (limited to 'net/core') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 71a35da275..b5f45799c2 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -77,11 +77,52 @@ static void rfc2863_policy(struct net_device *dev) } -/* Must be called with the rtnl semaphore held */ -void linkwatch_run_queue(void) +static int linkwatch_urgent_event(struct net_device *dev) +{ + return netif_running(dev) && netif_carrier_ok(dev) && + dev->qdisc != dev->qdisc_sleeping; +} + + +static void linkwatch_add_event(struct net_device *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&lweventlist_lock, flags); + dev->link_watch_next = lweventlist; + lweventlist = dev; + spin_unlock_irqrestore(&lweventlist_lock, flags); +} + + +static void linkwatch_schedule_work(unsigned long delay) +{ + if (test_and_set_bit(LW_RUNNING, &linkwatch_flags)) + return; + + /* If we wrap around we'll delay it by at most HZ. */ + if (delay > HZ) + delay = 0; + + schedule_delayed_work(&linkwatch_work, delay); +} + + +static void __linkwatch_run_queue(int urgent_only) { struct net_device *next; + /* + * Limit the number of linkwatch events to one + * per second so that a runaway driver does not + * cause a storm of messages on the netlink + * socket. This limit does not apply to up events + * while the device qdisc is down. + */ + if (!urgent_only) + linkwatch_nextevent = jiffies + HZ; + clear_bit(LW_RUNNING, &linkwatch_flags); + spin_lock_irq(&lweventlist_lock); next = lweventlist; lweventlist = NULL; @@ -92,6 +133,11 @@ void linkwatch_run_queue(void) next = dev->link_watch_next; + if (urgent_only && !linkwatch_urgent_event(dev)) { + linkwatch_add_event(dev); + continue; + } + /* * Make sure the above read is complete since it can be * rewritten as soon as we clear the bit below. @@ -116,21 +162,23 @@ void linkwatch_run_queue(void) dev_put(dev); } + + if (lweventlist) + linkwatch_schedule_work(linkwatch_nextevent - jiffies); } -static void linkwatch_event(struct work_struct *dummy) +/* Must be called with the rtnl semaphore held */ +void linkwatch_run_queue(void) { - /* Limit the number of linkwatch events to one - * per second so that a runaway driver does not - * cause a storm of messages on the netlink - * socket - */ - linkwatch_nextevent = jiffies + HZ; - clear_bit(LW_RUNNING, &linkwatch_flags); + __linkwatch_run_queue(0); +} + +static void linkwatch_event(struct work_struct *dummy) +{ rtnl_lock(); - linkwatch_run_queue(); + __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies)); rtnl_unlock(); } @@ -138,23 +186,19 @@ static void linkwatch_event(struct work_struct *dummy) void linkwatch_fire_event(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { - unsigned long flags; + unsigned long delay; dev_hold(dev); - spin_lock_irqsave(&lweventlist_lock, flags); - dev->link_watch_next = lweventlist; - lweventlist = dev; - spin_unlock_irqrestore(&lweventlist_lock, flags); + linkwatch_add_event(dev); - if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) { - unsigned long delay = linkwatch_nextevent - jiffies; + delay = linkwatch_nextevent - jiffies; - /* If we wrap around we'll delay it by at most HZ. */ - if (delay > HZ) - delay = 0; - schedule_delayed_work(&linkwatch_work, delay); - } + /* Minimise down-time: drop delay for up event. */ + if (linkwatch_urgent_event(dev)) + delay = 0; + + linkwatch_schedule_work(delay); } } -- cgit v1.2.2 From db0ccffed91e234cad99a35f07d5a322f410baa2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 8 May 2007 23:22:43 -0700 Subject: [NET] link_watch: Eliminate potential delay on wrap-around When the jiffies wrap around or when the system boots up for the first time, down events can be delayed indefinitely since we no longer update linkwatch_nextevent when only urgent events are processed. This patch fixes this by setting linkwatch_nextevent when a wrap-around occurs. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/link_watch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index b5f45799c2..4674ae5741 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -101,8 +101,10 @@ static void linkwatch_schedule_work(unsigned long delay) return; /* If we wrap around we'll delay it by at most HZ. */ - if (delay > HZ) + if (delay > HZ) { + linkwatch_nextevent = jiffies; delay = 0; + } schedule_delayed_work(&linkwatch_work, delay); } -- cgit v1.2.2 From d9568ba91b1fdd1ea4fdbf9fcc76b867cca6c1d5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 9 May 2007 00:17:30 -0700 Subject: [NET] link_watch: Always schedule urgent events Urgent events may be delayed if we already have a non-urgent event queued for that device. This patch changes this by making sure that an urgent event is always looked at immediately. I've replaced the LW_RUNNING flag by LW_URGENT since whether work is scheduled is already kept track by the work queue system. The only complication is that we have to provide some exclusion for the setting linkwatch_nextevent which is available in the actual work function. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/link_watch.c | 60 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 19 deletions(-) (limited to 'net/core') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 4674ae5741..a5e372b9ec 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -26,7 +26,7 @@ enum lw_bits { - LW_RUNNING = 0, + LW_URGENT = 0, }; static unsigned long linkwatch_flags; @@ -95,18 +95,41 @@ static void linkwatch_add_event(struct net_device *dev) } -static void linkwatch_schedule_work(unsigned long delay) +static void linkwatch_schedule_work(int urgent) { - if (test_and_set_bit(LW_RUNNING, &linkwatch_flags)) + unsigned long delay = linkwatch_nextevent - jiffies; + + if (test_bit(LW_URGENT, &linkwatch_flags)) return; - /* If we wrap around we'll delay it by at most HZ. */ - if (delay > HZ) { - linkwatch_nextevent = jiffies; + /* Minimise down-time: drop delay for up event. */ + if (urgent) { + if (test_and_set_bit(LW_URGENT, &linkwatch_flags)) + return; delay = 0; } - schedule_delayed_work(&linkwatch_work, delay); + /* If we wrap around we'll delay it by at most HZ. */ + if (delay > HZ) + delay = 0; + + /* + * This is true if we've scheduled it immeditately or if we don't + * need an immediate execution and it's already pending. + */ + if (schedule_delayed_work(&linkwatch_work, delay) == !delay) + return; + + /* Don't bother if there is nothing urgent. */ + if (!test_bit(LW_URGENT, &linkwatch_flags)) + return; + + /* It's already running which is good enough. */ + if (!cancel_delayed_work(&linkwatch_work)) + return; + + /* Otherwise we reschedule it again for immediate exection. */ + schedule_delayed_work(&linkwatch_work, 0); } @@ -123,7 +146,11 @@ static void __linkwatch_run_queue(int urgent_only) */ if (!urgent_only) linkwatch_nextevent = jiffies + HZ; - clear_bit(LW_RUNNING, &linkwatch_flags); + /* Limit wrap-around effect on delay. */ + else if (time_after(linkwatch_nextevent, jiffies + HZ)) + linkwatch_nextevent = jiffies; + + clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); next = lweventlist; @@ -166,7 +193,7 @@ static void __linkwatch_run_queue(int urgent_only) } if (lweventlist) - linkwatch_schedule_work(linkwatch_nextevent - jiffies); + linkwatch_schedule_work(0); } @@ -187,21 +214,16 @@ static void linkwatch_event(struct work_struct *dummy) void linkwatch_fire_event(struct net_device *dev) { - if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { - unsigned long delay; + int urgent = linkwatch_urgent_event(dev); + if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); linkwatch_add_event(dev); + } else if (!urgent) + return; - delay = linkwatch_nextevent - jiffies; - - /* Minimise down-time: drop delay for up event. */ - if (linkwatch_urgent_event(dev)) - delay = 0; - - linkwatch_schedule_work(delay); - } + linkwatch_schedule_work(urgent); } EXPORT_SYMBOL(linkwatch_fire_event); -- cgit v1.2.2