aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-07-18 23:08:47 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-07-18 23:08:47 -0400
commit ecb2cf1a6b63825a258ff4fe0d7f3070fbe4676b (patch)
tree 4b03d332066d148f0d6c416528c6ba5e874d466a /net
parent ee114b97e67b2a572f94982567a21ac4ee17c133 (diff)
parent 3e3aac497513c669e1c62c71e1d552ea85c1d974 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: "A couple of interesting SKB fragment handling fixes, plus the usual small bits here and there: 1) Fix 64-bit divide build failure on 32-bit platforms in mlx5, from Tim Gardner. 2) Get rid of a stupid reimplementation of "%*phC" in our sysfs MAC address printing helper. 3) Fix NETIF_F_SG capability advertisement in hyperv driver, if the device can't do checksumming offloads then it shouldn't say it can do SG either. From Haiyang Zhang. 4) bgmac needs to depend on PHYLIB, from Hauke Mehrtens. 5) Don't leak DMA mappings on mapping failures, from Neil Horman. 6) We need to reset the transport header of SKBs in ipv4 before we attempt to perform early socket demux, just like ipv6 does. From Eric Dumazet. 7) Add missing locking on vxlan device removal, from Stephen Hemminger. 8) xen-netfront has to make two passes over an SKB to prepare it for transfer. One pass calculates the number of slots needed, the second massages the SKB and fills the slots. Unfortunately, the first pass doesn't calculate the number of slots properly so we can end up trying to build a MAX_SKB_FRAGS + 1 SKB which doesn't work out so well. Fix from Jan Beulich with help and discussion with several others. 9) Fix a similar problem in tun and macvtap, which have to split up scatter-gather elements at PAGE_SIZE boundaries. Don't do zerocopy if it would result in a > MAX_SKB_FRAGS skb. Fixes from Jason Wang. 10) On receive, once we've decoded the VLAN state completely, clear skb->vlan_tci. Otherwise demuxed tunnels underneath can trigger the VLAN code again, corrupting the packet. 
Fix from Eric Dumazet"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  vlan: fix a race in egress prio management
  vlan: mask vlan prio bits
  macvtap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS
  tuntap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS
  pkt_sched: sch_qfq: remove a source of high packet delay/jitter
  xen-netfront: pull on receive skb may need to happen earlier
  vxlan: add necessary locking on device removal
  hyperv: Fix the NETIF_F_SG flag setting in netvsc
  net: Fix sysfs_format_mac() code duplication.
  be2net: Fix to avoid hardware workaround when not needed
  macvtap: do not assume 802.1Q when send vlan packets
  macvtap: fix the missing ret value of TUNSETQUEUE
  ipv4: set transport header earlier
  mlx5 core: Fix __udivdi3 when compiling for 32 bit arches
  bgmac: add dependency to phylib
  net/irda: fixed style issues in irlan_eth
  ethtool: fixed trailing statements in ethtool
  ndisc: bool initializations should use true and false
  atl1e: unmap partially mapped skb on dma error and free skb
Diffstat (limited to 'net')
-rw-r--r-- net/8021q/vlan_core.c 2
-rw-r--r-- net/8021q/vlan_dev.c 7
-rw-r--r-- net/core/dev.c 11
-rw-r--r-- net/core/ethtool.c 30
-rw-r--r-- net/ethernet/eth.c 21
-rw-r--r-- net/ipv4/ip_input.c 7
-rw-r--r-- net/ipv6/ndisc.c 6
-rw-r--r-- net/irda/irlan/irlan_eth.c 31
-rw-r--r-- net/sched/sch_qfq.c 85
9 files changed, 114 insertions, 86 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 8a15eaadc4bd..4a78c4de9f20 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
9{ 9{
10 struct sk_buff *skb = *skbp; 10 struct sk_buff *skb = *skbp;
11 __be16 vlan_proto = skb->vlan_proto; 11 __be16 vlan_proto = skb->vlan_proto;
12 u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; 12 u16 vlan_id = vlan_tx_tag_get_id(skb);
13 struct net_device *vlan_dev; 13 struct net_device *vlan_dev;
14 struct vlan_pcpu_stats *rx_stats; 14 struct vlan_pcpu_stats *rx_stats;
15 15
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3a8c8fd63c88..1cd3d2a406f5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
73{ 73{
74 struct vlan_priority_tci_mapping *mp; 74 struct vlan_priority_tci_mapping *mp;
75 75
76 smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
77
76 mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; 78 mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)];
77 while (mp) { 79 while (mp) {
78 if (mp->priority == skb->priority) { 80 if (mp->priority == skb->priority) {
@@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
249 np->next = mp; 251 np->next = mp;
250 np->priority = skb_prio; 252 np->priority = skb_prio;
251 np->vlan_qos = vlan_qos; 253 np->vlan_qos = vlan_qos;
254 /* Before inserting this element in hash table, make sure all its fields
255 * are committed to memory.
256 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
257 */
258 smp_wmb();
252 vlan->egress_priority_map[skb_prio & 0xF] = np; 259 vlan->egress_priority_map[skb_prio & 0xF] = np;
253 if (vlan_qos) 260 if (vlan_qos)
254 vlan->nr_egress_mappings++; 261 vlan->nr_egress_mappings++;
diff --git a/net/core/dev.c b/net/core/dev.c
index a3d8d44cb7f4..26755dd40daa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3580,8 +3580,15 @@ ncls:
3580 } 3580 }
3581 } 3581 }
3582 3582
3583 if (vlan_tx_nonzero_tag_present(skb)) 3583 if (unlikely(vlan_tx_tag_present(skb))) {
3584 skb->pkt_type = PACKET_OTHERHOST; 3584 if (vlan_tx_tag_get_id(skb))
3585 skb->pkt_type = PACKET_OTHERHOST;
3586 /* Note: we might in the future use prio bits
3587 * and set skb->priority like in vlan_do_receive()
3588 * For the time being, just ignore Priority Code Point
3589 */
3590 skb->vlan_tci = 0;
3591 }
3585 3592
3586 /* deliver only exact match when indicated */ 3593 /* deliver only exact match when indicated */
3587 null_or_dev = deliver_exact ? skb->dev : NULL; 3594 null_or_dev = deliver_exact ? skb->dev : NULL;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ab5fa6336c84..78e9d9223e40 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -279,11 +279,16 @@ static u32 __ethtool_get_flags(struct net_device *dev)
279{ 279{
280 u32 flags = 0; 280 u32 flags = 0;
281 281
282 if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; 282 if (dev->features & NETIF_F_LRO)
283 if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN; 283 flags |= ETH_FLAG_LRO;
284 if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN; 284 if (dev->features & NETIF_F_HW_VLAN_CTAG_RX)
285 if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; 285 flags |= ETH_FLAG_RXVLAN;
286 if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; 286 if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
287 flags |= ETH_FLAG_TXVLAN;
288 if (dev->features & NETIF_F_NTUPLE)
289 flags |= ETH_FLAG_NTUPLE;
290 if (dev->features & NETIF_F_RXHASH)
291 flags |= ETH_FLAG_RXHASH;
287 292
288 return flags; 293 return flags;
289} 294}
@@ -295,11 +300,16 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
295 if (data & ~ETH_ALL_FLAGS) 300 if (data & ~ETH_ALL_FLAGS)
296 return -EINVAL; 301 return -EINVAL;
297 302
298 if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; 303 if (data & ETH_FLAG_LRO)
299 if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX; 304 features |= NETIF_F_LRO;
300 if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX; 305 if (data & ETH_FLAG_RXVLAN)
301 if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; 306 features |= NETIF_F_HW_VLAN_CTAG_RX;
302 if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; 307 if (data & ETH_FLAG_TXVLAN)
308 features |= NETIF_F_HW_VLAN_CTAG_TX;
309 if (data & ETH_FLAG_NTUPLE)
310 features |= NETIF_F_NTUPLE;
311 if (data & ETH_FLAG_RXHASH)
312 features |= NETIF_F_RXHASH;
303 313
304 /* allow changing only bits set in hw_features */ 314 /* allow changing only bits set in hw_features */
305 changed = (features ^ dev->features) & ETH_ALL_FEATURES; 315 changed = (features ^ dev->features) & ETH_ALL_FEATURES;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 5359560926bc..be1f64d35358 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -401,27 +401,8 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
401} 401}
402EXPORT_SYMBOL(alloc_etherdev_mqs); 402EXPORT_SYMBOL(alloc_etherdev_mqs);
403 403
404static size_t _format_mac_addr(char *buf, int buflen,
405 const unsigned char *addr, int len)
406{
407 int i;
408 char *cp = buf;
409
410 for (i = 0; i < len; i++) {
411 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
412 if (i == len - 1)
413 break;
414 cp += scnprintf(cp, buflen - (cp - buf), ":");
415 }
416 return cp - buf;
417}
418
419ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) 404ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
420{ 405{
421 size_t l; 406 return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
422
423 l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
424 l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
425 return (ssize_t)l;
426} 407}
427EXPORT_SYMBOL(sysfs_format_mac); 408EXPORT_SYMBOL(sysfs_format_mac);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3da817b89e9b..15e3e683adec 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -190,10 +190,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
190{ 190{
191 struct net *net = dev_net(skb->dev); 191 struct net *net = dev_net(skb->dev);
192 192
193 __skb_pull(skb, ip_hdrlen(skb)); 193 __skb_pull(skb, skb_network_header_len(skb));
194
195 /* Point into the IP datagram, just past the header. */
196 skb_reset_transport_header(skb);
197 194
198 rcu_read_lock(); 195 rcu_read_lock();
199 { 196 {
@@ -437,6 +434,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
437 goto drop; 434 goto drop;
438 } 435 }
439 436
437 skb->transport_header = skb->network_header + iph->ihl*4;
438
440 /* Remove any debris in the socket control block */ 439 /* Remove any debris in the socket control block */
441 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 440 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
442 441
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b3b5730b48c5..24c03396e008 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -479,7 +479,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
479 if (ifp) { 479 if (ifp) {
480 src_addr = solicited_addr; 480 src_addr = solicited_addr;
481 if (ifp->flags & IFA_F_OPTIMISTIC) 481 if (ifp->flags & IFA_F_OPTIMISTIC)
482 override = 0; 482 override = false;
483 inc_opt |= ifp->idev->cnf.force_tllao; 483 inc_opt |= ifp->idev->cnf.force_tllao;
484 in6_ifa_put(ifp); 484 in6_ifa_put(ifp);
485 } else { 485 } else {
@@ -557,7 +557,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
557 } 557 }
558 558
559 if (ipv6_addr_any(saddr)) 559 if (ipv6_addr_any(saddr))
560 inc_opt = 0; 560 inc_opt = false;
561 if (inc_opt) 561 if (inc_opt)
562 optlen += ndisc_opt_addr_space(dev); 562 optlen += ndisc_opt_addr_space(dev);
563 563
@@ -790,7 +790,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
790 (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { 790 (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
791 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && 791 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
792 skb->pkt_type != PACKET_HOST && 792 skb->pkt_type != PACKET_HOST &&
793 inc != 0 && 793 inc &&
794 idev->nd_parms->proxy_delay != 0) { 794 idev->nd_parms->proxy_delay != 0) {
795 /* 795 /*
796 * for anycast or proxy, 796 * for anycast or proxy,
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index d14152e866d9..ffcec225b5d9 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -44,12 +44,12 @@ static int irlan_eth_open(struct net_device *dev);
44static int irlan_eth_close(struct net_device *dev); 44static int irlan_eth_close(struct net_device *dev);
45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, 45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
46 struct net_device *dev); 46 struct net_device *dev);
47static void irlan_eth_set_multicast_list( struct net_device *dev); 47static void irlan_eth_set_multicast_list(struct net_device *dev);
48 48
49static const struct net_device_ops irlan_eth_netdev_ops = { 49static const struct net_device_ops irlan_eth_netdev_ops = {
50 .ndo_open = irlan_eth_open, 50 .ndo_open = irlan_eth_open,
51 .ndo_stop = irlan_eth_close, 51 .ndo_stop = irlan_eth_close,
52 .ndo_start_xmit = irlan_eth_xmit, 52 .ndo_start_xmit = irlan_eth_xmit,
53 .ndo_set_rx_mode = irlan_eth_set_multicast_list, 53 .ndo_set_rx_mode = irlan_eth_set_multicast_list,
54 .ndo_change_mtu = eth_change_mtu, 54 .ndo_change_mtu = eth_change_mtu,
55 .ndo_validate_addr = eth_validate_addr, 55 .ndo_validate_addr = eth_validate_addr,
@@ -110,7 +110,7 @@ static int irlan_eth_open(struct net_device *dev)
110{ 110{
111 struct irlan_cb *self = netdev_priv(dev); 111 struct irlan_cb *self = netdev_priv(dev);
112 112
113 IRDA_DEBUG(2, "%s()\n", __func__ ); 113 IRDA_DEBUG(2, "%s()\n", __func__);
114 114
115 /* Ready to play! */ 115 /* Ready to play! */
116 netif_stop_queue(dev); /* Wait until data link is ready */ 116 netif_stop_queue(dev); /* Wait until data link is ready */
@@ -137,7 +137,7 @@ static int irlan_eth_close(struct net_device *dev)
137{ 137{
138 struct irlan_cb *self = netdev_priv(dev); 138 struct irlan_cb *self = netdev_priv(dev);
139 139
140 IRDA_DEBUG(2, "%s()\n", __func__ ); 140 IRDA_DEBUG(2, "%s()\n", __func__);
141 141
142 /* Stop device */ 142 /* Stop device */
143 netif_stop_queue(dev); 143 netif_stop_queue(dev);
@@ -310,35 +310,32 @@ static void irlan_eth_set_multicast_list(struct net_device *dev)
310{ 310{
311 struct irlan_cb *self = netdev_priv(dev); 311 struct irlan_cb *self = netdev_priv(dev);
312 312
313 IRDA_DEBUG(2, "%s()\n", __func__ ); 313 IRDA_DEBUG(2, "%s()\n", __func__);
314 314
315 /* Check if data channel has been connected yet */ 315 /* Check if data channel has been connected yet */
316 if (self->client.state != IRLAN_DATA) { 316 if (self->client.state != IRLAN_DATA) {
317 IRDA_DEBUG(1, "%s(), delaying!\n", __func__ ); 317 IRDA_DEBUG(1, "%s(), delaying!\n", __func__);
318 return; 318 return;
319 } 319 }
320 320
321 if (dev->flags & IFF_PROMISC) { 321 if (dev->flags & IFF_PROMISC) {
322 /* Enable promiscuous mode */ 322 /* Enable promiscuous mode */
323 IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n"); 323 IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n");
324 } 324 } else if ((dev->flags & IFF_ALLMULTI) ||
325 else if ((dev->flags & IFF_ALLMULTI) ||
326 netdev_mc_count(dev) > HW_MAX_ADDRS) { 325 netdev_mc_count(dev) > HW_MAX_ADDRS) {
327 /* Disable promiscuous mode, use normal mode. */ 326 /* Disable promiscuous mode, use normal mode. */
328 IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); 327 IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__);
329 /* hardware_set_filter(NULL); */ 328 /* hardware_set_filter(NULL); */
330 329
331 irlan_set_multicast_filter(self, TRUE); 330 irlan_set_multicast_filter(self, TRUE);
332 } 331 } else if (!netdev_mc_empty(dev)) {
333 else if (!netdev_mc_empty(dev)) { 332 IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__);
334 IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ );
335 /* Walk the address list, and load the filter */ 333 /* Walk the address list, and load the filter */
336 /* hardware_set_filter(dev->mc_list); */ 334 /* hardware_set_filter(dev->mc_list); */
337 335
338 irlan_set_multicast_filter(self, TRUE); 336 irlan_set_multicast_filter(self, TRUE);
339 } 337 } else {
340 else { 338 IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__);
341 IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__ );
342 irlan_set_multicast_filter(self, FALSE); 339 irlan_set_multicast_filter(self, FALSE);
343 } 340 }
344 341
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index a7ab323849b6..8056fb4e618a 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -113,7 +113,6 @@
113 113
114#define FRAC_BITS 30 /* fixed point arithmetic */ 114#define FRAC_BITS 30 /* fixed point arithmetic */
115#define ONE_FP (1UL << FRAC_BITS) 115#define ONE_FP (1UL << FRAC_BITS)
116#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
117 116
118#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ 117#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */
119#define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ 118#define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */
@@ -189,6 +188,7 @@ struct qfq_sched {
189 struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ 188 struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */
190 u32 num_active_agg; /* Num. of active aggregates */ 189 u32 num_active_agg; /* Num. of active aggregates */
191 u32 wsum; /* weight sum */ 190 u32 wsum; /* weight sum */
191 u32 iwsum; /* inverse weight sum */
192 192
193 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ 193 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
194 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ 194 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
@@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
314 314
315 q->wsum += 315 q->wsum +=
316 (int) agg->class_weight * (new_num_classes - agg->num_classes); 316 (int) agg->class_weight * (new_num_classes - agg->num_classes);
317 q->iwsum = ONE_FP / q->wsum;
317 318
318 agg->num_classes = new_num_classes; 319 agg->num_classes = new_num_classes;
319} 320}
@@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
340{ 341{
341 if (!hlist_unhashed(&agg->nonfull_next)) 342 if (!hlist_unhashed(&agg->nonfull_next))
342 hlist_del_init(&agg->nonfull_next); 343 hlist_del_init(&agg->nonfull_next);
344 q->wsum -= agg->class_weight;
345 if (q->wsum != 0)
346 q->iwsum = ONE_FP / q->wsum;
347
343 if (q->in_serv_agg == agg) 348 if (q->in_serv_agg == agg)
344 q->in_serv_agg = qfq_choose_next_agg(q); 349 q->in_serv_agg = qfq_choose_next_agg(q);
345 kfree(agg); 350 kfree(agg);
@@ -834,38 +839,60 @@ static void qfq_make_eligible(struct qfq_sched *q)
834 } 839 }
835} 840}
836 841
837
838/* 842/*
839 * The index of the slot in which the aggregate is to be inserted must 843 * The index of the slot in which the input aggregate agg is to be
840 * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' 844 * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2'
841 * because the start time of the group may be moved backward by one 845 * and not a '-1' because the start time of the group may be moved
842 * slot after the aggregate has been inserted, and this would cause 846 * backward by one slot after the aggregate has been inserted, and
843 * non-empty slots to be right-shifted by one position. 847 * this would cause non-empty slots to be right-shifted by one
848 * position.
849 *
850 * QFQ+ fully satisfies this bound to the slot index if the parameters
851 * of the classes are not changed dynamically, and if QFQ+ never
852 * happens to postpone the service of agg unjustly, i.e., it never
853 * happens that the aggregate becomes backlogged and eligible, or just
854 * eligible, while an aggregate with a higher approximated finish time
855 * is being served. In particular, in this case QFQ+ guarantees that
856 * the timestamps of agg are low enough that the slot index is never
857 * higher than 2. Unfortunately, QFQ+ cannot provide the same
858 * guarantee if it happens to unjustly postpone the service of agg, or
859 * if the parameters of some class are changed.
860 *
861 * As for the first event, i.e., an out-of-order service, the
862 * upper bound to the slot index guaranteed by QFQ+ grows to
863 * 2 +
864 * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) *
865 * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1.
844 * 866 *
845 * If the weight and lmax (max_pkt_size) of the classes do not change, 867 * The following function deals with this problem by backward-shifting
846 * then QFQ+ does meet the above contraint according to the current 868 * the timestamps of agg, if needed, so as to guarantee that the slot
847 * values of its parameters. In fact, if the weight and lmax of the 869 * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may
848 * classes do not change, then, from the theory, QFQ+ guarantees that 870 * cause the service of other aggregates to be postponed, yet the
849 * the slot index is never higher than 871 * worst-case guarantees of these aggregates are not violated. In
850 * 2 + QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * 872 * fact, in case of no out-of-order service, the timestamps of agg
851 * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM) = 2 + 8 * 128 * (1 / 64) = 18 873 * would have been even lower than they are after the backward shift,
874 * because QFQ+ would have guaranteed a maximum value equal to 2 for
875 * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose
876 * service is postponed because of the backward-shift would have
877 * however waited for the service of agg before being served.
852 * 878 *
853 * When the weight of a class is increased or the lmax of the class is 879 * The other event that may cause the slot index to be higher than 2
854 * decreased, a new aggregate with smaller slot size than the original 880 * for agg is a recent change of the parameters of some class. If the
855 * parent aggregate of the class may happen to be activated. The 881 * weight of a class is increased or the lmax (max_pkt_size) of the
856 * activation of this aggregate should be properly delayed to when the 882 * class is decreased, then a new aggregate with smaller slot size
857 * service of the class has finished in the ideal system tracked by 883 * than the original parent aggregate of the class may happen to be
858 * QFQ+. If the activation of the aggregate is not delayed to this 884 * activated. The activation of this aggregate should be properly
859 * reference time instant, then this aggregate may be unjustly served 885 * delayed to when the service of the class has finished in the ideal
860 * before other aggregates waiting for service. This may cause the 886 * system tracked by QFQ+. If the activation of the aggregate is not
861 * above bound to the slot index to be violated for some of these 887 * delayed to this reference time instant, then this aggregate may be
862 * unlucky aggregates. 888 * unjustly served before other aggregates waiting for service. This
889 * may cause the above bound to the slot index to be violated for some
890 * of these unlucky aggregates.
863 * 891 *
864 * Instead of delaying the activation of the new aggregate, which is 892 * Instead of delaying the activation of the new aggregate, which is
865 * quite complex, the following inaccurate but simple solution is used: 893 * quite complex, the above-discussed capping of the slot index is
866 * if the slot index is higher than QFQ_MAX_SLOTS-2, then the 894 * used to handle also the consequences of a change of the parameters
867 * timestamps of the aggregate are shifted backward so as to let the 895 * of a class.
868 * slot index become equal to QFQ_MAX_SLOTS-2.
869 */ 896 */
870static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg, 897static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
871 u64 roundedS) 898 u64 roundedS)
@@ -1136,7 +1163,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1136 else 1163 else
1137 in_serv_agg->budget -= len; 1164 in_serv_agg->budget -= len;
1138 1165
1139 q->V += (u64)len * IWSUM; 1166 q->V += (u64)len * q->iwsum;
1140 pr_debug("qfq dequeue: len %u F %lld now %lld\n", 1167 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1141 len, (unsigned long long) in_serv_agg->F, 1168 len, (unsigned long long) in_serv_agg->F,
1142 (unsigned long long) q->V); 1169 (unsigned long long) q->V);