diff options
author | Krishna Kumar <krkumar2@in.ibm.com> | 2010-08-04 02:15:59 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-08-17 00:06:25 -0400 |
commit | 1565c7c1c4c8e931bdba66abc8aa6f141a406872 (patch) | |
tree | 2e2f732e113faa24f4b0e7c804b37bf4283595c3 | |
parent | bfb564e7391340638afe4ad67744a8f3858e7566 (diff) |
macvtap: Implement multiqueue for macvtap driver
Implement a multiqueue facility for the macvtap driver. The idea is that
a macvtap device can be opened multiple times, and each resulting fd can
be registered separately, e.g. as a backend for vhost.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/macvtap.c | 99 | ||||
-rw-r--r-- | include/linux/if_macvlan.h | 9 |
2 files changed, 90 insertions, 18 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 3b1c54a9c6ef..42567279843e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c | |||
@@ -84,26 +84,45 @@ static const struct proto_ops macvtap_socket_ops; | |||
84 | static DEFINE_SPINLOCK(macvtap_lock); | 84 | static DEFINE_SPINLOCK(macvtap_lock); |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Choose the next free queue, for now there is only one | 87 | * get_slot: return a [unused/occupied] slot in vlan->taps[]: |
88 | * - if 'q' is NULL, return the first empty slot; | ||
89 | * - otherwise, return the slot this pointer occupies. | ||
88 | */ | 90 | */ |
91 | static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q) | ||
92 | { | ||
93 | int i; | ||
94 | |||
95 | for (i = 0; i < MAX_MACVTAP_QUEUES; i++) { | ||
96 | if (rcu_dereference(vlan->taps[i]) == q) | ||
97 | return i; | ||
98 | } | ||
99 | |||
100 | /* Should never happen */ | ||
101 | BUG_ON(1); | ||
102 | } | ||
103 | |||
89 | static int macvtap_set_queue(struct net_device *dev, struct file *file, | 104 | static int macvtap_set_queue(struct net_device *dev, struct file *file, |
90 | struct macvtap_queue *q) | 105 | struct macvtap_queue *q) |
91 | { | 106 | { |
92 | struct macvlan_dev *vlan = netdev_priv(dev); | 107 | struct macvlan_dev *vlan = netdev_priv(dev); |
108 | int index; | ||
93 | int err = -EBUSY; | 109 | int err = -EBUSY; |
94 | 110 | ||
95 | spin_lock(&macvtap_lock); | 111 | spin_lock(&macvtap_lock); |
96 | if (rcu_dereference(vlan->tap)) | 112 | if (vlan->numvtaps == MAX_MACVTAP_QUEUES) |
97 | goto out; | 113 | goto out; |
98 | 114 | ||
99 | err = 0; | 115 | err = 0; |
116 | index = get_slot(vlan, NULL); | ||
100 | rcu_assign_pointer(q->vlan, vlan); | 117 | rcu_assign_pointer(q->vlan, vlan); |
101 | rcu_assign_pointer(vlan->tap, q); | 118 | rcu_assign_pointer(vlan->taps[index], q); |
102 | sock_hold(&q->sk); | 119 | sock_hold(&q->sk); |
103 | 120 | ||
104 | q->file = file; | 121 | q->file = file; |
105 | file->private_data = q; | 122 | file->private_data = q; |
106 | 123 | ||
124 | vlan->numvtaps++; | ||
125 | |||
107 | out: | 126 | out: |
108 | spin_unlock(&macvtap_lock); | 127 | spin_unlock(&macvtap_lock); |
109 | return err; | 128 | return err; |
@@ -124,9 +143,12 @@ static void macvtap_put_queue(struct macvtap_queue *q) | |||
124 | spin_lock(&macvtap_lock); | 143 | spin_lock(&macvtap_lock); |
125 | vlan = rcu_dereference(q->vlan); | 144 | vlan = rcu_dereference(q->vlan); |
126 | if (vlan) { | 145 | if (vlan) { |
127 | rcu_assign_pointer(vlan->tap, NULL); | 146 | int index = get_slot(vlan, q); |
147 | |||
148 | rcu_assign_pointer(vlan->taps[index], NULL); | ||
128 | rcu_assign_pointer(q->vlan, NULL); | 149 | rcu_assign_pointer(q->vlan, NULL); |
129 | sock_put(&q->sk); | 150 | sock_put(&q->sk); |
151 | --vlan->numvtaps; | ||
130 | } | 152 | } |
131 | 153 | ||
132 | spin_unlock(&macvtap_lock); | 154 | spin_unlock(&macvtap_lock); |
@@ -136,39 +158,82 @@ static void macvtap_put_queue(struct macvtap_queue *q) | |||
136 | } | 158 | } |
137 | 159 | ||
138 | /* | 160 | /* |
139 | * Since we only support one queue, just dereference the pointer. | 161 | * Select a queue based on the rxq of the device on which this packet |
162 | * arrived. If the incoming device is not mq, calculate a flow hash | ||
163 | * to select a queue. If all fails, find the first available queue. | ||
164 | * Cache vlan->numvtaps since it can become zero during the execution | ||
165 | * of this function. | ||
140 | */ | 166 | */ |
141 | static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, | 167 | static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, |
142 | struct sk_buff *skb) | 168 | struct sk_buff *skb) |
143 | { | 169 | { |
144 | struct macvlan_dev *vlan = netdev_priv(dev); | 170 | struct macvlan_dev *vlan = netdev_priv(dev); |
171 | struct macvtap_queue *tap = NULL; | ||
172 | int numvtaps = vlan->numvtaps; | ||
173 | __u32 rxq; | ||
174 | |||
175 | if (!numvtaps) | ||
176 | goto out; | ||
177 | |||
178 | if (likely(skb_rx_queue_recorded(skb))) { | ||
179 | rxq = skb_get_rx_queue(skb); | ||
180 | |||
181 | while (unlikely(rxq >= numvtaps)) | ||
182 | rxq -= numvtaps; | ||
183 | |||
184 | tap = rcu_dereference(vlan->taps[rxq]); | ||
185 | if (tap) | ||
186 | goto out; | ||
187 | } | ||
188 | |||
189 | /* Check if we can use flow to select a queue */ | ||
190 | rxq = skb_get_rxhash(skb); | ||
191 | if (rxq) { | ||
192 | tap = rcu_dereference(vlan->taps[rxq % numvtaps]); | ||
193 | if (tap) | ||
194 | goto out; | ||
195 | } | ||
145 | 196 | ||
146 | return rcu_dereference(vlan->tap); | 197 | /* Everything failed - find first available queue */ |
198 | for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) { | ||
199 | tap = rcu_dereference(vlan->taps[rxq]); | ||
200 | if (tap) | ||
201 | break; | ||
202 | } | ||
203 | |||
204 | out: | ||
205 | return tap; | ||
147 | } | 206 | } |
148 | 207 | ||
149 | /* | 208 | /* |
150 | * The net_device is going away, give up the reference | 209 | * The net_device is going away, give up the reference |
151 | * that it holds on the queue (all the queues one day) | 210 | * that it holds on all queues and safely set the pointer |
152 | * and safely set the pointer from the queues to NULL. | 211 | * from the queues to NULL. |
153 | */ | 212 | */ |
154 | static void macvtap_del_queues(struct net_device *dev) | 213 | static void macvtap_del_queues(struct net_device *dev) |
155 | { | 214 | { |
156 | struct macvlan_dev *vlan = netdev_priv(dev); | 215 | struct macvlan_dev *vlan = netdev_priv(dev); |
157 | struct macvtap_queue *q; | 216 | struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES]; |
217 | int i, j = 0; | ||
158 | 218 | ||
219 | /* macvtap_put_queue can free some slots, so go through all slots */ | ||
159 | spin_lock(&macvtap_lock); | 220 | spin_lock(&macvtap_lock); |
160 | q = rcu_dereference(vlan->tap); | 221 | for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) { |
161 | if (!q) { | 222 | q = rcu_dereference(vlan->taps[i]); |
162 | spin_unlock(&macvtap_lock); | 223 | if (q) { |
163 | return; | 224 | qlist[j++] = q; |
225 | rcu_assign_pointer(vlan->taps[i], NULL); | ||
226 | rcu_assign_pointer(q->vlan, NULL); | ||
227 | vlan->numvtaps--; | ||
228 | } | ||
164 | } | 229 | } |
165 | 230 | BUG_ON(vlan->numvtaps != 0); | |
166 | rcu_assign_pointer(vlan->tap, NULL); | ||
167 | rcu_assign_pointer(q->vlan, NULL); | ||
168 | spin_unlock(&macvtap_lock); | 231 | spin_unlock(&macvtap_lock); |
169 | 232 | ||
170 | synchronize_rcu(); | 233 | synchronize_rcu(); |
171 | sock_put(&q->sk); | 234 | |
235 | for (--j; j >= 0; j--) | ||
236 | sock_put(&qlist[j]->sk); | ||
172 | } | 237 | } |
173 | 238 | ||
174 | /* | 239 | /* |
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 35280b302290..8a2fd66a8b5f 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h | |||
@@ -40,6 +40,12 @@ struct macvlan_rx_stats { | |||
40 | unsigned long rx_errors; | 40 | unsigned long rx_errors; |
41 | }; | 41 | }; |
42 | 42 | ||
43 | /* | ||
44 | * Maximum times a macvtap device can be opened. This can be used to | ||
45 | * configure the number of receive queue, e.g. for multiqueue virtio. | ||
46 | */ | ||
47 | #define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16) | ||
48 | |||
43 | struct macvlan_dev { | 49 | struct macvlan_dev { |
44 | struct net_device *dev; | 50 | struct net_device *dev; |
45 | struct list_head list; | 51 | struct list_head list; |
@@ -50,7 +56,8 @@ struct macvlan_dev { | |||
50 | enum macvlan_mode mode; | 56 | enum macvlan_mode mode; |
51 | int (*receive)(struct sk_buff *skb); | 57 | int (*receive)(struct sk_buff *skb); |
52 | int (*forward)(struct net_device *dev, struct sk_buff *skb); | 58 | int (*forward)(struct net_device *dev, struct sk_buff *skb); |
53 | struct macvtap_queue *tap; | 59 | struct macvtap_queue *taps[MAX_MACVTAP_QUEUES]; |
60 | int numvtaps; | ||
54 | }; | 61 | }; |
55 | 62 | ||
56 | static inline void macvlan_count_rx(const struct macvlan_dev *vlan, | 63 | static inline void macvlan_count_rx(const struct macvlan_dev *vlan, |