diff options
author | Ed Cashin <ecashin@coraid.com> | 2012-10-04 20:16:40 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-05 14:05:28 -0400 |
commit | 0c966214589b9767fd8771b71328f83bac58cb25 (patch) | |
tree | fa46832f149cb30b1847bc6eee13e79ef9c0c444 /drivers/block/aoe | |
parent | eecdf226721673095ef7849f960350897392e8bf (diff) |
aoe: support more AoE addresses with dynamic block device minor numbers

The ATA over Ethernet protocol uses a major (shelf) and minor (slot)
address to identify a particular storage target. These changes remove an
artificial limitation the aoe driver imposes on the use of AoE addresses.
For example, without these changes, the slot address has a maximum of 15,
but users commonly use slot numbers much greater than that.

The AoE shelf and slot address space is often used sparsely. Instead of
using a static mapping between AoE addresses and the block device minor
number, the block device minor numbers are now allocated on demand.

Signed-off-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Diffstat (limited to 'drivers/block/aoe')
-rw-r--r-- | drivers/block/aoe/aoe.h | 6 | ||||
-rw-r--r-- | drivers/block/aoe/aoeblk.c | 2 | ||||
-rw-r--r-- | drivers/block/aoe/aoechr.c | 2 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 25 | ||||
-rw-r--r-- | drivers/block/aoe/aoedev.c | 86 |
5 files changed, 72 insertions, 49 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 27d0a214f3bc..7b694f7da2de 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h | |||
@@ -49,6 +49,8 @@ struct aoe_hdr { | |||
49 | __be32 tag; | 49 | __be32 tag; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | #define AOE_MAXSHELF (0xffff-1) /* one less than the broadcast shelf address */ | ||
53 | |||
52 | struct aoe_atahdr { | 54 | struct aoe_atahdr { |
53 | unsigned char aflags; | 55 | unsigned char aflags; |
54 | unsigned char errfeat; | 56 | unsigned char errfeat; |
@@ -211,8 +213,7 @@ void aoe_ktstop(struct ktstate *k); | |||
211 | 213 | ||
212 | int aoedev_init(void); | 214 | int aoedev_init(void); |
213 | void aoedev_exit(void); | 215 | void aoedev_exit(void); |
214 | struct aoedev *aoedev_by_aoeaddr(int maj, int min); | 216 | struct aoedev *aoedev_by_aoeaddr(ulong maj, int min, int do_alloc); |
215 | struct aoedev *aoedev_by_sysminor_m(ulong sysminor); | ||
216 | void aoedev_downdev(struct aoedev *d); | 217 | void aoedev_downdev(struct aoedev *d); |
217 | int aoedev_flush(const char __user *str, size_t size); | 218 | int aoedev_flush(const char __user *str, size_t size); |
218 | void aoe_failbuf(struct aoedev *, struct buf *); | 219 | void aoe_failbuf(struct aoedev *, struct buf *); |
@@ -223,4 +224,3 @@ void aoenet_exit(void); | |||
223 | void aoenet_xmit(struct sk_buff_head *); | 224 | void aoenet_xmit(struct sk_buff_head *); |
224 | int is_aoe_netif(struct net_device *ifp); | 225 | int is_aoe_netif(struct net_device *ifp); |
225 | int set_aoe_iflist(const char __user *str, size_t size); | 226 | int set_aoe_iflist(const char __user *str, size_t size); |
226 | |||
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 83160ab0d273..00dfc5008ad4 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c | |||
@@ -249,7 +249,7 @@ aoeblk_gdalloc(void *vp) | |||
249 | q->queuedata = d; | 249 | q->queuedata = d; |
250 | d->gd = gd; | 250 | d->gd = gd; |
251 | gd->major = AOE_MAJOR; | 251 | gd->major = AOE_MAJOR; |
252 | gd->first_minor = d->sysminor * AOE_PARTITIONS; | 252 | gd->first_minor = d->sysminor; |
253 | gd->fops = &aoe_bdops; | 253 | gd->fops = &aoe_bdops; |
254 | gd->private_data = d; | 254 | gd->private_data = d; |
255 | set_capacity(gd, d->ssize); | 255 | set_capacity(gd, d->ssize); |
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index deb30c183fba..ed57a890c643 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c | |||
@@ -91,7 +91,7 @@ revalidate(const char __user *str, size_t size) | |||
91 | pr_err("aoe: invalid device specification %s\n", buf); | 91 | pr_err("aoe: invalid device specification %s\n", buf); |
92 | return -EINVAL; | 92 | return -EINVAL; |
93 | } | 93 | } |
94 | d = aoedev_by_aoeaddr(major, minor); | 94 | d = aoedev_by_aoeaddr(major, minor, 0); |
95 | if (!d) | 95 | if (!d) |
96 | return -EINVAL; | 96 | return -EINVAL; |
97 | spin_lock_irqsave(&d->lock, flags); | 97 | spin_lock_irqsave(&d->lock, flags); |
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 39dacdbda7f1..94e810c36de1 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c | |||
@@ -1149,7 +1149,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) | |||
1149 | 1149 | ||
1150 | h = (struct aoe_hdr *) skb->data; | 1150 | h = (struct aoe_hdr *) skb->data; |
1151 | aoemajor = be16_to_cpu(get_unaligned(&h->major)); | 1151 | aoemajor = be16_to_cpu(get_unaligned(&h->major)); |
1152 | d = aoedev_by_aoeaddr(aoemajor, h->minor); | 1152 | d = aoedev_by_aoeaddr(aoemajor, h->minor, 0); |
1153 | if (d == NULL) { | 1153 | if (d == NULL) { |
1154 | snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " | 1154 | snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " |
1155 | "for unknown device %d.%d\n", | 1155 | "for unknown device %d.%d\n", |
@@ -1330,7 +1330,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb) | |||
1330 | struct aoe_hdr *h; | 1330 | struct aoe_hdr *h; |
1331 | struct aoe_cfghdr *ch; | 1331 | struct aoe_cfghdr *ch; |
1332 | struct aoetgt *t; | 1332 | struct aoetgt *t; |
1333 | ulong flags, sysminor, aoemajor; | 1333 | ulong flags, aoemajor; |
1334 | struct sk_buff *sl; | 1334 | struct sk_buff *sl; |
1335 | struct sk_buff_head queue; | 1335 | struct sk_buff_head queue; |
1336 | u16 n; | 1336 | u16 n; |
@@ -1349,18 +1349,15 @@ aoecmd_cfg_rsp(struct sk_buff *skb) | |||
1349 | "Check shelf dip switches.\n"); | 1349 | "Check shelf dip switches.\n"); |
1350 | return; | 1350 | return; |
1351 | } | 1351 | } |
1352 | if (h->minor >= NPERSHELF) { | 1352 | if (aoemajor > AOE_MAXSHELF) { |
1353 | pr_err("aoe: e%ld.%d %s, %d\n", | 1353 | pr_info("aoe: e%ld.%d: shelf number too large\n", |
1354 | aoemajor, h->minor, | 1354 | aoemajor, (int) h->minor); |
1355 | "slot number larger than the maximum", | ||
1356 | NPERSHELF-1); | ||
1357 | return; | 1355 | return; |
1358 | } | 1356 | } |
1359 | 1357 | ||
1360 | sysminor = SYSMINOR(aoemajor, h->minor); | 1358 | d = aoedev_by_aoeaddr(aoemajor, h->minor, 1); |
1361 | if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) { | 1359 | if (d == NULL) { |
1362 | printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n", | 1360 | pr_info("aoe: device allocation failure\n"); |
1363 | aoemajor, (int) h->minor); | ||
1364 | return; | 1361 | return; |
1365 | } | 1362 | } |
1366 | 1363 | ||
@@ -1368,12 +1365,6 @@ aoecmd_cfg_rsp(struct sk_buff *skb) | |||
1368 | if (n > aoe_maxout) /* keep it reasonable */ | 1365 | if (n > aoe_maxout) /* keep it reasonable */ |
1369 | n = aoe_maxout; | 1366 | n = aoe_maxout; |
1370 | 1367 | ||
1371 | d = aoedev_by_sysminor_m(sysminor); | ||
1372 | if (d == NULL) { | ||
1373 | printk(KERN_INFO "aoe: device sysminor_m failure\n"); | ||
1374 | return; | ||
1375 | } | ||
1376 | |||
1377 | spin_lock_irqsave(&d->lock, flags); | 1368 | spin_lock_irqsave(&d->lock, flags); |
1378 | 1369 | ||
1379 | t = gettgt(d, h->src); | 1370 | t = gettgt(d, h->src); |
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index ccaecff4c69b..68a7a5a9ced0 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c | |||
@@ -9,6 +9,8 @@ | |||
9 | #include <linux/netdevice.h> | 9 | #include <linux/netdevice.h> |
10 | #include <linux/delay.h> | 10 | #include <linux/delay.h> |
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/bitmap.h> | ||
13 | #include <linux/kdev_t.h> | ||
12 | #include "aoe.h" | 14 | #include "aoe.h" |
13 | 15 | ||
14 | static void dummy_timer(ulong); | 16 | static void dummy_timer(ulong); |
@@ -19,35 +21,63 @@ static void skbpoolfree(struct aoedev *d); | |||
19 | static struct aoedev *devlist; | 21 | static struct aoedev *devlist; |
20 | static DEFINE_SPINLOCK(devlist_lock); | 22 | static DEFINE_SPINLOCK(devlist_lock); |
21 | 23 | ||
22 | /* | 24 | /* Because some systems will have one, many, or no |
23 | * Users who grab a pointer to the device with aoedev_by_aoeaddr or | 25 | * - partitions, |
24 | * aoedev_by_sysminor_m automatically get a reference count and must | 26 | * - slots per shelf, |
25 | * be responsible for performing a aoedev_put. With the addition of | 27 | * - or shelves, |
26 | * async kthread processing I'm no longer confident that we can | 28 | * we need some flexibility in the way the minor numbers |
27 | * guarantee consistency in the face of device flushes. | 29 | * are allocated. So they are dynamic. |
28 | * | ||
29 | * For the time being, we only bother to add extra references for | ||
30 | * frames sitting on the iocq. When the kthreads finish processing | ||
31 | * these frames, they will aoedev_put the device. | ||
32 | */ | 30 | */ |
33 | struct aoedev * | 31 | #define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS) |
34 | aoedev_by_aoeaddr(int maj, int min) | 32 | |
33 | static DEFINE_SPINLOCK(used_minors_lock); | ||
34 | static DECLARE_BITMAP(used_minors, N_DEVS); | ||
35 | |||
36 | static int | ||
37 | minor_get(ulong *minor) | ||
35 | { | 38 | { |
36 | struct aoedev *d; | ||
37 | ulong flags; | 39 | ulong flags; |
40 | ulong n; | ||
41 | int error = 0; | ||
42 | |||
43 | spin_lock_irqsave(&used_minors_lock, flags); | ||
44 | n = find_first_zero_bit(used_minors, N_DEVS); | ||
45 | if (n < N_DEVS) | ||
46 | set_bit(n, used_minors); | ||
47 | else | ||
48 | error = -1; | ||
49 | spin_unlock_irqrestore(&used_minors_lock, flags); | ||
50 | |||
51 | *minor = n * AOE_PARTITIONS; | ||
52 | return error; | ||
53 | } | ||
38 | 54 | ||
39 | spin_lock_irqsave(&devlist_lock, flags); | 55 | static void |
56 | minor_free(ulong minor) | ||
57 | { | ||
58 | ulong flags; | ||
40 | 59 | ||
41 | for (d=devlist; d; d=d->next) | 60 | minor /= AOE_PARTITIONS; |
42 | if (d->aoemajor == maj && d->aoeminor == min) { | 61 | BUG_ON(minor >= N_DEVS); |
43 | d->ref++; | ||
44 | break; | ||
45 | } | ||
46 | 62 | ||
47 | spin_unlock_irqrestore(&devlist_lock, flags); | 63 | spin_lock_irqsave(&used_minors_lock, flags); |
48 | return d; | 64 | BUG_ON(!test_bit(minor, used_minors)); |
65 | clear_bit(minor, used_minors); | ||
66 | spin_unlock_irqrestore(&used_minors_lock, flags); | ||
49 | } | 67 | } |
50 | 68 | ||
69 | /* | ||
70 | * Users who grab a pointer to the device with aoedev_by_aoeaddr | ||
71 | * automatically get a reference count and must be responsible | ||
72 | * for performing a aoedev_put. With the addition of async | ||
73 | * kthread processing I'm no longer confident that we can | ||
74 | * guarantee consistency in the face of device flushes. | ||
75 | * | ||
76 | * For the time being, we only bother to add extra references for | ||
77 | * frames sitting on the iocq. When the kthreads finish processing | ||
78 | * these frames, they will aoedev_put the device. | ||
79 | */ | ||
80 | |||
51 | void | 81 | void |
52 | aoedev_put(struct aoedev *d) | 82 | aoedev_put(struct aoedev *d) |
53 | { | 83 | { |
@@ -159,6 +189,7 @@ aoedev_freedev(struct aoedev *d) | |||
159 | if (d->bufpool) | 189 | if (d->bufpool) |
160 | mempool_destroy(d->bufpool); | 190 | mempool_destroy(d->bufpool); |
161 | skbpoolfree(d); | 191 | skbpoolfree(d); |
192 | minor_free(d->sysminor); | ||
162 | kfree(d); | 193 | kfree(d); |
163 | } | 194 | } |
164 | 195 | ||
@@ -246,22 +277,23 @@ skbpoolfree(struct aoedev *d) | |||
246 | __skb_queue_head_init(&d->skbpool); | 277 | __skb_queue_head_init(&d->skbpool); |
247 | } | 278 | } |
248 | 279 | ||
249 | /* find it or malloc it */ | 280 | /* find it or allocate it */ |
250 | struct aoedev * | 281 | struct aoedev * |
251 | aoedev_by_sysminor_m(ulong sysminor) | 282 | aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) |
252 | { | 283 | { |
253 | struct aoedev *d; | 284 | struct aoedev *d; |
254 | int i; | 285 | int i; |
255 | ulong flags; | 286 | ulong flags; |
287 | ulong sysminor; | ||
256 | 288 | ||
257 | spin_lock_irqsave(&devlist_lock, flags); | 289 | spin_lock_irqsave(&devlist_lock, flags); |
258 | 290 | ||
259 | for (d=devlist; d; d=d->next) | 291 | for (d=devlist; d; d=d->next) |
260 | if (d->sysminor == sysminor) { | 292 | if (d->aoemajor == maj && d->aoeminor == min) { |
261 | d->ref++; | 293 | d->ref++; |
262 | break; | 294 | break; |
263 | } | 295 | } |
264 | if (d) | 296 | if (d || !do_alloc || minor_get(&sysminor) < 0) |
265 | goto out; | 297 | goto out; |
266 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); | 298 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); |
267 | if (!d) | 299 | if (!d) |
@@ -280,8 +312,8 @@ aoedev_by_sysminor_m(ulong sysminor) | |||
280 | for (i = 0; i < NFACTIVE; i++) | 312 | for (i = 0; i < NFACTIVE; i++) |
281 | INIT_LIST_HEAD(&d->factive[i]); | 313 | INIT_LIST_HEAD(&d->factive[i]); |
282 | d->sysminor = sysminor; | 314 | d->sysminor = sysminor; |
283 | d->aoemajor = AOEMAJOR(sysminor); | 315 | d->aoemajor = maj; |
284 | d->aoeminor = AOEMINOR(sysminor); | 316 | d->aoeminor = min; |
285 | d->mintimer = MINTIMER; | 317 | d->mintimer = MINTIMER; |
286 | d->next = devlist; | 318 | d->next = devlist; |
287 | devlist = d; | 319 | devlist = d; |