Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/Kconfig                |   12
-rw-r--r--  drivers/block/Makefile               |    1
-rw-r--r--  drivers/block/aoe/aoe.h              |   93
-rw-r--r--  drivers/block/aoe/aoeblk.c           |   91
-rw-r--r--  drivers/block/aoe/aoechr.c           |   13
-rw-r--r--  drivers/block/aoe/aoecmd.c           | 1233
-rw-r--r--  drivers/block/aoe/aoedev.c           |  265
-rw-r--r--  drivers/block/aoe/aoemain.c          |   10
-rw-r--r--  drivers/block/aoe/aoenet.c           |   61
-rw-r--r--  drivers/block/cciss_scsi.c           |    1
-rw-r--r--  drivers/block/floppy.c               |    5
-rw-r--r--  drivers/block/loop.c                 |    4
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c    |   38
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.h    |   10
-rw-r--r--  drivers/block/nbd.c                  |   32
-rw-r--r--  drivers/block/nvme.c                 |  155
-rw-r--r--  drivers/block/rbd.c                  | 1789
-rw-r--r--  drivers/block/rbd_types.h            |   27
-rw-r--r--  drivers/block/ub.c                   | 2474
-rw-r--r--  drivers/block/virtio_blk.c           |  306
-rw-r--r--  drivers/block/xen-blkback/blkback.c  |    3
-rw-r--r--  drivers/block/xen-blkfront.c         |    4
22 files changed, 2802 insertions, 3825 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index a796407123c7..f529407db93f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -353,18 +353,6 @@ config BLK_DEV_SX8
353 | 353 | ||
354 | Use devices /dev/sx8/$N and /dev/sx8/$Np$M. | 354 | Use devices /dev/sx8/$N and /dev/sx8/$Np$M. |
355 | 355 | ||
356 | config BLK_DEV_UB | ||
357 | tristate "Low Performance USB Block driver (deprecated)" | ||
358 | depends on USB | ||
359 | help | ||
360 | This driver supports certain USB attached storage devices | ||
361 | such as flash keys. | ||
362 | |||
363 | If you enable this driver, it is recommended to avoid conflicts | ||
364 | with usb-storage by enabling USB_LIBUSUAL. | ||
365 | |||
366 | If unsure, say N. | ||
367 | |||
368 | config BLK_DEV_RAM | 356 | config BLK_DEV_RAM |
369 | tristate "RAM block device support" | 357 | tristate "RAM block device support" |
370 | ---help--- | 358 | ---help--- |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 5b795059f8fb..17e82df3df74 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -33,7 +33,6 @@ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
33 | 33 | ||
34 | obj-$(CONFIG_VIODASD) += viodasd.o | 34 | obj-$(CONFIG_VIODASD) += viodasd.o |
35 | obj-$(CONFIG_BLK_DEV_SX8) += sx8.o | 35 | obj-$(CONFIG_BLK_DEV_SX8) += sx8.o |
36 | obj-$(CONFIG_BLK_DEV_UB) += ub.o | ||
37 | obj-$(CONFIG_BLK_DEV_HD) += hd.o | 36 | obj-$(CONFIG_BLK_DEV_HD) += hd.o |
38 | 37 | ||
39 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o | 38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o |
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index db195abad698..d2ed7f18d1ac 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
1 | /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
2 | #define VERSION "47" | 2 | #define VERSION "50" |
3 | #define AOE_MAJOR 152 | 3 | #define AOE_MAJOR 152 |
4 | #define DEVICE_NAME "aoe" | 4 | #define DEVICE_NAME "aoe" |
5 | 5 | ||
@@ -10,9 +10,6 @@
10 | #define AOE_PARTITIONS (16) | 10 | #define AOE_PARTITIONS (16) |
11 | #endif | 11 | #endif |
12 | 12 | ||
13 | #define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * NPERSHELF + (aoeminor)) | ||
14 | #define AOEMAJOR(sysminor) ((sysminor) / NPERSHELF) | ||
15 | #define AOEMINOR(sysminor) ((sysminor) % NPERSHELF) | ||
16 | #define WHITESPACE " \t\v\f\n" | 13 | #define WHITESPACE " \t\v\f\n" |
17 | 14 | ||
18 | enum { | 15 | enum { |
@@ -75,72 +72,67 @@ enum {
75 | DEVFL_UP = 1, /* device is installed in system and ready for AoE->ATA commands */ | 72 | DEVFL_UP = 1, /* device is installed in system and ready for AoE->ATA commands */ |
76 | DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */ | 73 | DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */ |
77 | DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ | 74 | DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ |
78 | DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */ | 75 | DEVFL_GDALLOC = (1<<3), /* need to alloc gendisk */ |
79 | DEVFL_GDALLOC = (1<<4), /* need to alloc gendisk */ | 76 | DEVFL_KICKME = (1<<4), /* slow polling network card catch */ |
80 | DEVFL_KICKME = (1<<5), /* slow polling network card catch */ | 77 | DEVFL_NEWSIZE = (1<<5), /* need to update dev size in block layer */ |
81 | DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ | ||
82 | |||
83 | BUFFL_FAIL = 1, | ||
84 | }; | 78 | }; |
85 | 79 | ||
86 | enum { | 80 | enum { |
87 | DEFAULTBCNT = 2 * 512, /* 2 sectors */ | 81 | DEFAULTBCNT = 2 * 512, /* 2 sectors */ |
88 | NPERSHELF = 16, /* number of slots per shelf address */ | ||
89 | FREETAG = -1, | ||
90 | MIN_BUFS = 16, | 82 | MIN_BUFS = 16, |
91 | NTARGETS = 8, | 83 | NTARGETS = 8, |
92 | NAOEIFS = 8, | 84 | NAOEIFS = 8, |
93 | NSKBPOOLMAX = 128, | 85 | NSKBPOOLMAX = 256, |
86 | NFACTIVE = 61, | ||
94 | 87 | ||
95 | TIMERTICK = HZ / 10, | 88 | TIMERTICK = HZ / 10, |
96 | MINTIMER = HZ >> 2, | 89 | MINTIMER = HZ >> 2, |
97 | MAXTIMER = HZ << 1, | 90 | MAXTIMER = HZ << 1, |
98 | HELPWAIT = 20, | ||
99 | }; | 91 | }; |
100 | 92 | ||
101 | struct buf { | 93 | struct buf { |
102 | struct list_head bufs; | ||
103 | ulong stime; /* for disk stats */ | ||
104 | ulong flags; | ||
105 | ulong nframesout; | 94 | ulong nframesout; |
106 | ulong resid; | 95 | ulong resid; |
107 | ulong bv_resid; | 96 | ulong bv_resid; |
108 | ulong bv_off; | ||
109 | sector_t sector; | 97 | sector_t sector; |
110 | struct bio *bio; | 98 | struct bio *bio; |
111 | struct bio_vec *bv; | 99 | struct bio_vec *bv; |
100 | struct request *rq; | ||
112 | }; | 101 | }; |
113 | 102 | ||
114 | struct frame { | 103 | struct frame { |
115 | int tag; | 104 | struct list_head head; |
105 | u32 tag; | ||
116 | ulong waited; | 106 | ulong waited; |
107 | struct aoetgt *t; /* parent target I belong to */ | ||
108 | sector_t lba; | ||
109 | struct sk_buff *skb; /* command skb freed on module exit */ | ||
110 | struct sk_buff *r_skb; /* response skb for async processing */ | ||
117 | struct buf *buf; | 111 | struct buf *buf; |
118 | char *bufaddr; | 112 | struct bio_vec *bv; |
119 | ulong bcnt; | 113 | ulong bcnt; |
120 | sector_t lba; | 114 | ulong bv_off; |
121 | struct sk_buff *skb; | ||
122 | }; | 115 | }; |
123 | 116 | ||
124 | struct aoeif { | 117 | struct aoeif { |
125 | struct net_device *nd; | 118 | struct net_device *nd; |
126 | unsigned char lost; | 119 | ulong lost; |
127 | unsigned char lostjumbo; | 120 | int bcnt; |
128 | ushort maxbcnt; | ||
129 | }; | 121 | }; |
130 | 122 | ||
131 | struct aoetgt { | 123 | struct aoetgt { |
132 | unsigned char addr[6]; | 124 | unsigned char addr[6]; |
133 | ushort nframes; | 125 | ushort nframes; |
134 | struct frame *frames; | 126 | struct aoedev *d; /* parent device I belong to */ |
127 | struct list_head ffree; /* list of free frames */ | ||
135 | struct aoeif ifs[NAOEIFS]; | 128 | struct aoeif ifs[NAOEIFS]; |
136 | struct aoeif *ifp; /* current aoeif in use */ | 129 | struct aoeif *ifp; /* current aoeif in use */ |
137 | ushort nout; | 130 | ushort nout; |
138 | ushort maxout; | 131 | ushort maxout; |
139 | u16 lasttag; /* last tag sent */ | 132 | ulong falloc; |
140 | u16 useme; | ||
141 | ulong lastwadj; /* last window adjustment */ | 133 | ulong lastwadj; /* last window adjustment */ |
134 | int minbcnt; | ||
142 | int wpkts, rpkts; | 135 | int wpkts, rpkts; |
143 | int dataref; | ||
144 | }; | 136 | }; |
145 | 137 | ||
146 | struct aoedev { | 138 | struct aoedev { |
@@ -153,6 +145,9 @@ struct aoedev {
153 | u16 rttavg; /* round trip average of requests/responses */ | 145 | u16 rttavg; /* round trip average of requests/responses */ |
154 | u16 mintimer; | 146 | u16 mintimer; |
155 | u16 fw_ver; /* version of blade's firmware */ | 147 | u16 fw_ver; /* version of blade's firmware */ |
148 | u16 lasttag; /* last tag sent */ | ||
149 | u16 useme; | ||
150 | ulong ref; | ||
156 | struct work_struct work;/* disk create work struct */ | 151 | struct work_struct work;/* disk create work struct */ |
157 | struct gendisk *gd; | 152 | struct gendisk *gd; |
158 | struct request_queue *blkq; | 153 | struct request_queue *blkq; |
@@ -160,16 +155,31 @@ struct aoedev {
160 | sector_t ssize; | 155 | sector_t ssize; |
161 | struct timer_list timer; | 156 | struct timer_list timer; |
162 | spinlock_t lock; | 157 | spinlock_t lock; |
163 | struct sk_buff_head sendq; | ||
164 | struct sk_buff_head skbpool; | 158 | struct sk_buff_head skbpool; |
165 | mempool_t *bufpool; /* for deadlock-free Buf allocation */ | 159 | mempool_t *bufpool; /* for deadlock-free Buf allocation */ |
166 | struct list_head bufq; /* queue of bios to work on */ | 160 | struct { /* pointers to work in progress */ |
167 | struct buf *inprocess; /* the one we're currently working on */ | 161 | struct buf *buf; |
162 | struct bio *nxbio; | ||
163 | struct request *rq; | ||
164 | } ip; | ||
165 | ulong maxbcnt; | ||
166 | struct list_head factive[NFACTIVE]; /* hash of active frames */ | ||
168 | struct aoetgt *targets[NTARGETS]; | 167 | struct aoetgt *targets[NTARGETS]; |
169 | struct aoetgt **tgt; /* target in use when working */ | 168 | struct aoetgt **tgt; /* target in use when working */ |
170 | struct aoetgt **htgt; /* target needing rexmit assistance */ | 169 | struct aoetgt *htgt; /* target needing rexmit assistance */ |
170 | ulong ntargets; | ||
171 | ulong kicked; | ||
171 | }; | 172 | }; |
172 | 173 | ||
174 | /* kthread tracking */ | ||
175 | struct ktstate { | ||
176 | struct completion rendez; | ||
177 | struct task_struct *task; | ||
178 | wait_queue_head_t *waitq; | ||
179 | int (*fn) (void); | ||
180 | char *name; | ||
181 | spinlock_t *lock; | ||
182 | }; | ||
173 | 183 | ||
174 | int aoeblk_init(void); | 184 | int aoeblk_init(void); |
175 | void aoeblk_exit(void); | 185 | void aoeblk_exit(void); |
@@ -182,22 +192,29 @@ void aoechr_error(char *);
182 | 192 | ||
183 | void aoecmd_work(struct aoedev *d); | 193 | void aoecmd_work(struct aoedev *d); |
184 | void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor); | 194 | void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor); |
185 | void aoecmd_ata_rsp(struct sk_buff *); | 195 | struct sk_buff *aoecmd_ata_rsp(struct sk_buff *); |
186 | void aoecmd_cfg_rsp(struct sk_buff *); | 196 | void aoecmd_cfg_rsp(struct sk_buff *); |
187 | void aoecmd_sleepwork(struct work_struct *); | 197 | void aoecmd_sleepwork(struct work_struct *); |
188 | void aoecmd_cleanslate(struct aoedev *); | 198 | void aoecmd_cleanslate(struct aoedev *); |
199 | void aoecmd_exit(void); | ||
200 | int aoecmd_init(void); | ||
189 | struct sk_buff *aoecmd_ata_id(struct aoedev *); | 201 | struct sk_buff *aoecmd_ata_id(struct aoedev *); |
202 | void aoe_freetframe(struct frame *); | ||
203 | void aoe_flush_iocq(void); | ||
204 | void aoe_end_request(struct aoedev *, struct request *, int); | ||
205 | int aoe_ktstart(struct ktstate *k); | ||
206 | void aoe_ktstop(struct ktstate *k); | ||
190 | 207 | ||
191 | int aoedev_init(void); | 208 | int aoedev_init(void); |
192 | void aoedev_exit(void); | 209 | void aoedev_exit(void); |
193 | struct aoedev *aoedev_by_aoeaddr(int maj, int min); | 210 | struct aoedev *aoedev_by_aoeaddr(ulong maj, int min, int do_alloc); |
194 | struct aoedev *aoedev_by_sysminor_m(ulong sysminor); | ||
195 | void aoedev_downdev(struct aoedev *d); | 211 | void aoedev_downdev(struct aoedev *d); |
196 | int aoedev_flush(const char __user *str, size_t size); | 212 | int aoedev_flush(const char __user *str, size_t size); |
213 | void aoe_failbuf(struct aoedev *, struct buf *); | ||
214 | void aoedev_put(struct aoedev *); | ||
197 | 215 | ||
198 | int aoenet_init(void); | 216 | int aoenet_init(void); |
199 | void aoenet_exit(void); | 217 | void aoenet_exit(void); |
200 | void aoenet_xmit(struct sk_buff_head *); | 218 | void aoenet_xmit(struct sk_buff_head *); |
201 | int is_aoe_netif(struct net_device *ifp); | 219 | int is_aoe_netif(struct net_device *ifp); |
202 | int set_aoe_iflist(const char __user *str, size_t size); | 220 | int set_aoe_iflist(const char __user *str, size_t size); |
203 | |||
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 321de7b6c442..00dfc5008ad4 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -1,4 +1,4 @@
1 | /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
2 | /* | 2 | /* |
3 | * aoeblk.c | 3 | * aoeblk.c |
4 | * block device routines | 4 | * block device routines |
@@ -161,68 +161,22 @@ aoeblk_release(struct gendisk *disk, fmode_t mode)
161 | } | 161 | } |
162 | 162 | ||
163 | static void | 163 | static void |
164 | aoeblk_make_request(struct request_queue *q, struct bio *bio) | 164 | aoeblk_request(struct request_queue *q) |
165 | { | 165 | { |
166 | struct sk_buff_head queue; | ||
167 | struct aoedev *d; | 166 | struct aoedev *d; |
168 | struct buf *buf; | 167 | struct request *rq; |
169 | ulong flags; | ||
170 | |||
171 | blk_queue_bounce(q, &bio); | ||
172 | |||
173 | if (bio == NULL) { | ||
174 | printk(KERN_ERR "aoe: bio is NULL\n"); | ||
175 | BUG(); | ||
176 | return; | ||
177 | } | ||
178 | d = bio->bi_bdev->bd_disk->private_data; | ||
179 | if (d == NULL) { | ||
180 | printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n"); | ||
181 | BUG(); | ||
182 | bio_endio(bio, -ENXIO); | ||
183 | return; | ||
184 | } else if (bio->bi_io_vec == NULL) { | ||
185 | printk(KERN_ERR "aoe: bi_io_vec is NULL\n"); | ||
186 | BUG(); | ||
187 | bio_endio(bio, -ENXIO); | ||
188 | return; | ||
189 | } | ||
190 | buf = mempool_alloc(d->bufpool, GFP_NOIO); | ||
191 | if (buf == NULL) { | ||
192 | printk(KERN_INFO "aoe: buf allocation failure\n"); | ||
193 | bio_endio(bio, -ENOMEM); | ||
194 | return; | ||
195 | } | ||
196 | memset(buf, 0, sizeof(*buf)); | ||
197 | INIT_LIST_HEAD(&buf->bufs); | ||
198 | buf->stime = jiffies; | ||
199 | buf->bio = bio; | ||
200 | buf->resid = bio->bi_size; | ||
201 | buf->sector = bio->bi_sector; | ||
202 | buf->bv = &bio->bi_io_vec[bio->bi_idx]; | ||
203 | buf->bv_resid = buf->bv->bv_len; | ||
204 | WARN_ON(buf->bv_resid == 0); | ||
205 | buf->bv_off = buf->bv->bv_offset; | ||
206 | |||
207 | spin_lock_irqsave(&d->lock, flags); | ||
208 | 168 | ||
169 | d = q->queuedata; | ||
209 | if ((d->flags & DEVFL_UP) == 0) { | 170 | if ((d->flags & DEVFL_UP) == 0) { |
210 | pr_info_ratelimited("aoe: device %ld.%d is not up\n", | 171 | pr_info_ratelimited("aoe: device %ld.%d is not up\n", |
211 | d->aoemajor, d->aoeminor); | 172 | d->aoemajor, d->aoeminor); |
212 | spin_unlock_irqrestore(&d->lock, flags); | 173 | while ((rq = blk_peek_request(q))) { |
213 | mempool_free(buf, d->bufpool); | 174 | blk_start_request(rq); |
214 | bio_endio(bio, -ENXIO); | 175 | aoe_end_request(d, rq, 1); |
176 | } | ||
215 | return; | 177 | return; |
216 | } | 178 | } |
217 | |||
218 | list_add_tail(&buf->bufs, &d->bufq); | ||
219 | |||
220 | aoecmd_work(d); | 179 | aoecmd_work(d); |
221 | __skb_queue_head_init(&queue); | ||
222 | skb_queue_splice_init(&d->sendq, &queue); | ||
223 | |||
224 | spin_unlock_irqrestore(&d->lock, flags); | ||
225 | aoenet_xmit(&queue); | ||
226 | } | 180 | } |
227 | 181 | ||
228 | static int | 182 | static int |
@@ -254,41 +208,54 @@ aoeblk_gdalloc(void *vp)
254 | { | 208 | { |
255 | struct aoedev *d = vp; | 209 | struct aoedev *d = vp; |
256 | struct gendisk *gd; | 210 | struct gendisk *gd; |
211 | mempool_t *mp; | ||
212 | struct request_queue *q; | ||
213 | enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; | ||
257 | ulong flags; | 214 | ulong flags; |
258 | 215 | ||
259 | gd = alloc_disk(AOE_PARTITIONS); | 216 | gd = alloc_disk(AOE_PARTITIONS); |
260 | if (gd == NULL) { | 217 | if (gd == NULL) { |
261 | printk(KERN_ERR | 218 | pr_err("aoe: cannot allocate disk structure for %ld.%d\n", |
262 | "aoe: cannot allocate disk structure for %ld.%d\n", | ||
263 | d->aoemajor, d->aoeminor); | 219 | d->aoemajor, d->aoeminor); |
264 | goto err; | 220 | goto err; |
265 | } | 221 | } |
266 | 222 | ||
267 | d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache); | 223 | mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab, |
268 | if (d->bufpool == NULL) { | 224 | buf_pool_cache); |
225 | if (mp == NULL) { | ||
269 | printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n", | 226 | printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n", |
270 | d->aoemajor, d->aoeminor); | 227 | d->aoemajor, d->aoeminor); |
271 | goto err_disk; | 228 | goto err_disk; |
272 | } | 229 | } |
230 | q = blk_init_queue(aoeblk_request, &d->lock); | ||
231 | if (q == NULL) { | ||
232 | pr_err("aoe: cannot allocate block queue for %ld.%d\n", | ||
233 | d->aoemajor, d->aoeminor); | ||
234 | mempool_destroy(mp); | ||
235 | goto err_disk; | ||
236 | } | ||
273 | 237 | ||
274 | d->blkq = blk_alloc_queue(GFP_KERNEL); | 238 | d->blkq = blk_alloc_queue(GFP_KERNEL); |
275 | if (!d->blkq) | 239 | if (!d->blkq) |
276 | goto err_mempool; | 240 | goto err_mempool; |
277 | blk_queue_make_request(d->blkq, aoeblk_make_request); | ||
278 | d->blkq->backing_dev_info.name = "aoe"; | 241 | d->blkq->backing_dev_info.name = "aoe"; |
279 | if (bdi_init(&d->blkq->backing_dev_info)) | 242 | if (bdi_init(&d->blkq->backing_dev_info)) |
280 | goto err_blkq; | 243 | goto err_blkq; |
281 | spin_lock_irqsave(&d->lock, flags); | 244 | spin_lock_irqsave(&d->lock, flags); |
245 | blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS); | ||
246 | q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; | ||
247 | d->bufpool = mp; | ||
248 | d->blkq = gd->queue = q; | ||
249 | q->queuedata = d; | ||
250 | d->gd = gd; | ||
282 | gd->major = AOE_MAJOR; | 251 | gd->major = AOE_MAJOR; |
283 | gd->first_minor = d->sysminor * AOE_PARTITIONS; | 252 | gd->first_minor = d->sysminor; |
284 | gd->fops = &aoe_bdops; | 253 | gd->fops = &aoe_bdops; |
285 | gd->private_data = d; | 254 | gd->private_data = d; |
286 | set_capacity(gd, d->ssize); | 255 | set_capacity(gd, d->ssize); |
287 | snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", | 256 | snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", |
288 | d->aoemajor, d->aoeminor); | 257 | d->aoemajor, d->aoeminor); |
289 | 258 | ||
290 | gd->queue = d->blkq; | ||
291 | d->gd = gd; | ||
292 | d->flags &= ~DEVFL_GDALLOC; | 259 | d->flags &= ~DEVFL_GDALLOC; |
293 | d->flags |= DEVFL_UP; | 260 | d->flags |= DEVFL_UP; |
294 | 261 | ||
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index e86d2062a164..ed57a890c643 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -1,4 +1,4 @@
1 | /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
2 | /* | 2 | /* |
3 | * aoechr.c | 3 | * aoechr.c |
4 | * AoE character device driver | 4 | * AoE character device driver |
@@ -86,34 +86,34 @@ revalidate(const char __user *str, size_t size)
86 | if (copy_from_user(buf, str, size)) | 86 | if (copy_from_user(buf, str, size)) |
87 | return -EFAULT; | 87 | return -EFAULT; |
88 | 88 | ||
89 | /* should be e%d.%d format */ | ||
90 | n = sscanf(buf, "e%d.%d", &major, &minor); | 89 | n = sscanf(buf, "e%d.%d", &major, &minor); |
91 | if (n != 2) { | 90 | if (n != 2) { |
92 | printk(KERN_ERR "aoe: invalid device specification\n"); | 91 | pr_err("aoe: invalid device specification %s\n", buf); |
93 | return -EINVAL; | 92 | return -EINVAL; |
94 | } | 93 | } |
95 | d = aoedev_by_aoeaddr(major, minor); | 94 | d = aoedev_by_aoeaddr(major, minor, 0); |
96 | if (!d) | 95 | if (!d) |
97 | return -EINVAL; | 96 | return -EINVAL; |
98 | spin_lock_irqsave(&d->lock, flags); | 97 | spin_lock_irqsave(&d->lock, flags); |
99 | aoecmd_cleanslate(d); | 98 | aoecmd_cleanslate(d); |
99 | aoecmd_cfg(major, minor); | ||
100 | loop: | 100 | loop: |
101 | skb = aoecmd_ata_id(d); | 101 | skb = aoecmd_ata_id(d); |
102 | spin_unlock_irqrestore(&d->lock, flags); | 102 | spin_unlock_irqrestore(&d->lock, flags); |
103 | /* try again if we are able to sleep a bit, | 103 | /* try again if we are able to sleep a bit, |
104 | * otherwise give up this revalidation | 104 | * otherwise give up this revalidation |
105 | */ | 105 | */ |
106 | if (!skb && !msleep_interruptible(200)) { | 106 | if (!skb && !msleep_interruptible(250)) { |
107 | spin_lock_irqsave(&d->lock, flags); | 107 | spin_lock_irqsave(&d->lock, flags); |
108 | goto loop; | 108 | goto loop; |
109 | } | 109 | } |
110 | aoedev_put(d); | ||
110 | if (skb) { | 111 | if (skb) { |
111 | struct sk_buff_head queue; | 112 | struct sk_buff_head queue; |
112 | __skb_queue_head_init(&queue); | 113 | __skb_queue_head_init(&queue); |
113 | __skb_queue_tail(&queue, skb); | 114 | __skb_queue_tail(&queue, skb); |
114 | aoenet_xmit(&queue); | 115 | aoenet_xmit(&queue); |
115 | } | 116 | } |
116 | aoecmd_cfg(major, minor); | ||
117 | return 0; | 117 | return 0; |
118 | } | 118 | } |
119 | 119 | ||
@@ -174,6 +174,7 @@ aoechr_write(struct file *filp, const char __user *buf, size_t cnt, loff_t *offp
174 | break; | 174 | break; |
175 | case MINOR_FLUSH: | 175 | case MINOR_FLUSH: |
176 | ret = aoedev_flush(buf, cnt); | 176 | ret = aoedev_flush(buf, cnt); |
177 | break; | ||
177 | } | 178 | } |
178 | if (ret == 0) | 179 | if (ret == 0) |
179 | ret = cnt; | 180 | ret = cnt; |
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index de0435e63b02..3804a0af3ef1 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1,4 +1,4 @@
1 | /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
2 | /* | 2 | /* |
3 | * aoecmd.c | 3 | * aoecmd.c |
4 | * Filesystem request handling methods | 4 | * Filesystem request handling methods |
@@ -12,10 +12,19 @@
12 | #include <linux/netdevice.h> | 12 | #include <linux/netdevice.h> |
13 | #include <linux/genhd.h> | 13 | #include <linux/genhd.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/workqueue.h> | ||
16 | #include <linux/kthread.h> | ||
15 | #include <net/net_namespace.h> | 17 | #include <net/net_namespace.h> |
16 | #include <asm/unaligned.h> | 18 | #include <asm/unaligned.h> |
19 | #include <linux/uio.h> | ||
17 | #include "aoe.h" | 20 | #include "aoe.h" |
18 | 21 | ||
22 | #define MAXIOC (8192) /* default meant to avoid most soft lockups */ | ||
23 | |||
24 | static void ktcomplete(struct frame *, struct sk_buff *); | ||
25 | |||
26 | static struct buf *nextbuf(struct aoedev *); | ||
27 | |||
19 | static int aoe_deadsecs = 60 * 3; | 28 | static int aoe_deadsecs = 60 * 3; |
20 | module_param(aoe_deadsecs, int, 0644); | 29 | module_param(aoe_deadsecs, int, 0644); |
21 | MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); | 30 | MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); |
@@ -25,6 +34,15 @@ module_param(aoe_maxout, int, 0644);
25 | MODULE_PARM_DESC(aoe_maxout, | 34 | MODULE_PARM_DESC(aoe_maxout, |
26 | "Only aoe_maxout outstanding packets for every MAC on eX.Y."); | 35 | "Only aoe_maxout outstanding packets for every MAC on eX.Y."); |
27 | 36 | ||
37 | static wait_queue_head_t ktiowq; | ||
38 | static struct ktstate kts; | ||
39 | |||
40 | /* io completion queue */ | ||
41 | static struct { | ||
42 | struct list_head head; | ||
43 | spinlock_t lock; | ||
44 | } iocq; | ||
45 | |||
28 | static struct sk_buff * | 46 | static struct sk_buff * |
29 | new_skb(ulong len) | 47 | new_skb(ulong len) |
30 | { | 48 | { |
@@ -35,20 +53,27 @@ new_skb(ulong len)
35 | skb_reset_mac_header(skb); | 53 | skb_reset_mac_header(skb); |
36 | skb_reset_network_header(skb); | 54 | skb_reset_network_header(skb); |
37 | skb->protocol = __constant_htons(ETH_P_AOE); | 55 | skb->protocol = __constant_htons(ETH_P_AOE); |
56 | skb_checksum_none_assert(skb); | ||
38 | } | 57 | } |
39 | return skb; | 58 | return skb; |
40 | } | 59 | } |
41 | 60 | ||
42 | static struct frame * | 61 | static struct frame * |
43 | getframe(struct aoetgt *t, int tag) | 62 | getframe(struct aoedev *d, u32 tag) |
44 | { | 63 | { |
45 | struct frame *f, *e; | 64 | struct frame *f; |
65 | struct list_head *head, *pos, *nx; | ||
66 | u32 n; | ||
46 | 67 | ||
47 | f = t->frames; | 68 | n = tag % NFACTIVE; |
48 | e = f + t->nframes; | 69 | head = &d->factive[n]; |
49 | for (; f<e; f++) | 70 | list_for_each_safe(pos, nx, head) { |
50 | if (f->tag == tag) | 71 | f = list_entry(pos, struct frame, head); |
72 | if (f->tag == tag) { | ||
73 | list_del(pos); | ||
51 | return f; | 74 | return f; |
75 | } | ||
76 | } | ||
52 | return NULL; | 77 | return NULL; |
53 | } | 78 | } |
54 | 79 | ||
@@ -58,18 +83,18 @@ getframe(struct aoetgt *t, int tag)
58 | * This driver reserves tag -1 to mean "unused frame." | 83 | * This driver reserves tag -1 to mean "unused frame." |
59 | */ | 84 | */ |
60 | static int | 85 | static int |
61 | newtag(struct aoetgt *t) | 86 | newtag(struct aoedev *d) |
62 | { | 87 | { |
63 | register ulong n; | 88 | register ulong n; |
64 | 89 | ||
65 | n = jiffies & 0xffff; | 90 | n = jiffies & 0xffff; |
66 | return n |= (++t->lasttag & 0x7fff) << 16; | 91 | return n |= (++d->lasttag & 0x7fff) << 16; |
67 | } | 92 | } |
68 | 93 | ||
69 | static int | 94 | static u32 |
70 | aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h) | 95 | aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h) |
71 | { | 96 | { |
72 | u32 host_tag = newtag(t); | 97 | u32 host_tag = newtag(d); |
73 | 98 | ||
74 | memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src); | 99 | memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src); |
75 | memcpy(h->dst, t->addr, sizeof h->dst); | 100 | memcpy(h->dst, t->addr, sizeof h->dst); |
@@ -94,16 +119,18 @@ put_lba(struct aoe_atahdr *ah, sector_t lba)
94 | ah->lba5 = lba >>= 8; | 119 | ah->lba5 = lba >>= 8; |
95 | } | 120 | } |
96 | 121 | ||
97 | static void | 122 | static struct aoeif * |
98 | ifrotate(struct aoetgt *t) | 123 | ifrotate(struct aoetgt *t) |
99 | { | 124 | { |
100 | t->ifp++; | 125 | struct aoeif *ifp; |
101 | if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL) | 126 | |
102 | t->ifp = t->ifs; | 127 | ifp = t->ifp; |
103 | if (t->ifp->nd == NULL) { | 128 | ifp++; |
104 | printk(KERN_INFO "aoe: no interface to rotate to\n"); | 129 | if (ifp >= &t->ifs[NAOEIFS] || ifp->nd == NULL) |
105 | BUG(); | 130 | ifp = t->ifs; |
106 | } | 131 | if (ifp->nd == NULL) |
132 | return NULL; | ||
133 | return t->ifp = ifp; | ||
107 | } | 134 | } |
108 | 135 | ||
109 | static void | 136 | static void |
@@ -128,78 +155,128 @@ skb_pool_get(struct aoedev *d)
128 | return NULL; | 155 | return NULL; |
129 | } | 156 | } |
130 | 157 | ||
131 | /* freeframe is where we do our load balancing so it's a little hairy. */ | 158 | void |
159 | aoe_freetframe(struct frame *f) | ||
160 | { | ||
161 | struct aoetgt *t; | ||
162 | |||
163 | t = f->t; | ||
164 | f->buf = NULL; | ||
165 | f->bv = NULL; | ||
166 | f->r_skb = NULL; | ||
167 | list_add(&f->head, &t->ffree); | ||
168 | } | ||
169 | |||
132 | static struct frame * | 170 | static struct frame * |
133 | freeframe(struct aoedev *d) | 171 | newtframe(struct aoedev *d, struct aoetgt *t) |
134 | { | 172 | { |
135 | struct frame *f, *e, *rf; | 173 | struct frame *f; |
136 | struct aoetgt **t; | ||
137 | struct sk_buff *skb; | 174 | struct sk_buff *skb; |
175 | struct list_head *pos; | ||
176 | |||
177 | if (list_empty(&t->ffree)) { | ||
178 | if (t->falloc >= NSKBPOOLMAX*2) | ||
179 | return NULL; | ||
180 | f = kcalloc(1, sizeof(*f), GFP_ATOMIC); | ||
181 | if (f == NULL) | ||
182 | return NULL; | ||
183 | t->falloc++; | ||
184 | f->t = t; | ||
185 | } else { | ||
186 | pos = t->ffree.next; | ||
187 | list_del(pos); | ||
188 | f = list_entry(pos, struct frame, head); | ||
189 | } | ||
190 | |||
191 | skb = f->skb; | ||
192 | if (skb == NULL) { | ||
193 | f->skb = skb = new_skb(ETH_ZLEN); | ||
194 | if (!skb) { | ||
195 | bail: aoe_freetframe(f); | ||
196 | return NULL; | ||
197 | } | ||
198 | } | ||
199 | |||
200 | if (atomic_read(&skb_shinfo(skb)->dataref) != 1) { | ||
201 | skb = skb_pool_get(d); | ||
202 | if (skb == NULL) | ||
203 | goto bail; | ||
204 | skb_pool_put(d, f->skb); | ||
205 | f->skb = skb; | ||
206 | } | ||
207 | |||
208 | skb->truesize -= skb->data_len; | ||
209 | skb_shinfo(skb)->nr_frags = skb->data_len = 0; | ||
210 | skb_trim(skb, 0); | ||
211 | return f; | ||
212 | } | ||
213 | |||
214 | static struct frame * | ||
215 | newframe(struct aoedev *d) | ||
216 | { | ||
217 | struct frame *f; | ||
218 | struct aoetgt *t, **tt; | ||
219 | int totout = 0; | ||
138 | 220 | ||
139 | if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ | 221 | if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ |
140 | printk(KERN_ERR "aoe: NULL TARGETS!\n"); | 222 | printk(KERN_ERR "aoe: NULL TARGETS!\n"); |
141 | return NULL; | 223 | return NULL; |
142 | } | 224 | } |
143 | t = d->tgt; | 225 | tt = d->tgt; /* last used target */ |
144 | t++; | ||
145 | if (t >= &d->targets[NTARGETS] || !*t) | ||
146 | t = d->targets; | ||
147 | for (;;) { | 226 | for (;;) { |
148 | if ((*t)->nout < (*t)->maxout | 227 | tt++; |
228 | if (tt >= &d->targets[NTARGETS] || !*tt) | ||
229 | tt = d->targets; | ||
230 | t = *tt; | ||
231 | totout += t->nout; | ||
232 | if (t->nout < t->maxout | ||
149 | && t != d->htgt | 233 | && t != d->htgt |
150 | && (*t)->ifp->nd) { | 234 | && t->ifp->nd) { |
151 | rf = NULL; | 235 | f = newtframe(d, t); |
152 | f = (*t)->frames; | 236 | if (f) { |
153 | e = f + (*t)->nframes; | 237 | ifrotate(t); |
154 | for (; f < e; f++) { | 238 | d->tgt = tt; |
155 | if (f->tag != FREETAG) | ||
156 | continue; | ||
157 | skb = f->skb; | ||
158 | if (!skb | ||
159 | && !(f->skb = skb = new_skb(ETH_ZLEN))) | ||
160 | continue; | ||
161 | if (atomic_read(&skb_shinfo(skb)->dataref) | ||
162 | != 1) { | ||
163 | if (!rf) | ||
164 | rf = f; | ||
165 | continue; | ||
166 | } | ||
167 | gotone: skb_shinfo(skb)->nr_frags = skb->data_len = 0; | ||
168 | skb_trim(skb, 0); | ||
169 | d->tgt = t; | ||
170 | ifrotate(*t); | ||
171 | return f; | 239 | return f; |
172 | } | 240 | } |
173 | /* Work can be done, but the network layer is | ||
174 | holding our precious packets. Try to grab | ||
175 | one from the pool. */ | ||
176 | f = rf; | ||
177 | if (f == NULL) { /* more paranoia */ | ||
178 | printk(KERN_ERR | ||
179 | "aoe: freeframe: %s.\n", | ||
180 | "unexpected null rf"); | ||
181 | d->flags |= DEVFL_KICKME; | ||
182 | return NULL; | ||
183 | } | ||
184 | skb = skb_pool_get(d); | ||
185 | if (skb) { | ||
186 | skb_pool_put(d, f->skb); | ||
187 | f->skb = skb; | ||
188 | goto gotone; | ||
189 | } | ||
190 | (*t)->dataref++; | ||
191 | if ((*t)->nout == 0) | ||
192 | d->flags |= DEVFL_KICKME; | ||
193 | } | 241 | } |
194 | if (t == d->tgt) /* we've looped and found nada */ | 242 | if (tt == d->tgt) /* we've looped and found nada */ |
195 | break; | 243 | break; |
196 | t++; | 244 | } |
197 | if (t >= &d->targets[NTARGETS] || !*t) | 245 | if (totout == 0) { |
198 | t = d->targets; | 246 | d->kicked++; |
247 | d->flags |= DEVFL_KICKME; | ||
199 | } | 248 | } |
200 | return NULL; | 249 | return NULL; |
201 | } | 250 | } |
202 | 251 | ||
252 | static void | ||
253 | skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt) | ||
254 | { | ||
255 | int frag = 0; | ||
256 | ulong fcnt; | ||
257 | loop: | ||
258 | fcnt = bv->bv_len - (off - bv->bv_offset); | ||
259 | if (fcnt > cnt) | ||
260 | fcnt = cnt; | ||
261 | skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt); | ||
262 | cnt -= fcnt; | ||
263 | if (cnt <= 0) | ||
264 | return; | ||
265 | bv++; | ||
266 | off = bv->bv_offset; | ||
267 | goto loop; | ||
268 | } | ||
269 | |||
270 | static void | ||
271 | fhash(struct frame *f) | ||
272 | { | ||
273 | struct aoedev *d = f->t->d; | ||
274 | u32 n; | ||
275 | |||
276 | n = f->tag % NFACTIVE; | ||
277 | list_add_tail(&f->head, &d->factive[n]); | ||
278 | } | ||
279 | |||
203 | static int | 280 | static int |
204 | aoecmd_ata_rw(struct aoedev *d) | 281 | aoecmd_ata_rw(struct aoedev *d) |
205 | { | 282 | { |
@@ -207,26 +284,47 @@ aoecmd_ata_rw(struct aoedev *d)
207 | struct aoe_hdr *h; | 284 | struct aoe_hdr *h; |
208 | struct aoe_atahdr *ah; | 285 | struct aoe_atahdr *ah; |
209 | struct buf *buf; | 286 | struct buf *buf; |
210 | struct bio_vec *bv; | ||
211 | struct aoetgt *t; | 287 | struct aoetgt *t; |
212 | struct sk_buff *skb; | 288 | struct sk_buff *skb; |
213 | ulong bcnt; | 289 | struct sk_buff_head queue; |
290 | ulong bcnt, fbcnt; | ||
214 | char writebit, extbit; | 291 | char writebit, extbit; |
215 | 292 | ||
216 | writebit = 0x10; | 293 | writebit = 0x10; |
217 | extbit = 0x4; | 294 | extbit = 0x4; |
218 | 295 | ||
219 | f = freeframe(d); | 296 | buf = nextbuf(d); |
297 | if (buf == NULL) | ||
298 | return 0; | ||
299 | f = newframe(d); | ||
220 | if (f == NULL) | 300 | if (f == NULL) |
221 | return 0; | 301 | return 0; |
222 | t = *d->tgt; | 302 | t = *d->tgt; |
223 | buf = d->inprocess; | 303 | bcnt = d->maxbcnt; |
224 | bv = buf->bv; | ||
225 | bcnt = t->ifp->maxbcnt; | ||
226 | if (bcnt == 0) | 304 | if (bcnt == 0) |
227 | bcnt = DEFAULTBCNT; | 305 | bcnt = DEFAULTBCNT; |
228 | if (bcnt > buf->bv_resid) | 306 | if (bcnt > buf->resid) |
229 | bcnt = buf->bv_resid; | 307 | bcnt = buf->resid; |
308 | fbcnt = bcnt; | ||
309 | f->bv = buf->bv; | ||
310 | f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid); | ||
311 | do { | ||
312 | if (fbcnt < buf->bv_resid) { | ||
313 | buf->bv_resid -= fbcnt; | ||
314 | buf->resid -= fbcnt; | ||
315 | break; | ||
316 | } | ||
317 | fbcnt -= buf->bv_resid; | ||
318 | buf->resid -= buf->bv_resid; | ||
319 | if (buf->resid == 0) { | ||
320 | d->ip.buf = NULL; | ||
321 | break; | ||
322 | } | ||
323 | buf->bv++; | ||
324 | buf->bv_resid = buf->bv->bv_len; | ||
325 | WARN_ON(buf->bv_resid == 0); | ||
326 | } while (fbcnt); | ||
327 | |||
230 | /* initialize the headers & frame */ | 328 | /* initialize the headers & frame */ |
231 | skb = f->skb; | 329 | skb = f->skb; |
232 | h = (struct aoe_hdr *) skb_mac_header(skb); | 330 | h = (struct aoe_hdr *) skb_mac_header(skb); |
@@ -234,10 +332,10 @@ aoecmd_ata_rw(struct aoedev *d)
234 | skb_put(skb, sizeof *h + sizeof *ah); | 332 | skb_put(skb, sizeof *h + sizeof *ah); |
235 | memset(h, 0, skb->len); | 333 | memset(h, 0, skb->len); |
236 | f->tag = aoehdr_atainit(d, t, h); | 334 | f->tag = aoehdr_atainit(d, t, h); |
335 | fhash(f); | ||
237 | t->nout++; | 336 | t->nout++; |
238 | f->waited = 0; | 337 | f->waited = 0; |
239 | f->buf = buf; | 338 | f->buf = buf; |
240 | f->bufaddr = page_address(bv->bv_page) + buf->bv_off; | ||
241 | f->bcnt = bcnt; | 339 | f->bcnt = bcnt; |
242 | f->lba = buf->sector; | 340 | f->lba = buf->sector; |
243 | 341 | ||
@@ -252,10 +350,11 @@ aoecmd_ata_rw(struct aoedev *d)
252 | ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ | 350 | ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ |
253 | } | 351 | } |
254 | if (bio_data_dir(buf->bio) == WRITE) { | 352 | if (bio_data_dir(buf->bio) == WRITE) { |
255 | skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt); | 353 | skb_fillup(skb, f->bv, f->bv_off, bcnt); |
256 | ah->aflags |= AOEAFL_WRITE; | 354 | ah->aflags |= AOEAFL_WRITE; |
257 | skb->len += bcnt; | 355 | skb->len += bcnt; |
258 | skb->data_len = bcnt; | 356 | skb->data_len = bcnt; |
357 | skb->truesize += bcnt; | ||
259 | t->wpkts++; | 358 | t->wpkts++; |
260 | } else { | 359 | } else { |
261 | t->rpkts++; | 360 | t->rpkts++; |
@@ -266,23 +365,15 @@ aoecmd_ata_rw(struct aoedev *d)
266 | 365 | ||
267 | /* mark all tracking fields and load out */ | 366 | /* mark all tracking fields and load out */ |
268 | buf->nframesout += 1; | 367 | buf->nframesout += 1; |
269 | buf->bv_off += bcnt; | ||
270 | buf->bv_resid -= bcnt; | ||
271 | buf->resid -= bcnt; | ||
272 | buf->sector += bcnt >> 9; | 368 | buf->sector += bcnt >> 9; |
273 | if (buf->resid == 0) { | ||
274 | d->inprocess = NULL; | ||
275 | } else if (buf->bv_resid == 0) { | ||
276 | buf->bv = ++bv; | ||
277 | buf->bv_resid = bv->bv_len; | ||
278 | WARN_ON(buf->bv_resid == 0); | ||
279 | buf->bv_off = bv->bv_offset; | ||
280 | } | ||
281 | 369 | ||
282 | skb->dev = t->ifp->nd; | 370 | skb->dev = t->ifp->nd; |
283 | skb = skb_clone(skb, GFP_ATOMIC); | 371 | skb = skb_clone(skb, GFP_ATOMIC); |
284 | if (skb) | 372 | if (skb) { |
285 | __skb_queue_tail(&d->sendq, skb); | 373 | __skb_queue_head_init(&queue); |
374 | __skb_queue_tail(&queue, skb); | ||
375 | aoenet_xmit(&queue); | ||
376 | } | ||
286 | return 1; | 377 | return 1; |
287 | } | 378 | } |
288 | 379 | ||
@@ -329,17 +420,25 @@ cont:
329 | } | 420 | } |
330 | 421 | ||
331 | static void | 422 | static void |
332 | resend(struct aoedev *d, struct aoetgt *t, struct frame *f) | 423 | resend(struct aoedev *d, struct frame *f) |
333 | { | 424 | { |
334 | struct sk_buff *skb; | 425 | struct sk_buff *skb; |
426 | struct sk_buff_head queue; | ||
335 | struct aoe_hdr *h; | 427 | struct aoe_hdr *h; |
336 | struct aoe_atahdr *ah; | 428 | struct aoe_atahdr *ah; |
429 | struct aoetgt *t; | ||
337 | char buf[128]; | 430 | char buf[128]; |
338 | u32 n; | 431 | u32 n; |
339 | 432 | ||
340 | ifrotate(t); | 433 | t = f->t; |
341 | n = newtag(t); | 434 | n = newtag(d); |
342 | skb = f->skb; | 435 | skb = f->skb; |
436 | if (ifrotate(t) == NULL) { | ||
437 | /* probably can't happen, but set it up to fail anyway */ | ||
438 | pr_info("aoe: resend: no interfaces to rotate to.\n"); | ||
439 | ktcomplete(f, NULL); | ||
440 | return; | ||
441 | } | ||
343 | h = (struct aoe_hdr *) skb_mac_header(skb); | 442 | h = (struct aoe_hdr *) skb_mac_header(skb); |
344 | ah = (struct aoe_atahdr *) (h+1); | 443 | ah = (struct aoe_atahdr *) (h+1); |
345 | 444 | ||
@@ -350,39 +449,22 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
350 | aoechr_error(buf); | 449 | aoechr_error(buf); |
351 | 450 | ||
352 | f->tag = n; | 451 | f->tag = n; |
452 | fhash(f); | ||
353 | h->tag = cpu_to_be32(n); | 453 | h->tag = cpu_to_be32(n); |
354 | memcpy(h->dst, t->addr, sizeof h->dst); | 454 | memcpy(h->dst, t->addr, sizeof h->dst); |
355 | memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src); | 455 | memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src); |
356 | 456 | ||
357 | switch (ah->cmdstat) { | ||
358 | default: | ||
359 | break; | ||
360 | case ATA_CMD_PIO_READ: | ||
361 | case ATA_CMD_PIO_READ_EXT: | ||
362 | case ATA_CMD_PIO_WRITE: | ||
363 | case ATA_CMD_PIO_WRITE_EXT: | ||
364 | put_lba(ah, f->lba); | ||
365 | |||
366 | n = f->bcnt; | ||
367 | if (n > DEFAULTBCNT) | ||
368 | n = DEFAULTBCNT; | ||
369 | ah->scnt = n >> 9; | ||
370 | if (ah->aflags & AOEAFL_WRITE) { | ||
371 | skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), | ||
372 | offset_in_page(f->bufaddr), n); | ||
373 | skb->len = sizeof *h + sizeof *ah + n; | ||
374 | skb->data_len = n; | ||
375 | } | ||
376 | } | ||
377 | skb->dev = t->ifp->nd; | 457 | skb->dev = t->ifp->nd; |
378 | skb = skb_clone(skb, GFP_ATOMIC); | 458 | skb = skb_clone(skb, GFP_ATOMIC); |
379 | if (skb == NULL) | 459 | if (skb == NULL) |
380 | return; | 460 | return; |
381 | __skb_queue_tail(&d->sendq, skb); | 461 | __skb_queue_head_init(&queue); |
462 | __skb_queue_tail(&queue, skb); | ||
463 | aoenet_xmit(&queue); | ||
382 | } | 464 | } |
383 | 465 | ||
384 | static int | 466 | static int |
385 | tsince(int tag) | 467 | tsince(u32 tag) |
386 | { | 468 | { |
387 | int n; | 469 | int n; |
388 | 470 | ||
@@ -406,58 +488,65 @@ getif(struct aoetgt *t, struct net_device *nd)
406 | return NULL; | 488 | return NULL; |
407 | } | 489 | } |
408 | 490 | ||
409 | static struct aoeif * | ||
410 | addif(struct aoetgt *t, struct net_device *nd) | ||
411 | { | ||
412 | struct aoeif *p; | ||
413 | |||
414 | p = getif(t, NULL); | ||
415 | if (!p) | ||
416 | return NULL; | ||
417 | p->nd = nd; | ||
418 | p->maxbcnt = DEFAULTBCNT; | ||
419 | p->lost = 0; | ||
420 | p->lostjumbo = 0; | ||
421 | return p; | ||
422 | } | ||
423 | |||
424 | static void | 491 | static void |
425 | ejectif(struct aoetgt *t, struct aoeif *ifp) | 492 | ejectif(struct aoetgt *t, struct aoeif *ifp) |
426 | { | 493 | { |
427 | struct aoeif *e; | 494 | struct aoeif *e; |
495 | struct net_device *nd; | ||
428 | ulong n; | 496 | ulong n; |
429 | 497 | ||
498 | nd = ifp->nd; | ||
430 | e = t->ifs + NAOEIFS - 1; | 499 | e = t->ifs + NAOEIFS - 1; |
431 | n = (e - ifp) * sizeof *ifp; | 500 | n = (e - ifp) * sizeof *ifp; |
432 | memmove(ifp, ifp+1, n); | 501 | memmove(ifp, ifp+1, n); |
433 | e->nd = NULL; | 502 | e->nd = NULL; |
503 | dev_put(nd); | ||
434 | } | 504 | } |
435 | 505 | ||
436 | static int | 506 | static int |
437 | sthtith(struct aoedev *d) | 507 | sthtith(struct aoedev *d) |
438 | { | 508 | { |
439 | struct frame *f, *e, *nf; | 509 | struct frame *f, *nf; |
510 | struct list_head *nx, *pos, *head; | ||
440 | struct sk_buff *skb; | 511 | struct sk_buff *skb; |
441 | struct aoetgt *ht = *d->htgt; | 512 | struct aoetgt *ht = d->htgt; |
442 | 513 | int i; | |
443 | f = ht->frames; | 514 | |
444 | e = f + ht->nframes; | 515 | for (i = 0; i < NFACTIVE; i++) { |
445 | for (; f < e; f++) { | 516 | head = &d->factive[i]; |
446 | if (f->tag == FREETAG) | 517 | list_for_each_safe(pos, nx, head) { |
447 | continue; | 518 | f = list_entry(pos, struct frame, head); |
448 | nf = freeframe(d); | 519 | if (f->t != ht) |
449 | if (!nf) | 520 | continue; |
450 | return 0; | 521 | |
451 | skb = nf->skb; | 522 | nf = newframe(d); |
452 | *nf = *f; | 523 | if (!nf) |
453 | f->skb = skb; | 524 | return 0; |
454 | f->tag = FREETAG; | 525 | |
455 | nf->waited = 0; | 526 | /* remove frame from active list */ |
456 | ht->nout--; | 527 | list_del(pos); |
457 | (*d->tgt)->nout++; | 528 | |
458 | resend(d, *d->tgt, nf); | 529 | /* reassign all pertinent bits to new outbound frame */ |
530 | skb = nf->skb; | ||
531 | nf->skb = f->skb; | ||
532 | nf->buf = f->buf; | ||
533 | nf->bcnt = f->bcnt; | ||
534 | nf->lba = f->lba; | ||
535 | nf->bv = f->bv; | ||
536 | nf->bv_off = f->bv_off; | ||
537 | nf->waited = 0; | ||
538 | f->skb = skb; | ||
539 | aoe_freetframe(f); | ||
540 | ht->nout--; | ||
541 | nf->t->nout++; | ||
542 | resend(d, nf); | ||
543 | } | ||
459 | } | 544 | } |
460 | /* he's clean, he's useless. take away his interfaces */ | 545 | /* We've cleaned up the outstanding so take away his |
546 | * interfaces so he won't be used. We should remove him from | ||
547 | * the target array here, but cleaning up a target is | ||
548 | * involved. PUNT! | ||
549 | */ | ||
461 | memset(ht->ifs, 0, sizeof ht->ifs); | 550 | memset(ht->ifs, 0, sizeof ht->ifs); |
462 | d->htgt = NULL; | 551 | d->htgt = NULL; |
463 | return 1; | 552 | return 1; |
@@ -476,13 +565,15 @@ ata_scnt(unsigned char *packet) {
476 | static void | 565 | static void |
477 | rexmit_timer(ulong vp) | 566 | rexmit_timer(ulong vp) |
478 | { | 567 | { |
479 | struct sk_buff_head queue; | ||
480 | struct aoedev *d; | 568 | struct aoedev *d; |
481 | struct aoetgt *t, **tt, **te; | 569 | struct aoetgt *t, **tt, **te; |
482 | struct aoeif *ifp; | 570 | struct aoeif *ifp; |
483 | struct frame *f, *e; | 571 | struct frame *f; |
572 | struct list_head *head, *pos, *nx; | ||
573 | LIST_HEAD(flist); | ||
484 | register long timeout; | 574 | register long timeout; |
485 | ulong flags, n; | 575 | ulong flags, n; |
576 | int i; | ||
486 | 577 | ||
487 | d = (struct aoedev *) vp; | 578 | d = (struct aoedev *) vp; |
488 | 579 | ||
@@ -496,58 +587,22 @@ rexmit_timer(ulong vp)
496 | spin_unlock_irqrestore(&d->lock, flags); | 587 | spin_unlock_irqrestore(&d->lock, flags); |
497 | return; | 588 | return; |
498 | } | 589 | } |
499 | tt = d->targets; | ||
500 | te = tt + NTARGETS; | ||
501 | for (; tt < te && *tt; tt++) { | ||
502 | t = *tt; | ||
503 | f = t->frames; | ||
504 | e = f + t->nframes; | ||
505 | for (; f < e; f++) { | ||
506 | if (f->tag == FREETAG | ||
507 | || tsince(f->tag) < timeout) | ||
508 | continue; | ||
509 | n = f->waited += timeout; | ||
510 | n /= HZ; | ||
511 | if (n > aoe_deadsecs) { | ||
512 | /* waited too long. device failure. */ | ||
513 | aoedev_downdev(d); | ||
514 | break; | ||
515 | } | ||
516 | |||
517 | if (n > HELPWAIT /* see if another target can help */ | ||
518 | && (tt != d->targets || d->targets[1])) | ||
519 | d->htgt = tt; | ||
520 | |||
521 | if (t->nout == t->maxout) { | ||
522 | if (t->maxout > 1) | ||
523 | t->maxout--; | ||
524 | t->lastwadj = jiffies; | ||
525 | } | ||
526 | |||
527 | ifp = getif(t, f->skb->dev); | ||
528 | if (ifp && ++ifp->lost > (t->nframes << 1) | ||
529 | && (ifp != t->ifs || t->ifs[1].nd)) { | ||
530 | ejectif(t, ifp); | ||
531 | ifp = NULL; | ||
532 | } | ||
533 | 590 | ||
534 | if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512 | 591 | /* collect all frames to rexmit into flist */ |
535 | && ifp && ++ifp->lostjumbo > (t->nframes << 1) | 592 | for (i = 0; i < NFACTIVE; i++) { |
536 | && ifp->maxbcnt != DEFAULTBCNT) { | 593 | head = &d->factive[i]; |
537 | printk(KERN_INFO | 594 | list_for_each_safe(pos, nx, head) { |
538 | "aoe: e%ld.%d: " | 595 | f = list_entry(pos, struct frame, head); |
539 | "too many lost jumbo on " | 596 | if (tsince(f->tag) < timeout) |
540 | "%s:%pm - " | 597 | break; /* end of expired frames */ |
541 | "falling back to %d frames.\n", | 598 | /* move to flist for later processing */ |
542 | d->aoemajor, d->aoeminor, | 599 | list_move_tail(pos, &flist); |
543 | ifp->nd->name, t->addr, | ||
544 | DEFAULTBCNT); | ||
545 | ifp->maxbcnt = 0; | ||
546 | } | ||
547 | resend(d, t, f); | ||
548 | } | 600 | } |
549 | 601 | } | |
550 | /* window check */ | 602 | /* window check */ |
603 | tt = d->targets; | ||
604 | te = tt + d->ntargets; | ||
605 | for (; tt < te && (t = *tt); tt++) { | ||
551 | if (t->nout == t->maxout | 606 | if (t->nout == t->maxout |
552 | && t->maxout < t->nframes | 607 | && t->maxout < t->nframes |
553 | && (jiffies - t->lastwadj)/HZ > 10) { | 608 | && (jiffies - t->lastwadj)/HZ > 10) { |
@@ -556,45 +611,173 @@ rexmit_timer(ulong vp)
556 | } | 611 | } |
557 | } | 612 | } |
558 | 613 | ||
559 | if (!skb_queue_empty(&d->sendq)) { | 614 | if (!list_empty(&flist)) { /* retransmissions necessary */ |
560 | n = d->rttavg <<= 1; | 615 | n = d->rttavg <<= 1; |
561 | if (n > MAXTIMER) | 616 | if (n > MAXTIMER) |
562 | d->rttavg = MAXTIMER; | 617 | d->rttavg = MAXTIMER; |
563 | } | 618 | } |
564 | 619 | ||
565 | if (d->flags & DEVFL_KICKME || d->htgt) { | 620 | /* process expired frames */ |
566 | d->flags &= ~DEVFL_KICKME; | 621 | while (!list_empty(&flist)) { |
567 | aoecmd_work(d); | 622 | pos = flist.next; |
623 | f = list_entry(pos, struct frame, head); | ||
624 | n = f->waited += timeout; | ||
625 | n /= HZ; | ||
626 | if (n > aoe_deadsecs) { | ||
627 | /* Waited too long. Device failure. | ||
628 | * Hang all frames on first hash bucket for downdev | ||
629 | * to clean up. | ||
630 | */ | ||
631 | list_splice(&flist, &d->factive[0]); | ||
632 | aoedev_downdev(d); | ||
633 | break; | ||
634 | } | ||
635 | list_del(pos); | ||
636 | |||
637 | t = f->t; | ||
638 | if (n > aoe_deadsecs/2) | ||
639 | d->htgt = t; /* see if another target can help */ | ||
640 | |||
641 | if (t->nout == t->maxout) { | ||
642 | if (t->maxout > 1) | ||
643 | t->maxout--; | ||
644 | t->lastwadj = jiffies; | ||
645 | } | ||
646 | |||
647 | ifp = getif(t, f->skb->dev); | ||
648 | if (ifp && ++ifp->lost > (t->nframes << 1) | ||
649 | && (ifp != t->ifs || t->ifs[1].nd)) { | ||
650 | ejectif(t, ifp); | ||
651 | ifp = NULL; | ||
652 | } | ||
653 | resend(d, f); | ||
568 | } | 654 | } |
569 | 655 | ||
570 | __skb_queue_head_init(&queue); | 656 | if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) { |
571 | skb_queue_splice_init(&d->sendq, &queue); | 657 | d->flags &= ~DEVFL_KICKME; |
658 | d->blkq->request_fn(d->blkq); | ||
659 | } | ||
572 | 660 | ||
573 | d->timer.expires = jiffies + TIMERTICK; | 661 | d->timer.expires = jiffies + TIMERTICK; |
574 | add_timer(&d->timer); | 662 | add_timer(&d->timer); |
575 | 663 | ||
576 | spin_unlock_irqrestore(&d->lock, flags); | 664 | spin_unlock_irqrestore(&d->lock, flags); |
665 | } | ||
577 | 666 | ||
578 | aoenet_xmit(&queue); | 667 | static unsigned long |
668 | rqbiocnt(struct request *r) | ||
669 | { | ||
670 | struct bio *bio; | ||
671 | unsigned long n = 0; | ||
672 | |||
673 | __rq_for_each_bio(bio, r) | ||
674 | n++; | ||
675 | return n; | ||
676 | } | ||
677 | |||
678 | /* This can be removed if we are certain that no users of the block | ||
679 | * layer will ever use zero-count pages in bios. Otherwise we have to | ||
680 | * protect against the put_page sometimes done by the network layer. | ||
681 | * | ||
682 | * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for | ||
683 | * discussion. | ||
684 | * | ||
685 | * We cannot use get_page in the workaround, because it insists on a | ||
686 | * positive page count as a precondition. So we use _count directly. | ||
687 | */ | ||
688 | static void | ||
689 | bio_pageinc(struct bio *bio) | ||
690 | { | ||
691 | struct bio_vec *bv; | ||
692 | struct page *page; | ||
693 | int i; | ||
694 | |||
695 | bio_for_each_segment(bv, bio, i) { | ||
696 | page = bv->bv_page; | ||
697 | /* Non-zero page count for non-head members of | ||
698 | * compound pages is no longer allowed by the kernel, | ||
699 | * but this has never been seen here. | ||
700 | */ | ||
701 | if (unlikely(PageCompound(page))) | ||
702 | if (compound_trans_head(page) != page) { | ||
703 | pr_crit("page tail used for block I/O\n"); | ||
704 | BUG(); | ||
705 | } | ||
706 | atomic_inc(&page->_count); | ||
707 | } | ||
708 | } | ||
709 | |||
710 | static void | ||
711 | bio_pagedec(struct bio *bio) | ||
712 | { | ||
713 | struct bio_vec *bv; | ||
714 | int i; | ||
715 | |||
716 | bio_for_each_segment(bv, bio, i) | ||
717 | atomic_dec(&bv->bv_page->_count); | ||
718 | } | ||
719 | |||
720 | static void | ||
721 | bufinit(struct buf *buf, struct request *rq, struct bio *bio) | ||
722 | { | ||
723 | struct bio_vec *bv; | ||
724 | |||
725 | memset(buf, 0, sizeof(*buf)); | ||
726 | buf->rq = rq; | ||
727 | buf->bio = bio; | ||
728 | buf->resid = bio->bi_size; | ||
729 | buf->sector = bio->bi_sector; | ||
730 | bio_pageinc(bio); | ||
731 | buf->bv = bv = &bio->bi_io_vec[bio->bi_idx]; | ||
732 | buf->bv_resid = bv->bv_len; | ||
733 | WARN_ON(buf->bv_resid == 0); | ||
734 | } | ||
735 | |||
736 | static struct buf * | ||
737 | nextbuf(struct aoedev *d) | ||
738 | { | ||
739 | struct request *rq; | ||
740 | struct request_queue *q; | ||
741 | struct buf *buf; | ||
742 | struct bio *bio; | ||
743 | |||
744 | q = d->blkq; | ||
745 | if (q == NULL) | ||
746 | return NULL; /* initializing */ | ||
747 | if (d->ip.buf) | ||
748 | return d->ip.buf; | ||
749 | rq = d->ip.rq; | ||
750 | if (rq == NULL) { | ||
751 | rq = blk_peek_request(q); | ||
752 | if (rq == NULL) | ||
753 | return NULL; | ||
754 | blk_start_request(rq); | ||
755 | d->ip.rq = rq; | ||
756 | d->ip.nxbio = rq->bio; | ||
757 | rq->special = (void *) rqbiocnt(rq); | ||
758 | } | ||
759 | buf = mempool_alloc(d->bufpool, GFP_ATOMIC); | ||
760 | if (buf == NULL) { | ||
761 | pr_err("aoe: nextbuf: unable to mempool_alloc!\n"); | ||
762 | return NULL; | ||
763 | } | ||
764 | bio = d->ip.nxbio; | ||
765 | bufinit(buf, rq, bio); | ||
766 | bio = bio->bi_next; | ||
767 | d->ip.nxbio = bio; | ||
768 | if (bio == NULL) | ||
769 | d->ip.rq = NULL; | ||
770 | return d->ip.buf = buf; | ||
579 | } | 771 | } |
580 | 772 | ||
581 | /* enters with d->lock held */ | 773 | /* enters with d->lock held */ |
582 | void | 774 | void |
583 | aoecmd_work(struct aoedev *d) | 775 | aoecmd_work(struct aoedev *d) |
584 | { | 776 | { |
585 | struct buf *buf; | ||
586 | loop: | ||
587 | if (d->htgt && !sthtith(d)) | 777 | if (d->htgt && !sthtith(d)) |
588 | return; | 778 | return; |
589 | if (d->inprocess == NULL) { | 779 | while (aoecmd_ata_rw(d)) |
590 | if (list_empty(&d->bufq)) | 780 | ; |
591 | return; | ||
592 | buf = container_of(d->bufq.next, struct buf, bufs); | ||
593 | list_del(d->bufq.next); | ||
594 | d->inprocess = buf; | ||
595 | } | ||
596 | if (aoecmd_ata_rw(d)) | ||
597 | goto loop; | ||
598 | } | 781 | } |
599 | 782 | ||
600 | /* this function performs work that has been deferred until sleeping is OK | 783 | /* this function performs work that has been deferred until sleeping is OK |
@@ -603,28 +786,25 @@ void
603 | aoecmd_sleepwork(struct work_struct *work) | 786 | aoecmd_sleepwork(struct work_struct *work) |
604 | { | 787 | { |
605 | struct aoedev *d = container_of(work, struct aoedev, work); | 788 | struct aoedev *d = container_of(work, struct aoedev, work); |
789 | struct block_device *bd; | ||
790 | u64 ssize; | ||
606 | 791 | ||
607 | if (d->flags & DEVFL_GDALLOC) | 792 | if (d->flags & DEVFL_GDALLOC) |
608 | aoeblk_gdalloc(d); | 793 | aoeblk_gdalloc(d); |
609 | 794 | ||
610 | if (d->flags & DEVFL_NEWSIZE) { | 795 | if (d->flags & DEVFL_NEWSIZE) { |
611 | struct block_device *bd; | ||
612 | unsigned long flags; | ||
613 | u64 ssize; | ||
614 | |||
615 | ssize = get_capacity(d->gd); | 796 | ssize = get_capacity(d->gd); |
616 | bd = bdget_disk(d->gd, 0); | 797 | bd = bdget_disk(d->gd, 0); |
617 | |||
618 | if (bd) { | 798 | if (bd) { |
619 | mutex_lock(&bd->bd_inode->i_mutex); | 799 | mutex_lock(&bd->bd_inode->i_mutex); |
620 | i_size_write(bd->bd_inode, (loff_t)ssize<<9); | 800 | i_size_write(bd->bd_inode, (loff_t)ssize<<9); |
621 | mutex_unlock(&bd->bd_inode->i_mutex); | 801 | mutex_unlock(&bd->bd_inode->i_mutex); |
622 | bdput(bd); | 802 | bdput(bd); |
623 | } | 803 | } |
624 | spin_lock_irqsave(&d->lock, flags); | 804 | spin_lock_irq(&d->lock); |
625 | d->flags |= DEVFL_UP; | 805 | d->flags |= DEVFL_UP; |
626 | d->flags &= ~DEVFL_NEWSIZE; | 806 | d->flags &= ~DEVFL_NEWSIZE; |
627 | spin_unlock_irqrestore(&d->lock, flags); | 807 | spin_unlock_irq(&d->lock); |
628 | } | 808 | } |
629 | } | 809 | } |
630 | 810 | ||
@@ -717,163 +897,299 @@ gettgt(struct aoedev *d, char *addr)
717 | return NULL; | 897 | return NULL; |
718 | } | 898 | } |
719 | 899 | ||
720 | static inline void | 900 | static void |
721 | diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector) | 901 | bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt) |
902 | { | ||
903 | ulong fcnt; | ||
904 | char *p; | ||
905 | int soff = 0; | ||
906 | loop: | ||
907 | fcnt = bv->bv_len - (off - bv->bv_offset); | ||
908 | if (fcnt > cnt) | ||
909 | fcnt = cnt; | ||
910 | p = page_address(bv->bv_page) + off; | ||
911 | skb_copy_bits(skb, soff, p, fcnt); | ||
912 | soff += fcnt; | ||
913 | cnt -= fcnt; | ||
914 | if (cnt <= 0) | ||
915 | return; | ||
916 | bv++; | ||
917 | off = bv->bv_offset; | ||
918 | goto loop; | ||
919 | } | ||
920 | |||
921 | void | ||
922 | aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) | ||
923 | { | ||
924 | struct bio *bio; | ||
925 | int bok; | ||
926 | struct request_queue *q; | ||
927 | |||
928 | q = d->blkq; | ||
929 | if (rq == d->ip.rq) | ||
930 | d->ip.rq = NULL; | ||
931 | do { | ||
932 | bio = rq->bio; | ||
933 | bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
934 | } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size)); | ||
935 | |||
936 | /* cf. http://lkml.org/lkml/2006/10/31/28 */ | ||
937 | if (!fastfail) | ||
938 | q->request_fn(q); | ||
939 | } | ||
940 | |||
941 | static void | ||
942 | aoe_end_buf(struct aoedev *d, struct buf *buf) | ||
943 | { | ||
944 | struct request *rq; | ||
945 | unsigned long n; | ||
946 | |||
947 | if (buf == d->ip.buf) | ||
948 | d->ip.buf = NULL; | ||
949 | rq = buf->rq; | ||
950 | bio_pagedec(buf->bio); | ||
951 | mempool_free(buf, d->bufpool); | ||
952 | n = (unsigned long) rq->special; | ||
953 | rq->special = (void *) --n; | ||
954 | if (n == 0) | ||
955 | aoe_end_request(d, rq, 0); | ||
956 | } | ||
957 | |||
958 | static void | ||
959 | ktiocomplete(struct frame *f) | ||
722 | { | 960 | { |
723 | unsigned long n_sect = bio->bi_size >> 9; | 961 | struct aoe_hdr *hin, *hout; |
724 | const int rw = bio_data_dir(bio); | 962 | struct aoe_atahdr *ahin, *ahout; |
725 | struct hd_struct *part; | 963 | struct buf *buf; |
726 | int cpu; | 964 | struct sk_buff *skb; |
965 | struct aoetgt *t; | ||
966 | struct aoeif *ifp; | ||
967 | struct aoedev *d; | ||
968 | long n; | ||
969 | |||
970 | if (f == NULL) | ||
971 | return; | ||
972 | |||
973 | t = f->t; | ||
974 | d = t->d; | ||
975 | |||
976 | hout = (struct aoe_hdr *) skb_mac_header(f->skb); | ||
977 | ahout = (struct aoe_atahdr *) (hout+1); | ||
978 | buf = f->buf; | ||
979 | skb = f->r_skb; | ||
980 | if (skb == NULL) | ||
981 | goto noskb; /* just fail the buf. */ | ||
982 | |||
983 | hin = (struct aoe_hdr *) skb->data; | ||
984 | skb_pull(skb, sizeof(*hin)); | ||
985 | ahin = (struct aoe_atahdr *) skb->data; | ||
986 | skb_pull(skb, sizeof(*ahin)); | ||
987 | if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ | ||
988 | pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n", | ||
989 | ahout->cmdstat, ahin->cmdstat, | ||
990 | d->aoemajor, d->aoeminor); | ||
991 | noskb: if (buf) | ||
992 | clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); | ||
993 | goto badrsp; | ||
994 | } | ||
727 | 995 | ||
728 | cpu = part_stat_lock(); | 996 | n = ahout->scnt << 9; |
729 | part = disk_map_sector_rcu(disk, sector); | 997 | switch (ahout->cmdstat) { |
998 | case ATA_CMD_PIO_READ: | ||
999 | case ATA_CMD_PIO_READ_EXT: | ||
1000 | if (skb->len < n) { | ||
1001 | pr_err("aoe: runt data size in read. skb->len=%d need=%ld\n", | ||
1002 | skb->len, n); | ||
1003 | clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); | ||
1004 | break; | ||
1005 | } | ||
1006 | bvcpy(f->bv, f->bv_off, skb, n); | ||
1007 | case ATA_CMD_PIO_WRITE: | ||
1008 | case ATA_CMD_PIO_WRITE_EXT: | ||
1009 | spin_lock_irq(&d->lock); | ||
1010 | ifp = getif(t, skb->dev); | ||
1011 | if (ifp) | ||
1012 | ifp->lost = 0; | ||
1013 | if (d->htgt == t) /* I'll help myself, thank you. */ | ||
1014 | d->htgt = NULL; | ||
1015 | spin_unlock_irq(&d->lock); | ||
1016 | break; | ||
1017 | case ATA_CMD_ID_ATA: | ||
1018 | if (skb->len < 512) { | ||
1019 | pr_info("aoe: runt data size in ataid. skb->len=%d\n", | ||
1020 | skb->len); | ||
1021 | break; | ||
1022 | } | ||
1023 | if (skb_linearize(skb)) | ||
1024 | break; | ||
1025 | spin_lock_irq(&d->lock); | ||
1026 | ataid_complete(d, t, skb->data); | ||
1027 | spin_unlock_irq(&d->lock); | ||
1028 | break; | ||
1029 | default: | ||
1030 | pr_info("aoe: unrecognized ata command %2.2Xh for %d.%d\n", | ||
1031 | ahout->cmdstat, | ||
1032 | be16_to_cpu(get_unaligned(&hin->major)), | ||
1033 | hin->minor); | ||
1034 | } | ||
1035 | badrsp: | ||
1036 | spin_lock_irq(&d->lock); | ||
1037 | |||
1038 | aoe_freetframe(f); | ||
1039 | |||
1040 | if (buf && --buf->nframesout == 0 && buf->resid == 0) | ||
1041 | aoe_end_buf(d, buf); | ||
1042 | |||
1043 | aoecmd_work(d); | ||
1044 | |||
1045 | spin_unlock_irq(&d->lock); | ||
1046 | aoedev_put(d); | ||
1047 | dev_kfree_skb(skb); | ||
1048 | } | ||
1049 | |||
1050 | /* Enters with iocq.lock held. | ||
1051 | * Returns true iff responses needing processing remain. | ||
1052 | */ | ||
1053 | static int | ||
1054 | ktio(void) | ||
1055 | { | ||
1056 | struct frame *f; | ||
1057 | struct list_head *pos; | ||
1058 | int i; | ||
730 | 1059 | ||
731 | part_stat_inc(cpu, part, ios[rw]); | 1060 | for (i = 0; ; ++i) { |
732 | part_stat_add(cpu, part, ticks[rw], duration); | 1061 | if (i == MAXIOC) |
733 | part_stat_add(cpu, part, sectors[rw], n_sect); | 1062 | return 1; |
734 | part_stat_add(cpu, part, io_ticks, duration); | 1063 | if (list_empty(&iocq.head)) |
1064 | return 0; | ||
1065 | pos = iocq.head.next; | ||
1066 | list_del(pos); | ||
1067 | spin_unlock_irq(&iocq.lock); | ||
1068 | f = list_entry(pos, struct frame, head); | ||
1069 | ktiocomplete(f); | ||
1070 | spin_lock_irq(&iocq.lock); | ||
1071 | } | ||
1072 | } | ||
735 | 1073 | ||
736 | part_stat_unlock(); | 1074 | static int |
1075 | kthread(void *vp) | ||
1076 | { | ||
1077 | struct ktstate *k; | ||
1078 | DECLARE_WAITQUEUE(wait, current); | ||
1079 | int more; | ||
1080 | |||
1081 | k = vp; | ||
1082 | current->flags |= PF_NOFREEZE; | ||
1083 | set_user_nice(current, -10); | ||
1084 | complete(&k->rendez); /* tell spawner we're running */ | ||
1085 | do { | ||
1086 | spin_lock_irq(k->lock); | ||
1087 | more = k->fn(); | ||
1088 | if (!more) { | ||
1089 | add_wait_queue(k->waitq, &wait); | ||
1090 | __set_current_state(TASK_INTERRUPTIBLE); | ||
1091 | } | ||
1092 | spin_unlock_irq(k->lock); | ||
1093 | if (!more) { | ||
1094 | schedule(); | ||
1095 | remove_wait_queue(k->waitq, &wait); | ||
1096 | } else | ||
1097 | cond_resched(); | ||
1098 | } while (!kthread_should_stop()); | ||
1099 | complete(&k->rendez); /* tell spawner we're stopping */ | ||
1100 | return 0; | ||
737 | } | 1101 | } |
738 | 1102 | ||
739 | void | 1103 | void |
1104 | aoe_ktstop(struct ktstate *k) | ||
1105 | { | ||
1106 | kthread_stop(k->task); | ||
1107 | wait_for_completion(&k->rendez); | ||
1108 | } | ||
1109 | |||
1110 | int | ||
1111 | aoe_ktstart(struct ktstate *k) | ||
1112 | { | ||
1113 | struct task_struct *task; | ||
1114 | |||
1115 | init_completion(&k->rendez); | ||
1116 | task = kthread_run(kthread, k, k->name); | ||
1117 | if (task == NULL || IS_ERR(task)) | ||
1118 | return -ENOMEM; | ||
1119 | k->task = task; | ||
1120 | wait_for_completion(&k->rendez); /* allow kthread to start */ | ||
1121 | init_completion(&k->rendez); /* for waiting for exit later */ | ||
1122 | return 0; | ||
1123 | } | ||
1124 | |||
1125 | /* pass it off to kthreads for processing */ | ||
1126 | static void | ||
1127 | ktcomplete(struct frame *f, struct sk_buff *skb) | ||
1128 | { | ||
1129 | ulong flags; | ||
1130 | |||
1131 | f->r_skb = skb; | ||
1132 | spin_lock_irqsave(&iocq.lock, flags); | ||
1133 | list_add_tail(&f->head, &iocq.head); | ||
1134 | spin_unlock_irqrestore(&iocq.lock, flags); | ||
1135 | wake_up(&ktiowq); | ||
1136 | } | ||
1137 | |||
1138 | struct sk_buff * | ||
740 | aoecmd_ata_rsp(struct sk_buff *skb) | 1139 | aoecmd_ata_rsp(struct sk_buff *skb) |
741 | { | 1140 | { |
742 | struct sk_buff_head queue; | ||
743 | struct aoedev *d; | 1141 | struct aoedev *d; |
744 | struct aoe_hdr *hin, *hout; | 1142 | struct aoe_hdr *h; |
745 | struct aoe_atahdr *ahin, *ahout; | ||
746 | struct frame *f; | 1143 | struct frame *f; |
747 | struct buf *buf; | ||
748 | struct aoetgt *t; | 1144 | struct aoetgt *t; |
749 | struct aoeif *ifp; | 1145 | u32 n; |
750 | register long n; | ||
751 | ulong flags; | 1146 | ulong flags; |
752 | char ebuf[128]; | 1147 | char ebuf[128]; |
753 | u16 aoemajor; | 1148 | u16 aoemajor; |
754 | 1149 | ||
755 | hin = (struct aoe_hdr *) skb_mac_header(skb); | 1150 | h = (struct aoe_hdr *) skb->data; |
756 | aoemajor = get_unaligned_be16(&hin->major); | 1151 | aoemajor = be16_to_cpu(get_unaligned(&h->major)); |
757 | d = aoedev_by_aoeaddr(aoemajor, hin->minor); | 1152 | d = aoedev_by_aoeaddr(aoemajor, h->minor, 0); |
758 | if (d == NULL) { | 1153 | if (d == NULL) { |
759 | snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " | 1154 | snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " |
760 | "for unknown device %d.%d\n", | 1155 | "for unknown device %d.%d\n", |
761 | aoemajor, hin->minor); | 1156 | aoemajor, h->minor); |
762 | aoechr_error(ebuf); | 1157 | aoechr_error(ebuf); |
763 | return; | 1158 | return skb; |
764 | } | 1159 | } |
765 | 1160 | ||
766 | spin_lock_irqsave(&d->lock, flags); | 1161 | spin_lock_irqsave(&d->lock, flags); |
767 | 1162 | ||
768 | n = get_unaligned_be32(&hin->tag); | 1163 | n = be32_to_cpu(get_unaligned(&h->tag)); |
769 | t = gettgt(d, hin->src); | 1164 | f = getframe(d, n); |
770 | if (t == NULL) { | ||
771 | printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n", | ||
772 | d->aoemajor, d->aoeminor, hin->src); | ||
773 | spin_unlock_irqrestore(&d->lock, flags); | ||
774 | return; | ||
775 | } | ||
776 | f = getframe(t, n); | ||
777 | if (f == NULL) { | 1165 | if (f == NULL) { |
778 | calc_rttavg(d, -tsince(n)); | 1166 | calc_rttavg(d, -tsince(n)); |
779 | spin_unlock_irqrestore(&d->lock, flags); | 1167 | spin_unlock_irqrestore(&d->lock, flags); |
1168 | aoedev_put(d); | ||
780 | snprintf(ebuf, sizeof ebuf, | 1169 | snprintf(ebuf, sizeof ebuf, |
781 | "%15s e%d.%d tag=%08x@%08lx\n", | 1170 | "%15s e%d.%d tag=%08x@%08lx\n", |
782 | "unexpected rsp", | 1171 | "unexpected rsp", |
783 | get_unaligned_be16(&hin->major), | 1172 | get_unaligned_be16(&h->major), |
784 | hin->minor, | 1173 | h->minor, |
785 | get_unaligned_be32(&hin->tag), | 1174 | get_unaligned_be32(&h->tag), |
786 | jiffies); | 1175 | jiffies); |
787 | aoechr_error(ebuf); | 1176 | aoechr_error(ebuf); |
788 | return; | 1177 | return skb; |
789 | } | 1178 | } |
790 | 1179 | t = f->t; | |
791 | calc_rttavg(d, tsince(f->tag)); | 1180 | calc_rttavg(d, tsince(f->tag)); |
792 | |||
793 | ahin = (struct aoe_atahdr *) (hin+1); | ||
794 | hout = (struct aoe_hdr *) skb_mac_header(f->skb); | ||
795 | ahout = (struct aoe_atahdr *) (hout+1); | ||
796 | buf = f->buf; | ||
797 | |||
798 | if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ | ||
799 | printk(KERN_ERR | ||
800 | "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n", | ||
801 | ahout->cmdstat, ahin->cmdstat, | ||
802 | d->aoemajor, d->aoeminor); | ||
803 | if (buf) | ||
804 | buf->flags |= BUFFL_FAIL; | ||
805 | } else { | ||
806 | if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */ | ||
807 | d->htgt = NULL; | ||
808 | n = ahout->scnt << 9; | ||
809 | switch (ahout->cmdstat) { | ||
810 | case ATA_CMD_PIO_READ: | ||
811 | case ATA_CMD_PIO_READ_EXT: | ||
812 | if (skb->len - sizeof *hin - sizeof *ahin < n) { | ||
813 | printk(KERN_ERR | ||
814 | "aoe: %s. skb->len=%d need=%ld\n", | ||
815 | "runt data size in read", skb->len, n); | ||
816 | /* fail frame f? just returning will rexmit. */ | ||
817 | spin_unlock_irqrestore(&d->lock, flags); | ||
818 | return; | ||
819 | } | ||
820 | memcpy(f->bufaddr, ahin+1, n); | ||
821 | case ATA_CMD_PIO_WRITE: | ||
822 | case ATA_CMD_PIO_WRITE_EXT: | ||
823 | ifp = getif(t, skb->dev); | ||
824 | if (ifp) { | ||
825 | ifp->lost = 0; | ||
826 | if (n > DEFAULTBCNT) | ||
827 | ifp->lostjumbo = 0; | ||
828 | } | ||
829 | if (f->bcnt -= n) { | ||
830 | f->lba += n >> 9; | ||
831 | f->bufaddr += n; | ||
832 | resend(d, t, f); | ||
833 | goto xmit; | ||
834 | } | ||
835 | break; | ||
836 | case ATA_CMD_ID_ATA: | ||
837 | if (skb->len - sizeof *hin - sizeof *ahin < 512) { | ||
838 | printk(KERN_INFO | ||
839 | "aoe: runt data size in ataid. skb->len=%d\n", | ||
840 | skb->len); | ||
841 | spin_unlock_irqrestore(&d->lock, flags); | ||
842 | return; | ||
843 | } | ||
844 | ataid_complete(d, t, (char *) (ahin+1)); | ||
845 | break; | ||
846 | default: | ||
847 | printk(KERN_INFO | ||
848 | "aoe: unrecognized ata command %2.2Xh for %d.%d\n", | ||
849 | ahout->cmdstat, | ||
850 | get_unaligned_be16(&hin->major), | ||
851 | hin->minor); | ||
852 | } | ||
853 | } | ||
854 | |||
855 | if (buf && --buf->nframesout == 0 && buf->resid == 0) { | ||
856 | diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector); | ||
857 | if (buf->flags & BUFFL_FAIL) | ||
858 | bio_endio(buf->bio, -EIO); | ||
859 | else { | ||
860 | bio_flush_dcache_pages(buf->bio); | ||
861 | bio_endio(buf->bio, 0); | ||
862 | } | ||
863 | mempool_free(buf, d->bufpool); | ||
864 | } | ||
865 | |||
866 | f->buf = NULL; | ||
867 | f->tag = FREETAG; | ||
868 | t->nout--; | 1181 | t->nout--; |
869 | |||
870 | aoecmd_work(d); | 1182 | aoecmd_work(d); |
871 | xmit: | ||
872 | __skb_queue_head_init(&queue); | ||
873 | skb_queue_splice_init(&d->sendq, &queue); | ||
874 | 1183 | ||
875 | spin_unlock_irqrestore(&d->lock, flags); | 1184 | spin_unlock_irqrestore(&d->lock, flags); |
876 | aoenet_xmit(&queue); | 1185 | |
1186 | ktcomplete(f, skb); | ||
1187 | |||
1188 | /* | ||
1189 | * Note here that we do not perform an aoedev_put, as we are | ||
1190 | * leaving this reference for the ktio to release. | ||
1191 | */ | ||
1192 | return NULL; | ||
877 | } | 1193 | } |
878 | 1194 | ||
879 | void | 1195 | void |
@@ -895,7 +1211,7 @@ aoecmd_ata_id(struct aoedev *d) | |||
895 | struct sk_buff *skb; | 1211 | struct sk_buff *skb; |
896 | struct aoetgt *t; | 1212 | struct aoetgt *t; |
897 | 1213 | ||
898 | f = freeframe(d); | 1214 | f = newframe(d); |
899 | if (f == NULL) | 1215 | if (f == NULL) |
900 | return NULL; | 1216 | return NULL; |
901 | 1217 | ||
@@ -908,6 +1224,7 @@ aoecmd_ata_id(struct aoedev *d) | |||
908 | skb_put(skb, sizeof *h + sizeof *ah); | 1224 | skb_put(skb, sizeof *h + sizeof *ah); |
909 | memset(h, 0, skb->len); | 1225 | memset(h, 0, skb->len); |
910 | f->tag = aoehdr_atainit(d, t, h); | 1226 | f->tag = aoehdr_atainit(d, t, h); |
1227 | fhash(f); | ||
911 | t->nout++; | 1228 | t->nout++; |
912 | f->waited = 0; | 1229 | f->waited = 0; |
913 | 1230 | ||
@@ -928,7 +1245,6 @@ static struct aoetgt * | |||
928 | addtgt(struct aoedev *d, char *addr, ulong nframes) | 1245 | addtgt(struct aoedev *d, char *addr, ulong nframes) |
929 | { | 1246 | { |
930 | struct aoetgt *t, **tt, **te; | 1247 | struct aoetgt *t, **tt, **te; |
931 | struct frame *f, *e; | ||
932 | 1248 | ||
933 | tt = d->targets; | 1249 | tt = d->targets; |
934 | te = tt + NTARGETS; | 1250 | te = tt + NTARGETS; |
@@ -940,26 +1256,73 @@ addtgt(struct aoedev *d, char *addr, ulong nframes) | |||
940 | "aoe: device addtgt failure; too many targets\n"); | 1256 | "aoe: device addtgt failure; too many targets\n"); |
941 | return NULL; | 1257 | return NULL; |
942 | } | 1258 | } |
943 | t = kcalloc(1, sizeof *t, GFP_ATOMIC); | 1259 | t = kzalloc(sizeof(*t), GFP_ATOMIC); |
944 | f = kcalloc(nframes, sizeof *f, GFP_ATOMIC); | 1260 | if (!t) { |
945 | if (!t || !f) { | ||
946 | kfree(f); | ||
947 | kfree(t); | ||
948 | printk(KERN_INFO "aoe: cannot allocate memory to add target\n"); | 1261 | printk(KERN_INFO "aoe: cannot allocate memory to add target\n"); |
949 | return NULL; | 1262 | return NULL; |
950 | } | 1263 | } |
951 | 1264 | ||
1265 | d->ntargets++; | ||
952 | t->nframes = nframes; | 1266 | t->nframes = nframes; |
953 | t->frames = f; | 1267 | t->d = d; |
954 | e = f + nframes; | ||
955 | for (; f < e; f++) | ||
956 | f->tag = FREETAG; | ||
957 | memcpy(t->addr, addr, sizeof t->addr); | 1268 | memcpy(t->addr, addr, sizeof t->addr); |
958 | t->ifp = t->ifs; | 1269 | t->ifp = t->ifs; |
959 | t->maxout = t->nframes; | 1270 | t->maxout = t->nframes; |
1271 | INIT_LIST_HEAD(&t->ffree); | ||
960 | return *tt = t; | 1272 | return *tt = t; |
961 | } | 1273 | } |
962 | 1274 | ||
1275 | static void | ||
1276 | setdbcnt(struct aoedev *d) | ||
1277 | { | ||
1278 | struct aoetgt **t, **e; | ||
1279 | int bcnt = 0; | ||
1280 | |||
1281 | t = d->targets; | ||
1282 | e = t + NTARGETS; | ||
1283 | for (; t < e && *t; t++) | ||
1284 | if (bcnt == 0 || bcnt > (*t)->minbcnt) | ||
1285 | bcnt = (*t)->minbcnt; | ||
1286 | if (bcnt != d->maxbcnt) { | ||
1287 | d->maxbcnt = bcnt; | ||
1288 | pr_info("aoe: e%ld.%d: setting %d byte data frames\n", | ||
1289 | d->aoemajor, d->aoeminor, bcnt); | ||
1290 | } | ||
1291 | } | ||
1292 | |||
1293 | static void | ||
1294 | setifbcnt(struct aoetgt *t, struct net_device *nd, int bcnt) | ||
1295 | { | ||
1296 | struct aoedev *d; | ||
1297 | struct aoeif *p, *e; | ||
1298 | int minbcnt; | ||
1299 | |||
1300 | d = t->d; | ||
1301 | minbcnt = bcnt; | ||
1302 | p = t->ifs; | ||
1303 | e = p + NAOEIFS; | ||
1304 | for (; p < e; p++) { | ||
1305 | if (p->nd == NULL) | ||
1306 | break; /* end of the valid interfaces */ | ||
1307 | if (p->nd == nd) { | ||
1308 | p->bcnt = bcnt; /* we're updating */ | ||
1309 | nd = NULL; | ||
1310 | } else if (minbcnt > p->bcnt) | ||
1311 | minbcnt = p->bcnt; /* find the min interface */ | ||
1312 | } | ||
1313 | if (nd) { | ||
1314 | if (p == e) { | ||
1315 | pr_err("aoe: device setifbcnt failure; too many interfaces.\n"); | ||
1316 | return; | ||
1317 | } | ||
1318 | dev_hold(nd); | ||
1319 | p->nd = nd; | ||
1320 | p->bcnt = bcnt; | ||
1321 | } | ||
1322 | t->minbcnt = minbcnt; | ||
1323 | setdbcnt(d); | ||
1324 | } | ||
1325 | |||
963 | void | 1326 | void |
964 | aoecmd_cfg_rsp(struct sk_buff *skb) | 1327 | aoecmd_cfg_rsp(struct sk_buff *skb) |
965 | { | 1328 | { |
@@ -967,11 +1330,12 @@ aoecmd_cfg_rsp(struct sk_buff *skb) | |||
967 | struct aoe_hdr *h; | 1330 | struct aoe_hdr *h; |
968 | struct aoe_cfghdr *ch; | 1331 | struct aoe_cfghdr *ch; |
969 | struct aoetgt *t; | 1332 | struct aoetgt *t; |
970 | struct aoeif *ifp; | 1333 | ulong flags, aoemajor; |
971 | ulong flags, sysminor, aoemajor; | ||
972 | struct sk_buff *sl; | 1334 | struct sk_buff *sl; |
1335 | struct sk_buff_head queue; | ||
973 | u16 n; | 1336 | u16 n; |
974 | 1337 | ||
1338 | sl = NULL; | ||
975 | h = (struct aoe_hdr *) skb_mac_header(skb); | 1339 | h = (struct aoe_hdr *) skb_mac_header(skb); |
976 | ch = (struct aoe_cfghdr *) (h+1); | 1340 | ch = (struct aoe_cfghdr *) (h+1); |
977 | 1341 | ||
@@ -985,10 +1349,13 @@ aoecmd_cfg_rsp(struct sk_buff *skb) | |||
985 | "Check shelf dip switches.\n"); | 1349 | "Check shelf dip switches.\n"); |
986 | return; | 1350 | return; |
987 | } | 1351 | } |
988 | 1352 | if (aoemajor == 0xffff) { | |
989 | sysminor = SYSMINOR(aoemajor, h->minor); | 1353 | pr_info("aoe: e%ld.%d: broadcast shelf number invalid\n", |
990 | if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) { | 1354 | aoemajor, (int) h->minor); |
991 | printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n", | 1355 | return; |
1356 | } | ||
1357 | if (h->minor == 0xff) { | ||
1358 | pr_info("aoe: e%ld.%d: broadcast slot number invalid\n", | ||
992 | aoemajor, (int) h->minor); | 1359 | aoemajor, (int) h->minor); |
993 | return; | 1360 | return; |
994 | } | 1361 | } |
@@ -997,9 +1364,9 @@ aoecmd_cfg_rsp(struct sk_buff *skb) | |||
997 | if (n > aoe_maxout) /* keep it reasonable */ | 1364 | if (n > aoe_maxout) /* keep it reasonable */ |
998 | n = aoe_maxout; | 1365 | n = aoe_maxout; |
999 | 1366 | ||
1000 | d = aoedev_by_sysminor_m(sysminor); | 1367 | d = aoedev_by_aoeaddr(aoemajor, h->minor, 1); |
1001 | if (d == NULL) { | 1368 | if (d == NULL) { |
1002 | printk(KERN_INFO "aoe: device sysminor_m failure\n"); | 1369 | pr_info("aoe: device allocation failure\n"); |
1003 | return; | 1370 | return; |
1004 | } | 1371 | } |
1005 | 1372 | ||
@@ -1008,52 +1375,26 @@ aoecmd_cfg_rsp(struct sk_buff *skb) | |||
1008 | t = gettgt(d, h->src); | 1375 | t = gettgt(d, h->src); |
1009 | if (!t) { | 1376 | if (!t) { |
1010 | t = addtgt(d, h->src, n); | 1377 | t = addtgt(d, h->src, n); |
1011 | if (!t) { | 1378 | if (!t) |
1012 | spin_unlock_irqrestore(&d->lock, flags); | 1379 | goto bail; |
1013 | return; | ||
1014 | } | ||
1015 | } | ||
1016 | ifp = getif(t, skb->dev); | ||
1017 | if (!ifp) { | ||
1018 | ifp = addif(t, skb->dev); | ||
1019 | if (!ifp) { | ||
1020 | printk(KERN_INFO | ||
1021 | "aoe: device addif failure; " | ||
1022 | "too many interfaces?\n"); | ||
1023 | spin_unlock_irqrestore(&d->lock, flags); | ||
1024 | return; | ||
1025 | } | ||
1026 | } | ||
1027 | if (ifp->maxbcnt) { | ||
1028 | n = ifp->nd->mtu; | ||
1029 | n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr); | ||
1030 | n /= 512; | ||
1031 | if (n > ch->scnt) | ||
1032 | n = ch->scnt; | ||
1033 | n = n ? n * 512 : DEFAULTBCNT; | ||
1034 | if (n != ifp->maxbcnt) { | ||
1035 | printk(KERN_INFO | ||
1036 | "aoe: e%ld.%d: setting %d%s%s:%pm\n", | ||
1037 | d->aoemajor, d->aoeminor, n, | ||
1038 | " byte data frames on ", ifp->nd->name, | ||
1039 | t->addr); | ||
1040 | ifp->maxbcnt = n; | ||
1041 | } | ||
1042 | } | 1380 | } |
1381 | n = skb->dev->mtu; | ||
1382 | n -= sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr); | ||
1383 | n /= 512; | ||
1384 | if (n > ch->scnt) | ||
1385 | n = ch->scnt; | ||
1386 | n = n ? n * 512 : DEFAULTBCNT; | ||
1387 | setifbcnt(t, skb->dev, n); | ||
1043 | 1388 | ||
1044 | /* don't change users' perspective */ | 1389 | /* don't change users' perspective */ |
1045 | if (d->nopen) { | 1390 | if (d->nopen == 0) { |
1046 | spin_unlock_irqrestore(&d->lock, flags); | 1391 | d->fw_ver = be16_to_cpu(ch->fwver); |
1047 | return; | 1392 | sl = aoecmd_ata_id(d); |
1048 | } | 1393 | } |
1049 | d->fw_ver = be16_to_cpu(ch->fwver); | 1394 | bail: |
1050 | |||
1051 | sl = aoecmd_ata_id(d); | ||
1052 | |||
1053 | spin_unlock_irqrestore(&d->lock, flags); | 1395 | spin_unlock_irqrestore(&d->lock, flags); |
1054 | 1396 | aoedev_put(d); | |
1055 | if (sl) { | 1397 | if (sl) { |
1056 | struct sk_buff_head queue; | ||
1057 | __skb_queue_head_init(&queue); | 1398 | __skb_queue_head_init(&queue); |
1058 | __skb_queue_tail(&queue, sl); | 1399 | __skb_queue_tail(&queue, sl); |
1059 | aoenet_xmit(&queue); | 1400 | aoenet_xmit(&queue); |
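For a rough sense of the numbers the bcnt computation in aoecmd_cfg_rsp() above produces (taking struct aoe_hdr and struct aoe_atahdr at their usual 24 and 12 bytes, an assumption since aoe.h is not reproduced here): a standard 1500-byte MTU leaves 1464 bytes of payload, which truncates to 2 sectors, so setifbcnt() records 1024-byte data frames on that interface; a 9000-byte jumbo MTU leaves 8964 bytes, or 17 sectors, giving 8704-byte frames. Both are further capped by the sector count the target advertises in ch->scnt, and setdbcnt() then publishes the minimum across all of the device's interfaces as d->maxbcnt.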
@@ -1064,20 +1405,74 @@ void | |||
1064 | aoecmd_cleanslate(struct aoedev *d) | 1405 | aoecmd_cleanslate(struct aoedev *d) |
1065 | { | 1406 | { |
1066 | struct aoetgt **t, **te; | 1407 | struct aoetgt **t, **te; |
1067 | struct aoeif *p, *e; | ||
1068 | 1408 | ||
1069 | d->mintimer = MINTIMER; | 1409 | d->mintimer = MINTIMER; |
1410 | d->maxbcnt = 0; | ||
1070 | 1411 | ||
1071 | t = d->targets; | 1412 | t = d->targets; |
1072 | te = t + NTARGETS; | 1413 | te = t + NTARGETS; |
1073 | for (; t < te && *t; t++) { | 1414 | for (; t < te && *t; t++) |
1074 | (*t)->maxout = (*t)->nframes; | 1415 | (*t)->maxout = (*t)->nframes; |
1075 | p = (*t)->ifs; | 1416 | } |
1076 | e = p + NAOEIFS; | 1417 | |
1077 | for (; p < e; p++) { | 1418 | void |
1078 | p->lostjumbo = 0; | 1419 | aoe_failbuf(struct aoedev *d, struct buf *buf) |
1079 | p->lost = 0; | 1420 | { |
1080 | p->maxbcnt = DEFAULTBCNT; | 1421 | if (buf == NULL) |
1422 | return; | ||
1423 | buf->resid = 0; | ||
1424 | clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); | ||
1425 | if (buf->nframesout == 0) | ||
1426 | aoe_end_buf(d, buf); | ||
1427 | } | ||
1428 | |||
1429 | void | ||
1430 | aoe_flush_iocq(void) | ||
1431 | { | ||
1432 | struct frame *f; | ||
1433 | struct aoedev *d; | ||
1434 | LIST_HEAD(flist); | ||
1435 | struct list_head *pos; | ||
1436 | struct sk_buff *skb; | ||
1437 | ulong flags; | ||
1438 | |||
1439 | spin_lock_irqsave(&iocq.lock, flags); | ||
1440 | list_splice_init(&iocq.head, &flist); | ||
1441 | spin_unlock_irqrestore(&iocq.lock, flags); | ||
1442 | while (!list_empty(&flist)) { | ||
1443 | pos = flist.next; | ||
1444 | list_del(pos); | ||
1445 | f = list_entry(pos, struct frame, head); | ||
1446 | d = f->t->d; | ||
1447 | skb = f->r_skb; | ||
1448 | spin_lock_irqsave(&d->lock, flags); | ||
1449 | if (f->buf) { | ||
1450 | f->buf->nframesout--; | ||
1451 | aoe_failbuf(d, f->buf); | ||
1081 | } | 1452 | } |
1453 | aoe_freetframe(f); | ||
1454 | spin_unlock_irqrestore(&d->lock, flags); | ||
1455 | dev_kfree_skb(skb); | ||
1456 | aoedev_put(d); | ||
1082 | } | 1457 | } |
1083 | } | 1458 | } |
1459 | |||
1460 | int __init | ||
1461 | aoecmd_init(void) | ||
1462 | { | ||
1463 | INIT_LIST_HEAD(&iocq.head); | ||
1464 | spin_lock_init(&iocq.lock); | ||
1465 | init_waitqueue_head(&ktiowq); | ||
1466 | kts.name = "aoe_ktio"; | ||
1467 | kts.fn = ktio; | ||
1468 | kts.waitq = &ktiowq; | ||
1469 | kts.lock = &iocq.lock; | ||
1470 | return aoe_ktstart(&kts); | ||
1471 | } | ||
1472 | |||
1473 | void | ||
1474 | aoecmd_exit(void) | ||
1475 | { | ||
1476 | aoe_ktstop(&kts); | ||
1477 | aoe_flush_iocq(); | ||
1478 | } | ||
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 6b5110a47458..90e5b537f94b 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
2 | /* | 2 | /* |
3 | * aoedev.c | 3 | * aoedev.c |
4 | * AoE device utility functions; maintains device list. | 4 | * AoE device utility functions; maintains device list. |
@@ -9,6 +9,9 @@ | |||
9 | #include <linux/netdevice.h> | 9 | #include <linux/netdevice.h> |
10 | #include <linux/delay.h> | 10 | #include <linux/delay.h> |
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/bitmap.h> | ||
13 | #include <linux/kdev_t.h> | ||
14 | #include <linux/moduleparam.h> | ||
12 | #include "aoe.h" | 15 | #include "aoe.h" |
13 | 16 | ||
14 | static void dummy_timer(ulong); | 17 | static void dummy_timer(ulong); |
@@ -16,23 +19,121 @@ static void aoedev_freedev(struct aoedev *); | |||
16 | static void freetgt(struct aoedev *d, struct aoetgt *t); | 19 | static void freetgt(struct aoedev *d, struct aoetgt *t); |
17 | static void skbpoolfree(struct aoedev *d); | 20 | static void skbpoolfree(struct aoedev *d); |
18 | 21 | ||
22 | static int aoe_dyndevs = 1; | ||
23 | module_param(aoe_dyndevs, int, 0644); | ||
24 | MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices."); | ||
25 | |||
19 | static struct aoedev *devlist; | 26 | static struct aoedev *devlist; |
20 | static DEFINE_SPINLOCK(devlist_lock); | 27 | static DEFINE_SPINLOCK(devlist_lock); |
21 | 28 | ||
22 | struct aoedev * | 29 | /* Because some systems will have one, many, or no |
23 | aoedev_by_aoeaddr(int maj, int min) | 30 | * - partitions, |
31 | * - slots per shelf, | ||
32 | * - or shelves, | ||
33 | * we need some flexibility in the way the minor numbers | ||
34 | * are allocated. So they are dynamic. | ||
35 | */ | ||
36 | #define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS) | ||
37 | |||
38 | static DEFINE_SPINLOCK(used_minors_lock); | ||
39 | static DECLARE_BITMAP(used_minors, N_DEVS); | ||
40 | |||
41 | static int | ||
42 | minor_get_dyn(ulong *sysminor) | ||
24 | { | 43 | { |
25 | struct aoedev *d; | ||
26 | ulong flags; | 44 | ulong flags; |
45 | ulong n; | ||
46 | int error = 0; | ||
47 | |||
48 | spin_lock_irqsave(&used_minors_lock, flags); | ||
49 | n = find_first_zero_bit(used_minors, N_DEVS); | ||
50 | if (n < N_DEVS) | ||
51 | set_bit(n, used_minors); | ||
52 | else | ||
53 | error = -1; | ||
54 | spin_unlock_irqrestore(&used_minors_lock, flags); | ||
55 | |||
56 | *sysminor = n * AOE_PARTITIONS; | ||
57 | return error; | ||
58 | } | ||
27 | 59 | ||
28 | spin_lock_irqsave(&devlist_lock, flags); | 60 | static int |
61 | minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin) | ||
62 | { | ||
63 | ulong flags; | ||
64 | ulong n; | ||
65 | int error = 0; | ||
66 | enum { | ||
67 | /* for backwards compatibility when !aoe_dyndevs, | ||
68 | * a static number of supported slots per shelf */ | ||
69 | NPERSHELF = 16, | ||
70 | }; | ||
71 | |||
72 | n = aoemaj * NPERSHELF + aoemin; | ||
73 | if (aoemin >= NPERSHELF || n >= N_DEVS) { | ||
74 | pr_err("aoe: %s with e%ld.%d\n", | ||
75 | "cannot use static minor device numbers", | ||
76 | aoemaj, aoemin); | ||
77 | error = -1; | ||
78 | } else { | ||
79 | spin_lock_irqsave(&used_minors_lock, flags); | ||
80 | if (test_bit(n, used_minors)) { | ||
81 | pr_err("aoe: %s %lu\n", | ||
82 | "existing device already has static minor number", | ||
83 | n); | ||
84 | error = -1; | ||
85 | } else | ||
86 | set_bit(n, used_minors); | ||
87 | spin_unlock_irqrestore(&used_minors_lock, flags); | ||
88 | } | ||
29 | 89 | ||
30 | for (d=devlist; d; d=d->next) | 90 | *sysminor = n; |
31 | if (d->aoemajor == maj && d->aoeminor == min) | 91 | return error; |
32 | break; | 92 | } |
93 | |||
94 | static int | ||
95 | minor_get(ulong *sysminor, ulong aoemaj, int aoemin) | ||
96 | { | ||
97 | if (aoe_dyndevs) | ||
98 | return minor_get_dyn(sysminor); | ||
99 | else | ||
100 | return minor_get_static(sysminor, aoemaj, aoemin); | ||
101 | } | ||
102 | |||
103 | static void | ||
104 | minor_free(ulong minor) | ||
105 | { | ||
106 | ulong flags; | ||
107 | |||
108 | minor /= AOE_PARTITIONS; | ||
109 | BUG_ON(minor >= N_DEVS); | ||
110 | |||
111 | spin_lock_irqsave(&used_minors_lock, flags); | ||
112 | BUG_ON(!test_bit(minor, used_minors)); | ||
113 | clear_bit(minor, used_minors); | ||
114 | spin_unlock_irqrestore(&used_minors_lock, flags); | ||
115 | } | ||
116 | |||
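The aoe_dyndevs module parameter declared above (default 1, mode 0644, so it is also visible under /sys/module/aoe/parameters/aoe_dyndevs when built modular) selects between these two allocators. A hedged usage sketch, assuming the driver is loaded as a module:

	# modprobe aoe aoe_dyndevs=0    # keep the legacy static mapping, 16 slots per shelf

With the default dynamic allocation, a target's shelf/slot address no longer dictates its minor number, so shelf and slot values beyond the old static range can be supported for as long as free minors remain.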
117 | /* | ||
118 | * Users who grab a pointer to the device with aoedev_by_aoeaddr | ||
119 | * automatically get a reference count and must be responsible | ||
120 | * for performing a aoedev_put. With the addition of async | ||
121 | * kthread processing I'm no longer confident that we can | ||
122 | * guarantee consistency in the face of device flushes. | ||
123 | * | ||
124 | * For the time being, we only bother to add extra references for | ||
125 | * frames sitting on the iocq. When the kthreads finish processing | ||
126 | * these frames, they will aoedev_put the device. | ||
127 | */ | ||
128 | |||
129 | void | ||
130 | aoedev_put(struct aoedev *d) | ||
131 | { | ||
132 | ulong flags; | ||
33 | 133 | ||
134 | spin_lock_irqsave(&devlist_lock, flags); | ||
135 | d->ref--; | ||
34 | spin_unlock_irqrestore(&devlist_lock, flags); | 136 | spin_unlock_irqrestore(&devlist_lock, flags); |
35 | return d; | ||
36 | } | 137 | } |
37 | 138 | ||
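The comment above spells out the contract that aoecmd.c now follows; in shorthand, a lookup site looks like this (a sketch, not a new call site in the driver):

	struct aoedev *d;

	d = aoedev_by_aoeaddr(aoemajor, aoeminor, 0);	/* lookup takes a reference */
	if (d == NULL)
		return;
	/* ... use d, taking d->lock around state changes ... */
	aoedev_put(d);					/* drop the lookup reference */

aoecmd_ata_rsp() is the one exception: it hands its reference to the ktio thread along with the frame, and ktiocomplete() calls aoedev_put() once the response has been processed.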
38 | static void | 139 | static void |
@@ -47,54 +148,74 @@ dummy_timer(ulong vp) | |||
47 | add_timer(&d->timer); | 148 | add_timer(&d->timer); |
48 | } | 149 | } |
49 | 150 | ||
151 | static void | ||
152 | aoe_failip(struct aoedev *d) | ||
153 | { | ||
154 | struct request *rq; | ||
155 | struct bio *bio; | ||
156 | unsigned long n; | ||
157 | |||
158 | aoe_failbuf(d, d->ip.buf); | ||
159 | |||
160 | rq = d->ip.rq; | ||
161 | if (rq == NULL) | ||
162 | return; | ||
163 | while ((bio = d->ip.nxbio)) { | ||
164 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
165 | d->ip.nxbio = bio->bi_next; | ||
166 | n = (unsigned long) rq->special; | ||
167 | rq->special = (void *) --n; | ||
168 | } | ||
169 | if ((unsigned long) rq->special == 0) | ||
170 | aoe_end_request(d, rq, 0); | ||
171 | } | ||
172 | |||
50 | void | 173 | void |
51 | aoedev_downdev(struct aoedev *d) | 174 | aoedev_downdev(struct aoedev *d) |
52 | { | 175 | { |
53 | struct aoetgt **t, **te; | 176 | struct aoetgt *t, **tt, **te; |
54 | struct frame *f, *e; | 177 | struct frame *f; |
55 | struct buf *buf; | 178 | struct list_head *head, *pos, *nx; |
56 | struct bio *bio; | 179 | struct request *rq; |
180 | int i; | ||
57 | 181 | ||
58 | t = d->targets; | 182 | d->flags &= ~DEVFL_UP; |
59 | te = t + NTARGETS; | 183 | |
60 | for (; t < te && *t; t++) { | 184 | /* clean out active buffers */ |
61 | f = (*t)->frames; | 185 | for (i = 0; i < NFACTIVE; i++) { |
62 | e = f + (*t)->nframes; | 186 | head = &d->factive[i]; |
63 | for (; f < e; f->tag = FREETAG, f->buf = NULL, f++) { | 187 | list_for_each_safe(pos, nx, head) { |
64 | if (f->tag == FREETAG || f->buf == NULL) | 188 | f = list_entry(pos, struct frame, head); |
65 | continue; | 189 | list_del(pos); |
66 | buf = f->buf; | 190 | if (f->buf) { |
67 | bio = buf->bio; | 191 | f->buf->nframesout--; |
68 | if (--buf->nframesout == 0 | 192 | aoe_failbuf(d, f->buf); |
69 | && buf != d->inprocess) { | ||
70 | mempool_free(buf, d->bufpool); | ||
71 | bio_endio(bio, -EIO); | ||
72 | } | 193 | } |
194 | aoe_freetframe(f); | ||
73 | } | 195 | } |
74 | (*t)->maxout = (*t)->nframes; | ||
75 | (*t)->nout = 0; | ||
76 | } | 196 | } |
77 | buf = d->inprocess; | 197 | /* reset window dressings */ |
78 | if (buf) { | 198 | tt = d->targets; |
79 | bio = buf->bio; | 199 | te = tt + NTARGETS; |
80 | mempool_free(buf, d->bufpool); | 200 | for (; tt < te && (t = *tt); tt++) { |
81 | bio_endio(bio, -EIO); | 201 | t->maxout = t->nframes; |
202 | t->nout = 0; | ||
82 | } | 203 | } |
83 | d->inprocess = NULL; | 204 | |
205 | /* clean out the in-process request (if any) */ | ||
206 | aoe_failip(d); | ||
84 | d->htgt = NULL; | 207 | d->htgt = NULL; |
85 | 208 | ||
86 | while (!list_empty(&d->bufq)) { | 209 | /* fast fail all pending I/O */ |
87 | buf = container_of(d->bufq.next, struct buf, bufs); | 210 | if (d->blkq) { |
88 | list_del(d->bufq.next); | 211 | while ((rq = blk_peek_request(d->blkq))) { |
89 | bio = buf->bio; | 212 | blk_start_request(rq); |
90 | mempool_free(buf, d->bufpool); | 213 | aoe_end_request(d, rq, 1); |
91 | bio_endio(bio, -EIO); | 214 | } |
92 | } | 215 | } |
93 | 216 | ||
94 | if (d->gd) | 217 | if (d->gd) |
95 | set_capacity(d->gd, 0); | 218 | set_capacity(d->gd, 0); |
96 | |||
97 | d->flags &= ~DEVFL_UP; | ||
98 | } | 219 | } |
99 | 220 | ||
100 | static void | 221 | static void |
@@ -107,6 +228,7 @@ aoedev_freedev(struct aoedev *d) | |||
107 | aoedisk_rm_sysfs(d); | 228 | aoedisk_rm_sysfs(d); |
108 | del_gendisk(d->gd); | 229 | del_gendisk(d->gd); |
109 | put_disk(d->gd); | 230 | put_disk(d->gd); |
231 | blk_cleanup_queue(d->blkq); | ||
110 | } | 232 | } |
111 | t = d->targets; | 233 | t = d->targets; |
112 | e = t + NTARGETS; | 234 | e = t + NTARGETS; |
@@ -115,7 +237,7 @@ aoedev_freedev(struct aoedev *d) | |||
115 | if (d->bufpool) | 237 | if (d->bufpool) |
116 | mempool_destroy(d->bufpool); | 238 | mempool_destroy(d->bufpool); |
117 | skbpoolfree(d); | 239 | skbpoolfree(d); |
118 | blk_cleanup_queue(d->blkq); | 240 | minor_free(d->sysminor); |
119 | kfree(d); | 241 | kfree(d); |
120 | } | 242 | } |
121 | 243 | ||
@@ -142,7 +264,8 @@ aoedev_flush(const char __user *str, size_t cnt) | |||
142 | spin_lock(&d->lock); | 264 | spin_lock(&d->lock); |
143 | if ((!all && (d->flags & DEVFL_UP)) | 265 | if ((!all && (d->flags & DEVFL_UP)) |
144 | || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) | 266 | || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) |
145 | || d->nopen) { | 267 | || d->nopen |
268 | || d->ref) { | ||
146 | spin_unlock(&d->lock); | 269 | spin_unlock(&d->lock); |
147 | dd = &d->next; | 270 | dd = &d->next; |
148 | continue; | 271 | continue; |
@@ -163,12 +286,15 @@ aoedev_flush(const char __user *str, size_t cnt) | |||
163 | return 0; | 286 | return 0; |
164 | } | 287 | } |
165 | 288 | ||
166 | /* I'm not really sure that this is a realistic problem, but if the | 289 | /* This has been confirmed to occur once with Tms=3*1000 due to the |
167 | network driver goes gonzo let's just leak memory after complaining. */ | 290 | * driver changing link and not processing its transmit ring. The |
291 | * problem is hard enough to solve by returning an error that I'm | ||
292 | * still punting on "solving" this. | ||
293 | */ | ||
168 | static void | 294 | static void |
169 | skbfree(struct sk_buff *skb) | 295 | skbfree(struct sk_buff *skb) |
170 | { | 296 | { |
171 | enum { Sms = 100, Tms = 3*1000}; | 297 | enum { Sms = 250, Tms = 30 * 1000}; |
172 | int i = Tms / Sms; | 298 | int i = Tms / Sms; |
173 | 299 | ||
174 | if (skb == NULL) | 300 | if (skb == NULL) |
@@ -182,6 +308,7 @@ skbfree(struct sk_buff *skb) | |||
182 | "cannot free skb -- memory leaked."); | 308 | "cannot free skb -- memory leaked."); |
183 | return; | 309 | return; |
184 | } | 310 | } |
311 | skb->truesize -= skb->data_len; | ||
185 | skb_shinfo(skb)->nr_frags = skb->data_len = 0; | 312 | skb_shinfo(skb)->nr_frags = skb->data_len = 0; |
186 | skb_trim(skb, 0); | 313 | skb_trim(skb, 0); |
187 | dev_kfree_skb(skb); | 314 | dev_kfree_skb(skb); |
@@ -198,26 +325,29 @@ skbpoolfree(struct aoedev *d) | |||
198 | __skb_queue_head_init(&d->skbpool); | 325 | __skb_queue_head_init(&d->skbpool); |
199 | } | 326 | } |
200 | 327 | ||
201 | /* find it or malloc it */ | 328 | /* find it or allocate it */ |
202 | struct aoedev * | 329 | struct aoedev * |
203 | aoedev_by_sysminor_m(ulong sysminor) | 330 | aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) |
204 | { | 331 | { |
205 | struct aoedev *d; | 332 | struct aoedev *d; |
333 | int i; | ||
206 | ulong flags; | 334 | ulong flags; |
335 | ulong sysminor; | ||
207 | 336 | ||
208 | spin_lock_irqsave(&devlist_lock, flags); | 337 | spin_lock_irqsave(&devlist_lock, flags); |
209 | 338 | ||
210 | for (d=devlist; d; d=d->next) | 339 | for (d=devlist; d; d=d->next) |
211 | if (d->sysminor == sysminor) | 340 | if (d->aoemajor == maj && d->aoeminor == min) { |
341 | d->ref++; | ||
212 | break; | 342 | break; |
213 | if (d) | 343 | } |
344 | if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) | ||
214 | goto out; | 345 | goto out; |
215 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); | 346 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); |
216 | if (!d) | 347 | if (!d) |
217 | goto out; | 348 | goto out; |
218 | INIT_WORK(&d->work, aoecmd_sleepwork); | 349 | INIT_WORK(&d->work, aoecmd_sleepwork); |
219 | spin_lock_init(&d->lock); | 350 | spin_lock_init(&d->lock); |
220 | skb_queue_head_init(&d->sendq); | ||
221 | skb_queue_head_init(&d->skbpool); | 351 | skb_queue_head_init(&d->skbpool); |
222 | init_timer(&d->timer); | 352 | init_timer(&d->timer); |
223 | d->timer.data = (ulong) d; | 353 | d->timer.data = (ulong) d; |
@@ -226,10 +356,12 @@ aoedev_by_sysminor_m(ulong sysminor) | |||
226 | add_timer(&d->timer); | 356 | add_timer(&d->timer); |
227 | d->bufpool = NULL; /* defer to aoeblk_gdalloc */ | 357 | d->bufpool = NULL; /* defer to aoeblk_gdalloc */ |
228 | d->tgt = d->targets; | 358 | d->tgt = d->targets; |
229 | INIT_LIST_HEAD(&d->bufq); | 359 | d->ref = 1; |
360 | for (i = 0; i < NFACTIVE; i++) | ||
361 | INIT_LIST_HEAD(&d->factive[i]); | ||
230 | d->sysminor = sysminor; | 362 | d->sysminor = sysminor; |
231 | d->aoemajor = AOEMAJOR(sysminor); | 363 | d->aoemajor = maj; |
232 | d->aoeminor = AOEMINOR(sysminor); | 364 | d->aoeminor = min; |
233 | d->mintimer = MINTIMER; | 365 | d->mintimer = MINTIMER; |
234 | d->next = devlist; | 366 | d->next = devlist; |
235 | devlist = d; | 367 | devlist = d; |
@@ -241,13 +373,23 @@ aoedev_by_sysminor_m(ulong sysminor) | |||
241 | static void | 373 | static void |
242 | freetgt(struct aoedev *d, struct aoetgt *t) | 374 | freetgt(struct aoedev *d, struct aoetgt *t) |
243 | { | 375 | { |
244 | struct frame *f, *e; | 376 | struct frame *f; |
377 | struct list_head *pos, *nx, *head; | ||
378 | struct aoeif *ifp; | ||
245 | 379 | ||
246 | f = t->frames; | 380 | for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) { |
247 | e = f + t->nframes; | 381 | if (!ifp->nd) |
248 | for (; f < e; f++) | 382 | break; |
383 | dev_put(ifp->nd); | ||
384 | } | ||
385 | |||
386 | head = &t->ffree; | ||
387 | list_for_each_safe(pos, nx, head) { | ||
388 | list_del(pos); | ||
389 | f = list_entry(pos, struct frame, head); | ||
249 | skbfree(f->skb); | 390 | skbfree(f->skb); |
250 | kfree(t->frames); | 391 | kfree(f); |
392 | } | ||
251 | kfree(t); | 393 | kfree(t); |
252 | } | 394 | } |
253 | 395 | ||
@@ -257,6 +399,7 @@ aoedev_exit(void) | |||
257 | struct aoedev *d; | 399 | struct aoedev *d; |
258 | ulong flags; | 400 | ulong flags; |
259 | 401 | ||
402 | aoe_flush_iocq(); | ||
260 | while ((d = devlist)) { | 403 | while ((d = devlist)) { |
261 | devlist = d->next; | 404 | devlist = d->next; |
262 | 405 | ||
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index 7f83ad90e76f..04793c2c701b 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
2 | /* | 2 | /* |
3 | * aoemain.c | 3 | * aoemain.c |
4 | * Module initialization routines, discover timer | 4 | * Module initialization routines, discover timer |
@@ -61,6 +61,7 @@ aoe_exit(void) | |||
61 | 61 | ||
62 | aoenet_exit(); | 62 | aoenet_exit(); |
63 | unregister_blkdev(AOE_MAJOR, DEVICE_NAME); | 63 | unregister_blkdev(AOE_MAJOR, DEVICE_NAME); |
64 | aoecmd_exit(); | ||
64 | aoechr_exit(); | 65 | aoechr_exit(); |
65 | aoedev_exit(); | 66 | aoedev_exit(); |
66 | aoeblk_exit(); /* free cache after de-allocating bufs */ | 67 | aoeblk_exit(); /* free cache after de-allocating bufs */ |
@@ -83,17 +84,20 @@ aoe_init(void) | |||
83 | ret = aoenet_init(); | 84 | ret = aoenet_init(); |
84 | if (ret) | 85 | if (ret) |
85 | goto net_fail; | 86 | goto net_fail; |
87 | ret = aoecmd_init(); | ||
88 | if (ret) | ||
89 | goto cmd_fail; | ||
86 | ret = register_blkdev(AOE_MAJOR, DEVICE_NAME); | 90 | ret = register_blkdev(AOE_MAJOR, DEVICE_NAME); |
87 | if (ret < 0) { | 91 | if (ret < 0) { |
88 | printk(KERN_ERR "aoe: can't register major\n"); | 92 | printk(KERN_ERR "aoe: can't register major\n"); |
89 | goto blkreg_fail; | 93 | goto blkreg_fail; |
90 | } | 94 | } |
91 | |||
92 | printk(KERN_INFO "aoe: AoE v%s initialised.\n", VERSION); | 95 | printk(KERN_INFO "aoe: AoE v%s initialised.\n", VERSION); |
93 | discover_timer(TINIT); | 96 | discover_timer(TINIT); |
94 | return 0; | 97 | return 0; |
95 | |||
96 | blkreg_fail: | 98 | blkreg_fail: |
99 | aoecmd_exit(); | ||
100 | cmd_fail: | ||
97 | aoenet_exit(); | 101 | aoenet_exit(); |
98 | net_fail: | 102 | net_fail: |
99 | aoeblk_exit(); | 103 | aoeblk_exit(); |
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 4d3bc0d49df5..162c6471275c 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
2 | /* | 2 | /* |
3 | * aoenet.c | 3 | * aoenet.c |
4 | * Ethernet portion of AoE driver | 4 | * Ethernet portion of AoE driver |
@@ -33,6 +33,9 @@ static char aoe_iflist[IFLISTSZ]; | |||
33 | module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600); | 33 | module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600); |
34 | MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\""); | 34 | MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\""); |
35 | 35 | ||
36 | static wait_queue_head_t txwq; | ||
37 | static struct ktstate kts; | ||
38 | |||
36 | #ifndef MODULE | 39 | #ifndef MODULE |
37 | static int __init aoe_iflist_setup(char *str) | 40 | static int __init aoe_iflist_setup(char *str) |
38 | { | 41 | { |
@@ -44,6 +47,23 @@ static int __init aoe_iflist_setup(char *str) | |||
44 | __setup("aoe_iflist=", aoe_iflist_setup); | 47 | __setup("aoe_iflist=", aoe_iflist_setup); |
45 | #endif | 48 | #endif |
46 | 49 | ||
50 | static spinlock_t txlock; | ||
51 | static struct sk_buff_head skbtxq; | ||
52 | |||
53 | /* enters with txlock held */ | ||
54 | static int | ||
55 | tx(void) | ||
56 | { | ||
57 | struct sk_buff *skb; | ||
58 | |||
59 | while ((skb = skb_dequeue(&skbtxq))) { | ||
60 | spin_unlock_irq(&txlock); | ||
61 | dev_queue_xmit(skb); | ||
62 | spin_lock_irq(&txlock); | ||
63 | } | ||
64 | return 0; | ||
65 | } | ||
66 | |||
47 | int | 67 | int |
48 | is_aoe_netif(struct net_device *ifp) | 68 | is_aoe_netif(struct net_device *ifp) |
49 | { | 69 | { |
@@ -88,10 +108,14 @@ void | |||
88 | aoenet_xmit(struct sk_buff_head *queue) | 108 | aoenet_xmit(struct sk_buff_head *queue) |
89 | { | 109 | { |
90 | struct sk_buff *skb, *tmp; | 110 | struct sk_buff *skb, *tmp; |
111 | ulong flags; | ||
91 | 112 | ||
92 | skb_queue_walk_safe(queue, skb, tmp) { | 113 | skb_queue_walk_safe(queue, skb, tmp) { |
93 | __skb_unlink(skb, queue); | 114 | __skb_unlink(skb, queue); |
94 | dev_queue_xmit(skb); | 115 | spin_lock_irqsave(&txlock, flags); |
116 | skb_queue_tail(&skbtxq, skb); | ||
117 | spin_unlock_irqrestore(&txlock, flags); | ||
118 | wake_up(&txwq); | ||
95 | } | 119 | } |
96 | } | 120 | } |
97 | 121 | ||
@@ -102,7 +126,9 @@ static int | |||
102 | aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) | 126 | aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) |
103 | { | 127 | { |
104 | struct aoe_hdr *h; | 128 | struct aoe_hdr *h; |
129 | struct aoe_atahdr *ah; | ||
105 | u32 n; | 130 | u32 n; |
131 | int sn; | ||
106 | 132 | ||
107 | if (dev_net(ifp) != &init_net) | 133 | if (dev_net(ifp) != &init_net) |
108 | goto exit; | 134 | goto exit; |
@@ -110,13 +136,16 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, | |||
110 | skb = skb_share_check(skb, GFP_ATOMIC); | 136 | skb = skb_share_check(skb, GFP_ATOMIC); |
111 | if (skb == NULL) | 137 | if (skb == NULL) |
112 | return 0; | 138 | return 0; |
113 | if (skb_linearize(skb)) | ||
114 | goto exit; | ||
115 | if (!is_aoe_netif(ifp)) | 139 | if (!is_aoe_netif(ifp)) |
116 | goto exit; | 140 | goto exit; |
117 | skb_push(skb, ETH_HLEN); /* (1) */ | 141 | skb_push(skb, ETH_HLEN); /* (1) */ |
118 | 142 | sn = sizeof(*h) + sizeof(*ah); | |
119 | h = (struct aoe_hdr *) skb_mac_header(skb); | 143 | if (skb->len >= sn) { |
144 | sn -= skb_headlen(skb); | ||
145 | if (sn > 0 && !__pskb_pull_tail(skb, sn)) | ||
146 | goto exit; | ||
147 | } | ||
148 | h = (struct aoe_hdr *) skb->data; | ||
120 | n = get_unaligned_be32(&h->tag); | 149 | n = get_unaligned_be32(&h->tag); |
121 | if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31)) | 150 | if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31)) |
122 | goto exit; | 151 | goto exit; |
@@ -137,7 +166,8 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, | |||
137 | 166 | ||
138 | switch (h->cmd) { | 167 | switch (h->cmd) { |
139 | case AOECMD_ATA: | 168 | case AOECMD_ATA: |
140 | aoecmd_ata_rsp(skb); | 169 | /* ata_rsp may keep skb for later processing or give it back */ |
170 | skb = aoecmd_ata_rsp(skb); | ||
141 | break; | 171 | break; |
142 | case AOECMD_CFG: | 172 | case AOECMD_CFG: |
143 | aoecmd_cfg_rsp(skb); | 173 | aoecmd_cfg_rsp(skb); |
@@ -145,8 +175,12 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, | |||
145 | default: | 175 | default: |
146 | if (h->cmd >= AOECMD_VEND_MIN) | 176 | if (h->cmd >= AOECMD_VEND_MIN) |
147 | break; /* don't complain about vendor commands */ | 177 | break; /* don't complain about vendor commands */ |
148 | printk(KERN_INFO "aoe: unknown cmd %d\n", h->cmd); | 178 | pr_info("aoe: unknown AoE command type 0x%02x\n", h->cmd); |
179 | break; | ||
149 | } | 180 | } |
181 | |||
182 | if (!skb) | ||
183 | return 0; | ||
150 | exit: | 184 | exit: |
151 | dev_kfree_skb(skb); | 185 | dev_kfree_skb(skb); |
152 | return 0; | 186 | return 0; |
@@ -160,6 +194,15 @@ static struct packet_type aoe_pt __read_mostly = { | |||
160 | int __init | 194 | int __init |
161 | aoenet_init(void) | 195 | aoenet_init(void) |
162 | { | 196 | { |
197 | skb_queue_head_init(&skbtxq); | ||
198 | init_waitqueue_head(&txwq); | ||
199 | spin_lock_init(&txlock); | ||
200 | kts.lock = &txlock; | ||
201 | kts.fn = tx; | ||
202 | kts.waitq = &txwq; | ||
203 | kts.name = "aoe_tx"; | ||
204 | if (aoe_ktstart(&kts)) | ||
205 | return -EAGAIN; | ||
163 | dev_add_pack(&aoe_pt); | 206 | dev_add_pack(&aoe_pt); |
164 | return 0; | 207 | return 0; |
165 | } | 208 | } |
@@ -167,6 +210,8 @@ aoenet_init(void) | |||
167 | void | 210 | void |
168 | aoenet_exit(void) | 211 | aoenet_exit(void) |
169 | { | 212 | { |
213 | aoe_ktstop(&kts); | ||
214 | skb_queue_purge(&skbtxq); | ||
170 | dev_remove_pack(&aoe_pt); | 215 | dev_remove_pack(&aoe_pt); |
171 | } | 216 | } |
172 | 217 | ||
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 38aa6dda6b81..da3311129a0c 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c | |||
@@ -795,6 +795,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, | |||
795 | } | 795 | } |
796 | break; | 796 | break; |
797 | case CMD_PROTOCOL_ERR: | 797 | case CMD_PROTOCOL_ERR: |
798 | cmd->result = DID_ERROR << 16; | ||
798 | dev_warn(&h->pdev->dev, | 799 | dev_warn(&h->pdev->dev, |
799 | "%p has protocol error\n", c); | 800 | "%p has protocol error\n", c); |
800 | break; | 801 | break; |
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a7d6347aaa79..17c675c52295 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c | |||
@@ -672,7 +672,6 @@ static void __reschedule_timeout(int drive, const char *message) | |||
672 | 672 | ||
673 | if (drive == current_reqD) | 673 | if (drive == current_reqD) |
674 | drive = current_drive; | 674 | drive = current_drive; |
675 | __cancel_delayed_work(&fd_timeout); | ||
676 | 675 | ||
677 | if (drive < 0 || drive >= N_DRIVE) { | 676 | if (drive < 0 || drive >= N_DRIVE) { |
678 | delay = 20UL * HZ; | 677 | delay = 20UL * HZ; |
@@ -680,7 +679,7 @@ static void __reschedule_timeout(int drive, const char *message) | |||
680 | } else | 679 | } else |
681 | delay = UDP->timeout; | 680 | delay = UDP->timeout; |
682 | 681 | ||
683 | queue_delayed_work(floppy_wq, &fd_timeout, delay); | 682 | mod_delayed_work(floppy_wq, &fd_timeout, delay); |
684 | if (UDP->flags & FD_DEBUG) | 683 | if (UDP->flags & FD_DEBUG) |
685 | DPRINT("reschedule timeout %s\n", message); | 684 | DPRINT("reschedule timeout %s\n", message); |
686 | timeout_message = message; | 685 | timeout_message = message; |
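mod_delayed_work() folds the old __cancel_delayed_work() + queue_delayed_work() pair into a single call that pushes the timer out whether or not the work was already queued. A minimal sketch of the idiom, with made-up names (demo_wq, demo_timeout_fn) rather than floppy.c's own:

	#include <linux/workqueue.h>

	static struct workqueue_struct *demo_wq;	/* created elsewhere with alloc_workqueue() */

	static void demo_timeout_fn(struct work_struct *work);
	static DECLARE_DELAYED_WORK(demo_timeout, demo_timeout_fn);

	static void demo_timeout_fn(struct work_struct *work)
	{
		/* handle the expired timeout */
	}

	/* (re)arm the timeout: if already pending, just move the expiry */
	static void demo_reschedule(unsigned long delay)
	{
		mod_delayed_work(demo_wq, &demo_timeout, delay);
	}

	/* disarm it, e.g. once the command completes */
	static void demo_disarm(void)
	{
		cancel_delayed_work(&demo_timeout);
	}

The next hunk makes the matching change in unlock_fdc(), where a pending timeout only needs to be cancelled, not rearmed.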
@@ -891,7 +890,7 @@ static void unlock_fdc(void) | |||
891 | 890 | ||
892 | raw_cmd = NULL; | 891 | raw_cmd = NULL; |
893 | command_status = FD_COMMAND_NONE; | 892 | command_status = FD_COMMAND_NONE; |
894 | __cancel_delayed_work(&fd_timeout); | 893 | cancel_delayed_work(&fd_timeout); |
895 | do_floppy = NULL; | 894 | do_floppy = NULL; |
896 | cont = NULL; | 895 | cont = NULL; |
897 | clear_bit(0, &fdc_busy); | 896 | clear_bit(0, &fdc_busy); |
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3bba65510d23..e9d594fd12cb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -1038,10 +1038,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) | |||
1038 | { | 1038 | { |
1039 | int err; | 1039 | int err; |
1040 | struct loop_func_table *xfer; | 1040 | struct loop_func_table *xfer; |
1041 | uid_t uid = current_uid(); | 1041 | kuid_t uid = current_uid(); |
1042 | 1042 | ||
1043 | if (lo->lo_encrypt_key_size && | 1043 | if (lo->lo_encrypt_key_size && |
1044 | lo->lo_key_owner != uid && | 1044 | !uid_eq(lo->lo_key_owner, uid) && |
1045 | !capable(CAP_SYS_ADMIN)) | 1045 | !capable(CAP_SYS_ADMIN)) |
1046 | return -EPERM; | 1046 | return -EPERM; |
1047 | if (lo->lo_state != Lo_bound) | 1047 | if (lo->lo_state != Lo_bound) |
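With user-namespace support, lo_key_owner is now a kuid_t rather than a bare uid_t, so it can no longer be compared with plain != ; kuid_t values are compared with uid_eq() and converted at the userspace boundary with make_kuid()/from_kuid(). A short sketch of those helpers (the demo_* names are illustrative, not from loop.c):

	#include <linux/cred.h>
	#include <linux/uidgid.h>

	static bool demo_owner_matches(kuid_t owner)
	{
		return uid_eq(owner, current_uid());
	}

	/* map a uid arriving from userspace into the kernel-internal type */
	static kuid_t demo_uid_in(uid_t uid)
	{
		return make_kuid(current_user_ns(), uid);
	}

	/* and back again when reporting to userspace */
	static uid_t demo_uid_out(kuid_t kuid)
	{
		return from_kuid(current_user_ns(), kuid);
	}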
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index a8fddeb3d638..f946d31d6917 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c | |||
@@ -1148,11 +1148,15 @@ static bool mtip_pause_ncq(struct mtip_port *port, | |||
1148 | reply = port->rxfis + RX_FIS_D2H_REG; | 1148 | reply = port->rxfis + RX_FIS_D2H_REG; |
1149 | task_file_data = readl(port->mmio+PORT_TFDATA); | 1149 | task_file_data = readl(port->mmio+PORT_TFDATA); |
1150 | 1150 | ||
1151 | if ((task_file_data & 1) || (fis->command == ATA_CMD_SEC_ERASE_UNIT)) | 1151 | if (fis->command == ATA_CMD_SEC_ERASE_UNIT) |
1152 | clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); | ||
1153 | |||
1154 | if ((task_file_data & 1)) | ||
1152 | return false; | 1155 | return false; |
1153 | 1156 | ||
1154 | if (fis->command == ATA_CMD_SEC_ERASE_PREP) { | 1157 | if (fis->command == ATA_CMD_SEC_ERASE_PREP) { |
1155 | set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); | 1158 | set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); |
1159 | set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); | ||
1156 | port->ic_pause_timer = jiffies; | 1160 | port->ic_pause_timer = jiffies; |
1157 | return true; | 1161 | return true; |
1158 | } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) && | 1162 | } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) && |
@@ -1900,7 +1904,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, | |||
1900 | int rv = 0, xfer_sz = command[3]; | 1904 | int rv = 0, xfer_sz = command[3]; |
1901 | 1905 | ||
1902 | if (xfer_sz) { | 1906 | if (xfer_sz) { |
1903 | if (user_buffer) | 1907 | if (!user_buffer) |
1904 | return -EFAULT; | 1908 | return -EFAULT; |
1905 | 1909 | ||
1906 | buf = dmam_alloc_coherent(&port->dd->pdev->dev, | 1910 | buf = dmam_alloc_coherent(&port->dd->pdev->dev, |
@@ -2043,7 +2047,7 @@ static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout) | |||
2043 | *timeout = 240000; /* 4 minutes */ | 2047 | *timeout = 240000; /* 4 minutes */ |
2044 | break; | 2048 | break; |
2045 | case ATA_CMD_STANDBYNOW1: | 2049 | case ATA_CMD_STANDBYNOW1: |
2046 | *timeout = 10000; /* 10 seconds */ | 2050 | *timeout = 120000; /* 2 minutes */ |
2047 | break; | 2051 | break; |
2048 | case 0xF7: | 2052 | case 0xF7: |
2049 | case 0xFA: | 2053 | case 0xFA: |
@@ -2588,9 +2592,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, | |||
2588 | if (!len || size) | 2592 | if (!len || size) |
2589 | return 0; | 2593 | return 0; |
2590 | 2594 | ||
2591 | if (size < 0) | ||
2592 | return -EINVAL; | ||
2593 | |||
2594 | size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); | 2595 | size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); |
2595 | 2596 | ||
2596 | for (n = dd->slot_groups-1; n >= 0; n--) | 2597 | for (n = dd->slot_groups-1; n >= 0; n--) |
@@ -2660,9 +2661,6 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, | |||
2660 | if (!len || size) | 2661 | if (!len || size) |
2661 | return 0; | 2662 | return 0; |
2662 | 2663 | ||
2663 | if (size < 0) | ||
2664 | return -EINVAL; | ||
2665 | |||
2666 | size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", | 2664 | size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", |
2667 | dd->port->flags); | 2665 | dd->port->flags); |
2668 | size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n", | 2666 | size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n", |
@@ -3214,8 +3212,8 @@ static int mtip_hw_init(struct driver_data *dd) | |||
3214 | "Unable to check write protect progress\n"); | 3212 | "Unable to check write protect progress\n"); |
3215 | else | 3213 | else |
3216 | dev_info(&dd->pdev->dev, | 3214 | dev_info(&dd->pdev->dev, |
3217 | "Write protect progress: %d%% (%d blocks)\n", | 3215 | "Write protect progress: %u%% (%u blocks)\n", |
3218 | attr242.cur, attr242.data); | 3216 | attr242.cur, le32_to_cpu(attr242.data)); |
3219 | return rv; | 3217 | return rv; |
3220 | 3218 | ||
3221 | out3: | 3219 | out3: |
@@ -3619,6 +3617,10 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) | |||
3619 | bio_endio(bio, -ENODATA); | 3617 | bio_endio(bio, -ENODATA); |
3620 | return; | 3618 | return; |
3621 | } | 3619 | } |
3620 | if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) { | ||
3621 | bio_endio(bio, -ENODATA); | ||
3622 | return; | ||
3623 | } | ||
3622 | } | 3624 | } |
3623 | 3625 | ||
3624 | if (unlikely(!bio_has_data(bio))) { | 3626 | if (unlikely(!bio_has_data(bio))) { |
@@ -4168,7 +4170,13 @@ static void mtip_pci_shutdown(struct pci_dev *pdev) | |||
4168 | 4170 | ||
4169 | /* Table of device ids supported by this driver. */ | 4171 | /* Table of device ids supported by this driver. */ |
4170 | static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = { | 4172 | static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = { |
4171 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320_DEVICE_ID) }, | 4173 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320H_DEVICE_ID) }, |
4174 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320M_DEVICE_ID) }, | ||
4175 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320S_DEVICE_ID) }, | ||
4176 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P325M_DEVICE_ID) }, | ||
4177 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420H_DEVICE_ID) }, | ||
4178 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420M_DEVICE_ID) }, | ||
4179 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P425M_DEVICE_ID) }, | ||
4172 | { 0 } | 4180 | { 0 } |
4173 | }; | 4181 | }; |
4174 | 4182 | ||
@@ -4199,12 +4207,12 @@ static int __init mtip_init(void) | |||
4199 | { | 4207 | { |
4200 | int error; | 4208 | int error; |
4201 | 4209 | ||
4202 | printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); | 4210 | pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); |
4203 | 4211 | ||
4204 | /* Allocate a major block device number to use with this driver. */ | 4212 | /* Allocate a major block device number to use with this driver. */ |
4205 | error = register_blkdev(0, MTIP_DRV_NAME); | 4213 | error = register_blkdev(0, MTIP_DRV_NAME); |
4206 | if (error <= 0) { | 4214 | if (error <= 0) { |
4207 | printk(KERN_ERR "Unable to register block device (%d)\n", | 4215 | pr_err("Unable to register block device (%d)\n", |
4208 | error); | 4216 | error); |
4209 | return -EBUSY; | 4217 | return -EBUSY; |
4210 | } | 4218 | } |
@@ -4213,7 +4221,7 @@ static int __init mtip_init(void) | |||
4213 | if (!dfs_parent) { | 4221 | if (!dfs_parent) { |
4214 | dfs_parent = debugfs_create_dir("rssd", NULL); | 4222 | dfs_parent = debugfs_create_dir("rssd", NULL); |
4215 | if (IS_ERR_OR_NULL(dfs_parent)) { | 4223 | if (IS_ERR_OR_NULL(dfs_parent)) { |
4216 | printk(KERN_WARNING "Error creating debugfs parent\n"); | 4224 | pr_warn("Error creating debugfs parent\n"); |
4217 | dfs_parent = NULL; | 4225 | dfs_parent = NULL; |
4218 | } | 4226 | } |
4219 | } | 4227 | } |
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index f51fc23d17bb..18627a1d04c5 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h | |||
@@ -76,7 +76,13 @@ | |||
76 | 76 | ||
77 | /* Micron Vendor ID & P320x SSD Device ID */ | 77 | /* Micron Vendor ID & P320x SSD Device ID */ |
78 | #define PCI_VENDOR_ID_MICRON 0x1344 | 78 | #define PCI_VENDOR_ID_MICRON 0x1344 |
79 | #define P320_DEVICE_ID 0x5150 | 79 | #define P320H_DEVICE_ID 0x5150 |
80 | #define P320M_DEVICE_ID 0x5151 | ||
81 | #define P320S_DEVICE_ID 0x5152 | ||
82 | #define P325M_DEVICE_ID 0x5153 | ||
83 | #define P420H_DEVICE_ID 0x5160 | ||
84 | #define P420M_DEVICE_ID 0x5161 | ||
85 | #define P425M_DEVICE_ID 0x5163 | ||
80 | 86 | ||
81 | /* Driver name and version strings */ | 87 | /* Driver name and version strings */ |
82 | #define MTIP_DRV_NAME "mtip32xx" | 88 | #define MTIP_DRV_NAME "mtip32xx" |
@@ -131,10 +137,12 @@ enum { | |||
131 | MTIP_PF_SVC_THD_STOP_BIT = 8, | 137 | MTIP_PF_SVC_THD_STOP_BIT = 8, |
132 | 138 | ||
133 | /* below are bit numbers in 'dd_flag' defined in driver_data */ | 139 | /* below are bit numbers in 'dd_flag' defined in driver_data */ |
140 | MTIP_DDF_SEC_LOCK_BIT = 0, | ||
134 | MTIP_DDF_REMOVE_PENDING_BIT = 1, | 141 | MTIP_DDF_REMOVE_PENDING_BIT = 1, |
135 | MTIP_DDF_OVER_TEMP_BIT = 2, | 142 | MTIP_DDF_OVER_TEMP_BIT = 2, |
136 | MTIP_DDF_WRITE_PROTECT_BIT = 3, | 143 | MTIP_DDF_WRITE_PROTECT_BIT = 3, |
137 | MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ | 144 | MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ |
145 | (1 << MTIP_DDF_SEC_LOCK_BIT) | \ | ||
138 | (1 << MTIP_DDF_OVER_TEMP_BIT) | \ | 146 | (1 << MTIP_DDF_OVER_TEMP_BIT) | \ |
139 | (1 << MTIP_DDF_WRITE_PROTECT_BIT)), | 147 | (1 << MTIP_DDF_WRITE_PROTECT_BIT)), |
140 | 148 | ||
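
The two mtip32xx hunks above work together: the new MTIP_DDF_SEC_LOCK_BIT is folded into MTIP_DDF_STOP_IO, so a security-locked drive fails incoming bios the same way an over-temperature or write-protected one does. A minimal userspace sketch of that mask logic, with the bit numbers copied from the hunk (everything else here is illustrative):

#include <stdio.h>

#define MTIP_DDF_SEC_LOCK_BIT		0
#define MTIP_DDF_REMOVE_PENDING_BIT	1
#define MTIP_DDF_OVER_TEMP_BIT		2
#define MTIP_DDF_WRITE_PROTECT_BIT	3

#define MTIP_DDF_STOP_IO ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \
			  (1 << MTIP_DDF_SEC_LOCK_BIT) | \
			  (1 << MTIP_DDF_OVER_TEMP_BIT) | \
			  (1 << MTIP_DDF_WRITE_PROTECT_BIT))

int main(void)
{
	unsigned long dd_flag = 1UL << MTIP_DDF_SEC_LOCK_BIT;	/* drive reports a security lock */

	/* mtip_make_request() refuses new I/O when any stop-I/O bit is set */
	if (dd_flag & MTIP_DDF_STOP_IO)
		printf("I/O refused: dd_flag=0x%lx, mask=0x%x\n",
		       dd_flag, MTIP_DDF_STOP_IO);
	return 0;
}
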
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d07c9f7fded6..043ddcca4abf 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c | |||
@@ -78,6 +78,8 @@ static const char *ioctl_cmd_to_ascii(int cmd) | |||
78 | case NBD_SET_SOCK: return "set-sock"; | 78 | case NBD_SET_SOCK: return "set-sock"; |
79 | case NBD_SET_BLKSIZE: return "set-blksize"; | 79 | case NBD_SET_BLKSIZE: return "set-blksize"; |
80 | case NBD_SET_SIZE: return "set-size"; | 80 | case NBD_SET_SIZE: return "set-size"; |
81 | case NBD_SET_TIMEOUT: return "set-timeout"; | ||
82 | case NBD_SET_FLAGS: return "set-flags"; | ||
81 | case NBD_DO_IT: return "do-it"; | 83 | case NBD_DO_IT: return "do-it"; |
82 | case NBD_CLEAR_SOCK: return "clear-sock"; | 84 | case NBD_CLEAR_SOCK: return "clear-sock"; |
83 | case NBD_CLEAR_QUE: return "clear-que"; | 85 | case NBD_CLEAR_QUE: return "clear-que"; |
@@ -96,6 +98,7 @@ static const char *nbdcmd_to_ascii(int cmd) | |||
96 | case NBD_CMD_READ: return "read"; | 98 | case NBD_CMD_READ: return "read"; |
97 | case NBD_CMD_WRITE: return "write"; | 99 | case NBD_CMD_WRITE: return "write"; |
98 | case NBD_CMD_DISC: return "disconnect"; | 100 | case NBD_CMD_DISC: return "disconnect"; |
101 | case NBD_CMD_TRIM: return "trim/discard"; | ||
99 | } | 102 | } |
100 | return "invalid"; | 103 | return "invalid"; |
101 | } | 104 | } |
@@ -449,6 +452,14 @@ static void nbd_clear_que(struct nbd_device *nbd) | |||
449 | req->errors++; | 452 | req->errors++; |
450 | nbd_end_request(req); | 453 | nbd_end_request(req); |
451 | } | 454 | } |
455 | |||
456 | while (!list_empty(&nbd->waiting_queue)) { | ||
457 | req = list_entry(nbd->waiting_queue.next, struct request, | ||
458 | queuelist); | ||
459 | list_del_init(&req->queuelist); | ||
460 | req->errors++; | ||
461 | nbd_end_request(req); | ||
462 | } | ||
452 | } | 463 | } |
453 | 464 | ||
454 | 465 | ||
@@ -459,8 +470,12 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) | |||
459 | 470 | ||
460 | nbd_cmd(req) = NBD_CMD_READ; | 471 | nbd_cmd(req) = NBD_CMD_READ; |
461 | if (rq_data_dir(req) == WRITE) { | 472 | if (rq_data_dir(req) == WRITE) { |
462 | nbd_cmd(req) = NBD_CMD_WRITE; | 473 | if ((req->cmd_flags & REQ_DISCARD)) { |
463 | if (nbd->flags & NBD_READ_ONLY) { | 474 | WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM)); |
475 | nbd_cmd(req) = NBD_CMD_TRIM; | ||
476 | } else | ||
477 | nbd_cmd(req) = NBD_CMD_WRITE; | ||
478 | if (nbd->flags & NBD_FLAG_READ_ONLY) { | ||
464 | dev_err(disk_to_dev(nbd->disk), | 479 | dev_err(disk_to_dev(nbd->disk), |
465 | "Write on read-only\n"); | 480 | "Write on read-only\n"); |
466 | goto error_out; | 481 | goto error_out; |
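
The hunk above turns a discard request into NBD_CMD_TRIM only when the export advertised trim support; ordinary writes still become NBD_CMD_WRITE and are rejected on read-only exports. A small standalone model of that decision (the enum values mirror the NBD protocol, but treat them as illustrative):

#include <stdio.h>
#include <stdbool.h>

enum nbd_cmd { NBD_CMD_READ = 0, NBD_CMD_WRITE, NBD_CMD_DISC, NBD_CMD_FLUSH, NBD_CMD_TRIM };

/* is_discard models REQ_DISCARD, send_trim models NBD_FLAG_SEND_TRIM */
static enum nbd_cmd nbd_pick_cmd(bool is_write, bool is_discard, bool send_trim)
{
	if (!is_write)
		return NBD_CMD_READ;
	if (is_discard && send_trim)
		return NBD_CMD_TRIM;
	return NBD_CMD_WRITE;
}

int main(void)
{
	printf("discard on a trim-capable export -> %d (TRIM)\n",
	       nbd_pick_cmd(true, true, true));
	printf("ordinary write                   -> %d (WRITE)\n",
	       nbd_pick_cmd(true, false, true));
	return 0;
}
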
@@ -598,6 +613,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, | |||
598 | nbd->file = NULL; | 613 | nbd->file = NULL; |
599 | nbd_clear_que(nbd); | 614 | nbd_clear_que(nbd); |
600 | BUG_ON(!list_empty(&nbd->queue_head)); | 615 | BUG_ON(!list_empty(&nbd->queue_head)); |
616 | BUG_ON(!list_empty(&nbd->waiting_queue)); | ||
601 | if (file) | 617 | if (file) |
602 | fput(file); | 618 | fput(file); |
603 | return 0; | 619 | return 0; |
@@ -642,6 +658,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, | |||
642 | nbd->xmit_timeout = arg * HZ; | 658 | nbd->xmit_timeout = arg * HZ; |
643 | return 0; | 659 | return 0; |
644 | 660 | ||
661 | case NBD_SET_FLAGS: | ||
662 | nbd->flags = arg; | ||
663 | return 0; | ||
664 | |||
645 | case NBD_SET_SIZE_BLOCKS: | 665 | case NBD_SET_SIZE_BLOCKS: |
646 | nbd->bytesize = ((u64) arg) * nbd->blksize; | 666 | nbd->bytesize = ((u64) arg) * nbd->blksize; |
647 | bdev->bd_inode->i_size = nbd->bytesize; | 667 | bdev->bd_inode->i_size = nbd->bytesize; |
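
NBD_SET_FLAGS gives userspace a way to hand the flags it negotiated with the server to the kernel before NBD_DO_IT. A hedged userspace sketch of how a client might use it; it assumes a <linux/nbd.h> that defines NBD_FLAG_SEND_TRIM and an already-configured /dev/nbd0:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/nbd.h>

#ifndef NBD_FLAG_SEND_TRIM
#define NBD_FLAG_SEND_TRIM (1 << 5)	/* assumed value, mirroring the kernel header */
#endif

int main(void)
{
	int nbd = open("/dev/nbd0", O_RDWR);

	if (nbd < 0) {
		perror("open /dev/nbd0");
		return 1;
	}
	/* Pass the export flags learned during negotiation to the driver. */
	if (ioctl(nbd, NBD_SET_FLAGS, (unsigned long) NBD_FLAG_SEND_TRIM) < 0)
		perror("NBD_SET_FLAGS");
	return 0;
}
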
@@ -661,6 +681,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, | |||
661 | 681 | ||
662 | mutex_unlock(&nbd->tx_lock); | 682 | mutex_unlock(&nbd->tx_lock); |
663 | 683 | ||
684 | if (nbd->flags & NBD_FLAG_SEND_TRIM) | ||
685 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, | ||
686 | nbd->disk->queue); | ||
687 | |||
664 | thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); | 688 | thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); |
665 | if (IS_ERR(thread)) { | 689 | if (IS_ERR(thread)) { |
666 | mutex_lock(&nbd->tx_lock); | 690 | mutex_lock(&nbd->tx_lock); |
@@ -678,6 +702,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, | |||
678 | nbd->file = NULL; | 702 | nbd->file = NULL; |
679 | nbd_clear_que(nbd); | 703 | nbd_clear_que(nbd); |
680 | dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); | 704 | dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); |
705 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); | ||
681 | if (file) | 706 | if (file) |
682 | fput(file); | 707 | fput(file); |
683 | nbd->bytesize = 0; | 708 | nbd->bytesize = 0; |
@@ -796,6 +821,9 @@ static int __init nbd_init(void) | |||
796 | * Tell the block layer that we are not a rotational device | 821 | * Tell the block layer that we are not a rotational device |
797 | */ | 822 | */ |
798 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); | 823 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); |
824 | disk->queue->limits.discard_granularity = 512; | ||
825 | disk->queue->limits.max_discard_sectors = UINT_MAX; | ||
826 | disk->queue->limits.discard_zeroes_data = 0; | ||
799 | } | 827 | } |
800 | 828 | ||
801 | if (register_blkdev(NBD_MAJOR, "nbd")) { | 829 | if (register_blkdev(NBD_MAJOR, "nbd")) { |
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 38a2d0631882..931769e133e5 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c | |||
@@ -79,6 +79,7 @@ struct nvme_dev { | |||
79 | char serial[20]; | 79 | char serial[20]; |
80 | char model[40]; | 80 | char model[40]; |
81 | char firmware_rev[8]; | 81 | char firmware_rev[8]; |
82 | u32 max_hw_sectors; | ||
82 | }; | 83 | }; |
83 | 84 | ||
84 | /* | 85 | /* |
@@ -835,15 +836,15 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, | |||
835 | } | 836 | } |
836 | 837 | ||
837 | static int nvme_get_features(struct nvme_dev *dev, unsigned fid, | 838 | static int nvme_get_features(struct nvme_dev *dev, unsigned fid, |
838 | unsigned dword11, dma_addr_t dma_addr) | 839 | unsigned nsid, dma_addr_t dma_addr) |
839 | { | 840 | { |
840 | struct nvme_command c; | 841 | struct nvme_command c; |
841 | 842 | ||
842 | memset(&c, 0, sizeof(c)); | 843 | memset(&c, 0, sizeof(c)); |
843 | c.features.opcode = nvme_admin_get_features; | 844 | c.features.opcode = nvme_admin_get_features; |
845 | c.features.nsid = cpu_to_le32(nsid); | ||
844 | c.features.prp1 = cpu_to_le64(dma_addr); | 846 | c.features.prp1 = cpu_to_le64(dma_addr); |
845 | c.features.fid = cpu_to_le32(fid); | 847 | c.features.fid = cpu_to_le32(fid); |
846 | c.features.dword11 = cpu_to_le32(dword11); | ||
847 | 848 | ||
848 | return nvme_submit_admin_cmd(dev, &c, NULL); | 849 | return nvme_submit_admin_cmd(dev, &c, NULL); |
849 | } | 850 | } |
@@ -862,11 +863,51 @@ static int nvme_set_features(struct nvme_dev *dev, unsigned fid, | |||
862 | return nvme_submit_admin_cmd(dev, &c, result); | 863 | return nvme_submit_admin_cmd(dev, &c, result); |
863 | } | 864 | } |
864 | 865 | ||
866 | /** | ||
867 | * nvme_cancel_ios - Cancel outstanding I/Os | ||
868 | * @queue: The queue to cancel I/Os on | ||
869 | * @timeout: True to only cancel I/Os which have timed out | ||
870 | */ | ||
871 | static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) | ||
872 | { | ||
873 | int depth = nvmeq->q_depth - 1; | ||
874 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
875 | unsigned long now = jiffies; | ||
876 | int cmdid; | ||
877 | |||
878 | for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { | ||
879 | void *ctx; | ||
880 | nvme_completion_fn fn; | ||
881 | static struct nvme_completion cqe = { | ||
882 | .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, | ||
883 | }; | ||
884 | |||
885 | if (timeout && !time_after(now, info[cmdid].timeout)) | ||
886 | continue; | ||
887 | dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); | ||
888 | ctx = cancel_cmdid(nvmeq, cmdid, &fn); | ||
889 | fn(nvmeq->dev, ctx, &cqe); | ||
890 | } | ||
891 | } | ||
892 | |||
893 | static void nvme_free_queue_mem(struct nvme_queue *nvmeq) | ||
894 | { | ||
895 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | ||
896 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | ||
897 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | ||
898 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); | ||
899 | kfree(nvmeq); | ||
900 | } | ||
901 | |||
865 | static void nvme_free_queue(struct nvme_dev *dev, int qid) | 902 | static void nvme_free_queue(struct nvme_dev *dev, int qid) |
866 | { | 903 | { |
867 | struct nvme_queue *nvmeq = dev->queues[qid]; | 904 | struct nvme_queue *nvmeq = dev->queues[qid]; |
868 | int vector = dev->entry[nvmeq->cq_vector].vector; | 905 | int vector = dev->entry[nvmeq->cq_vector].vector; |
869 | 906 | ||
907 | spin_lock_irq(&nvmeq->q_lock); | ||
908 | nvme_cancel_ios(nvmeq, false); | ||
909 | spin_unlock_irq(&nvmeq->q_lock); | ||
910 | |||
870 | irq_set_affinity_hint(vector, NULL); | 911 | irq_set_affinity_hint(vector, NULL); |
871 | free_irq(vector, nvmeq); | 912 | free_irq(vector, nvmeq); |
872 | 913 | ||
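
nvme_cancel_ios() generalizes the old timeout scan: with timeout=false it sweeps every outstanding command id and completes it with an abort status, which is what nvme_free_queue() now relies on before tearing a queue down. A toy userspace model of that bitmap walk (the real code uses for_each_set_bit() over nvmeq->cmdid_data):

#include <stdio.h>

int main(void)
{
	unsigned long cmdid_data = 0;	/* one bit per command id in flight */
	int depth = 16;
	int cmdid;

	cmdid_data |= 1UL << 3;		/* pretend ids 3 and 9 are outstanding */
	cmdid_data |= 1UL << 9;

	for (cmdid = 0; cmdid < depth; cmdid++) {
		if (!(cmdid_data & (1UL << cmdid)))
			continue;
		printf("Cancelling I/O %d\n", cmdid);
		cmdid_data &= ~(1UL << cmdid);	/* cancel_cmdid() clears the id */
	}
	return 0;
}
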
@@ -876,18 +917,15 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) | |||
876 | adapter_delete_cq(dev, qid); | 917 | adapter_delete_cq(dev, qid); |
877 | } | 918 | } |
878 | 919 | ||
879 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | 920 | nvme_free_queue_mem(nvmeq); |
880 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | ||
881 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | ||
882 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); | ||
883 | kfree(nvmeq); | ||
884 | } | 921 | } |
885 | 922 | ||
886 | static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, | 923 | static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, |
887 | int depth, int vector) | 924 | int depth, int vector) |
888 | { | 925 | { |
889 | struct device *dmadev = &dev->pci_dev->dev; | 926 | struct device *dmadev = &dev->pci_dev->dev; |
890 | unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info)); | 927 | unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * |
928 | sizeof(struct nvme_cmd_info)); | ||
891 | struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); | 929 | struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); |
892 | if (!nvmeq) | 930 | if (!nvmeq) |
893 | return NULL; | 931 | return NULL; |
@@ -975,7 +1013,7 @@ static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, | |||
975 | 1013 | ||
976 | static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) | 1014 | static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) |
977 | { | 1015 | { |
978 | int result; | 1016 | int result = 0; |
979 | u32 aqa; | 1017 | u32 aqa; |
980 | u64 cap; | 1018 | u64 cap; |
981 | unsigned long timeout; | 1019 | unsigned long timeout; |
@@ -1005,17 +1043,22 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) | |||
1005 | timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; | 1043 | timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; |
1006 | dev->db_stride = NVME_CAP_STRIDE(cap); | 1044 | dev->db_stride = NVME_CAP_STRIDE(cap); |
1007 | 1045 | ||
1008 | while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { | 1046 | while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { |
1009 | msleep(100); | 1047 | msleep(100); |
1010 | if (fatal_signal_pending(current)) | 1048 | if (fatal_signal_pending(current)) |
1011 | return -EINTR; | 1049 | result = -EINTR; |
1012 | if (time_after(jiffies, timeout)) { | 1050 | if (time_after(jiffies, timeout)) { |
1013 | dev_err(&dev->pci_dev->dev, | 1051 | dev_err(&dev->pci_dev->dev, |
1014 | "Device not ready; aborting initialisation\n"); | 1052 | "Device not ready; aborting initialisation\n"); |
1015 | return -ENODEV; | 1053 | result = -ENODEV; |
1016 | } | 1054 | } |
1017 | } | 1055 | } |
1018 | 1056 | ||
1057 | if (result) { | ||
1058 | nvme_free_queue_mem(nvmeq); | ||
1059 | return result; | ||
1060 | } | ||
1061 | |||
1019 | result = queue_request_irq(dev, nvmeq, "nvme admin"); | 1062 | result = queue_request_irq(dev, nvmeq, "nvme admin"); |
1020 | dev->queues[0] = nvmeq; | 1063 | dev->queues[0] = nvmeq; |
1021 | return result; | 1064 | return result; |
@@ -1037,6 +1080,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, | |||
1037 | offset = offset_in_page(addr); | 1080 | offset = offset_in_page(addr); |
1038 | count = DIV_ROUND_UP(offset + length, PAGE_SIZE); | 1081 | count = DIV_ROUND_UP(offset + length, PAGE_SIZE); |
1039 | pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); | 1082 | pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); |
1083 | if (!pages) | ||
1084 | return ERR_PTR(-ENOMEM); | ||
1040 | 1085 | ||
1041 | err = get_user_pages_fast(addr, count, 1, pages); | 1086 | err = get_user_pages_fast(addr, count, 1, pages); |
1042 | if (err < count) { | 1087 | if (err < count) { |
@@ -1146,14 +1191,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | |||
1146 | return status; | 1191 | return status; |
1147 | } | 1192 | } |
1148 | 1193 | ||
1149 | static int nvme_user_admin_cmd(struct nvme_ns *ns, | 1194 | static int nvme_user_admin_cmd(struct nvme_dev *dev, |
1150 | struct nvme_admin_cmd __user *ucmd) | 1195 | struct nvme_admin_cmd __user *ucmd) |
1151 | { | 1196 | { |
1152 | struct nvme_dev *dev = ns->dev; | ||
1153 | struct nvme_admin_cmd cmd; | 1197 | struct nvme_admin_cmd cmd; |
1154 | struct nvme_command c; | 1198 | struct nvme_command c; |
1155 | int status, length; | 1199 | int status, length; |
1156 | struct nvme_iod *iod; | 1200 | struct nvme_iod *uninitialized_var(iod); |
1157 | 1201 | ||
1158 | if (!capable(CAP_SYS_ADMIN)) | 1202 | if (!capable(CAP_SYS_ADMIN)) |
1159 | return -EACCES; | 1203 | return -EACCES; |
@@ -1204,7 +1248,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, | |||
1204 | case NVME_IOCTL_ID: | 1248 | case NVME_IOCTL_ID: |
1205 | return ns->ns_id; | 1249 | return ns->ns_id; |
1206 | case NVME_IOCTL_ADMIN_CMD: | 1250 | case NVME_IOCTL_ADMIN_CMD: |
1207 | return nvme_user_admin_cmd(ns, (void __user *)arg); | 1251 | return nvme_user_admin_cmd(ns->dev, (void __user *)arg); |
1208 | case NVME_IOCTL_SUBMIT_IO: | 1252 | case NVME_IOCTL_SUBMIT_IO: |
1209 | return nvme_submit_io(ns, (void __user *)arg); | 1253 | return nvme_submit_io(ns, (void __user *)arg); |
1210 | default: | 1254 | default: |
@@ -1218,26 +1262,6 @@ static const struct block_device_operations nvme_fops = { | |||
1218 | .compat_ioctl = nvme_ioctl, | 1262 | .compat_ioctl = nvme_ioctl, |
1219 | }; | 1263 | }; |
1220 | 1264 | ||
1221 | static void nvme_timeout_ios(struct nvme_queue *nvmeq) | ||
1222 | { | ||
1223 | int depth = nvmeq->q_depth - 1; | ||
1224 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
1225 | unsigned long now = jiffies; | ||
1226 | int cmdid; | ||
1227 | |||
1228 | for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { | ||
1229 | void *ctx; | ||
1230 | nvme_completion_fn fn; | ||
1231 | static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, }; | ||
1232 | |||
1233 | if (!time_after(now, info[cmdid].timeout)) | ||
1234 | continue; | ||
1235 | dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid); | ||
1236 | ctx = cancel_cmdid(nvmeq, cmdid, &fn); | ||
1237 | fn(nvmeq->dev, ctx, &cqe); | ||
1238 | } | ||
1239 | } | ||
1240 | |||
1241 | static void nvme_resubmit_bios(struct nvme_queue *nvmeq) | 1265 | static void nvme_resubmit_bios(struct nvme_queue *nvmeq) |
1242 | { | 1266 | { |
1243 | while (bio_list_peek(&nvmeq->sq_cong)) { | 1267 | while (bio_list_peek(&nvmeq->sq_cong)) { |
@@ -1269,7 +1293,7 @@ static int nvme_kthread(void *data) | |||
1269 | spin_lock_irq(&nvmeq->q_lock); | 1293 | spin_lock_irq(&nvmeq->q_lock); |
1270 | if (nvme_process_cq(nvmeq)) | 1294 | if (nvme_process_cq(nvmeq)) |
1271 | printk("process_cq did something\n"); | 1295 | printk("process_cq did something\n"); |
1272 | nvme_timeout_ios(nvmeq); | 1296 | nvme_cancel_ios(nvmeq, true); |
1273 | nvme_resubmit_bios(nvmeq); | 1297 | nvme_resubmit_bios(nvmeq); |
1274 | spin_unlock_irq(&nvmeq->q_lock); | 1298 | spin_unlock_irq(&nvmeq->q_lock); |
1275 | } | 1299 | } |
@@ -1339,6 +1363,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, | |||
1339 | ns->disk = disk; | 1363 | ns->disk = disk; |
1340 | lbaf = id->flbas & 0xf; | 1364 | lbaf = id->flbas & 0xf; |
1341 | ns->lba_shift = id->lbaf[lbaf].ds; | 1365 | ns->lba_shift = id->lbaf[lbaf].ds; |
1366 | blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); | ||
1367 | if (dev->max_hw_sectors) | ||
1368 | blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); | ||
1342 | 1369 | ||
1343 | disk->major = nvme_major; | 1370 | disk->major = nvme_major; |
1344 | disk->minors = NVME_MINORS; | 1371 | disk->minors = NVME_MINORS; |
@@ -1383,7 +1410,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) | |||
1383 | 1410 | ||
1384 | static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) | 1411 | static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) |
1385 | { | 1412 | { |
1386 | int result, cpu, i, nr_io_queues, db_bar_size; | 1413 | int result, cpu, i, nr_io_queues, db_bar_size, q_depth; |
1387 | 1414 | ||
1388 | nr_io_queues = num_online_cpus(); | 1415 | nr_io_queues = num_online_cpus(); |
1389 | result = set_queue_count(dev, nr_io_queues); | 1416 | result = set_queue_count(dev, nr_io_queues); |
@@ -1429,9 +1456,10 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) | |||
1429 | cpu = cpumask_next(cpu, cpu_online_mask); | 1456 | cpu = cpumask_next(cpu, cpu_online_mask); |
1430 | } | 1457 | } |
1431 | 1458 | ||
1459 | q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, | ||
1460 | NVME_Q_DEPTH); | ||
1432 | for (i = 0; i < nr_io_queues; i++) { | 1461 | for (i = 0; i < nr_io_queues; i++) { |
1433 | dev->queues[i + 1] = nvme_create_queue(dev, i + 1, | 1462 | dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); |
1434 | NVME_Q_DEPTH, i); | ||
1435 | if (IS_ERR(dev->queues[i + 1])) | 1463 | if (IS_ERR(dev->queues[i + 1])) |
1436 | return PTR_ERR(dev->queues[i + 1]); | 1464 | return PTR_ERR(dev->queues[i + 1]); |
1437 | dev->queue_count++; | 1465 | dev->queue_count++; |
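
The I/O queue depth is no longer fixed at NVME_Q_DEPTH; it is capped by the controller's CAP.MQES field, which reports the maximum queue entries minus one. A quick standalone check of that arithmetic (macro and constant values follow the driver, but treat them as illustrative):

#include <stdio.h>
#include <stdint.h>

#define NVME_CAP_MQES(cap)	((cap) & 0xffff)	/* zero-based max queue entries */
#define NVME_Q_DEPTH		1024			/* driver's own ceiling */

static int nvme_pick_q_depth(uint64_t cap)
{
	int mqes = NVME_CAP_MQES(cap) + 1;	/* the field is zero-based */

	return mqes < NVME_Q_DEPTH ? mqes : NVME_Q_DEPTH;
}

int main(void)
{
	printf("controller with MQES=63   -> depth %d\n", nvme_pick_q_depth(63));
	printf("controller with MQES=4095 -> depth %d\n", nvme_pick_q_depth(4095));
	return 0;
}
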
@@ -1480,6 +1508,10 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev) | |||
1480 | memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); | 1508 | memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); |
1481 | memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); | 1509 | memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); |
1482 | memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); | 1510 | memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); |
1511 | if (ctrl->mdts) { | ||
1512 | int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; | ||
1513 | dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); | ||
1514 | } | ||
1483 | 1515 | ||
1484 | id_ns = mem; | 1516 | id_ns = mem; |
1485 | for (i = 1; i <= nn; i++) { | 1517 | for (i = 1; i <= nn; i++) { |
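
MDTS in the Identify Controller data expresses the maximum transfer as a power-of-two multiple of the minimum memory page size (2^(12 + CAP.MPSMIN) bytes), so converting it to 512-byte sectors subtracts 9 from the shift. A standalone sanity check of the formula used above (macro value taken from the driver, but treat it as illustrative):

#include <stdio.h>
#include <stdint.h>

#define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)

static uint32_t nvme_max_hw_sectors(uint64_t cap, uint8_t mdts)
{
	int shift = NVME_CAP_MPSMIN(cap) + 12;	/* log2 of the minimum page size */

	return 1u << (mdts + shift - 9);	/* bytes -> 512-byte sectors */
}

int main(void)
{
	/* MPSMIN = 0 (4 KiB pages), MDTS = 5 -> 32 pages = 128 KiB = 256 sectors */
	printf("max_hw_sectors = %u\n", nvme_max_hw_sectors(0, 5));
	return 0;
}
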
@@ -1523,8 +1555,6 @@ static int nvme_dev_remove(struct nvme_dev *dev) | |||
1523 | list_del(&dev->node); | 1555 | list_del(&dev->node); |
1524 | spin_unlock(&dev_list_lock); | 1556 | spin_unlock(&dev_list_lock); |
1525 | 1557 | ||
1526 | /* TODO: wait all I/O finished or cancel them */ | ||
1527 | |||
1528 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) { | 1558 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) { |
1529 | list_del(&ns->list); | 1559 | list_del(&ns->list); |
1530 | del_gendisk(ns->disk); | 1560 | del_gendisk(ns->disk); |
@@ -1560,15 +1590,33 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) | |||
1560 | dma_pool_destroy(dev->prp_small_pool); | 1590 | dma_pool_destroy(dev->prp_small_pool); |
1561 | } | 1591 | } |
1562 | 1592 | ||
1563 | /* XXX: Use an ida or something to let remove / add work correctly */ | 1593 | static DEFINE_IDA(nvme_instance_ida); |
1564 | static void nvme_set_instance(struct nvme_dev *dev) | 1594 | |
1595 | static int nvme_set_instance(struct nvme_dev *dev) | ||
1565 | { | 1596 | { |
1566 | static int instance; | 1597 | int instance, error; |
1567 | dev->instance = instance++; | 1598 | |
1599 | do { | ||
1600 | if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) | ||
1601 | return -ENODEV; | ||
1602 | |||
1603 | spin_lock(&dev_list_lock); | ||
1604 | error = ida_get_new(&nvme_instance_ida, &instance); | ||
1605 | spin_unlock(&dev_list_lock); | ||
1606 | } while (error == -EAGAIN); | ||
1607 | |||
1608 | if (error) | ||
1609 | return -ENODEV; | ||
1610 | |||
1611 | dev->instance = instance; | ||
1612 | return 0; | ||
1568 | } | 1613 | } |
1569 | 1614 | ||
1570 | static void nvme_release_instance(struct nvme_dev *dev) | 1615 | static void nvme_release_instance(struct nvme_dev *dev) |
1571 | { | 1616 | { |
1617 | spin_lock(&dev_list_lock); | ||
1618 | ida_remove(&nvme_instance_ida, dev->instance); | ||
1619 | spin_unlock(&dev_list_lock); | ||
1572 | } | 1620 | } |
1573 | 1621 | ||
1574 | static int __devinit nvme_probe(struct pci_dev *pdev, | 1622 | static int __devinit nvme_probe(struct pci_dev *pdev, |
@@ -1601,7 +1649,10 @@ static int __devinit nvme_probe(struct pci_dev *pdev, | |||
1601 | pci_set_drvdata(pdev, dev); | 1649 | pci_set_drvdata(pdev, dev); |
1602 | dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); | 1650 | dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); |
1603 | dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); | 1651 | dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); |
1604 | nvme_set_instance(dev); | 1652 | result = nvme_set_instance(dev); |
1653 | if (result) | ||
1654 | goto disable; | ||
1655 | |||
1605 | dev->entry[0].vector = pdev->irq; | 1656 | dev->entry[0].vector = pdev->irq; |
1606 | 1657 | ||
1607 | result = nvme_setup_prp_pools(dev); | 1658 | result = nvme_setup_prp_pools(dev); |
@@ -1675,7 +1726,7 @@ static void __devexit nvme_remove(struct pci_dev *pdev) | |||
1675 | #define nvme_suspend NULL | 1726 | #define nvme_suspend NULL |
1676 | #define nvme_resume NULL | 1727 | #define nvme_resume NULL |
1677 | 1728 | ||
1678 | static struct pci_error_handlers nvme_err_handler = { | 1729 | static const struct pci_error_handlers nvme_err_handler = { |
1679 | .error_detected = nvme_error_detected, | 1730 | .error_detected = nvme_error_detected, |
1680 | .mmio_enabled = nvme_dump_registers, | 1731 | .mmio_enabled = nvme_dump_registers, |
1681 | .link_reset = nvme_link_reset, | 1732 | .link_reset = nvme_link_reset, |
@@ -1704,15 +1755,17 @@ static struct pci_driver nvme_driver = { | |||
1704 | 1755 | ||
1705 | static int __init nvme_init(void) | 1756 | static int __init nvme_init(void) |
1706 | { | 1757 | { |
1707 | int result = -EBUSY; | 1758 | int result; |
1708 | 1759 | ||
1709 | nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); | 1760 | nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); |
1710 | if (IS_ERR(nvme_thread)) | 1761 | if (IS_ERR(nvme_thread)) |
1711 | return PTR_ERR(nvme_thread); | 1762 | return PTR_ERR(nvme_thread); |
1712 | 1763 | ||
1713 | nvme_major = register_blkdev(nvme_major, "nvme"); | 1764 | result = register_blkdev(nvme_major, "nvme"); |
1714 | if (nvme_major <= 0) | 1765 | if (result < 0) |
1715 | goto kill_kthread; | 1766 | goto kill_kthread; |
1767 | else if (result > 0) | ||
1768 | nvme_major = result; | ||
1716 | 1769 | ||
1717 | result = pci_register_driver(&nvme_driver); | 1770 | result = pci_register_driver(&nvme_driver); |
1718 | if (result) | 1771 | if (result) |
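
The nvme_init() change relies on register_blkdev()'s return convention: asking for major 0 allocates a dynamic major and returns it as a positive value, a fixed major returns 0 on success, and failures are negative. A kernel-context sketch of that convention (not a standalone program; the "example" name and variable are placeholders):

#include <linux/fs.h>

static unsigned int example_major;

static int example_register_major(void)
{
	int result = register_blkdev(0, "example");

	if (result < 0)
		return result;		/* registration failed */
	example_major = result;		/* major 0 requested, so result is the dynamic major */
	return 0;
}
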
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9917943a3572..bb3d9be3b1b4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -41,6 +41,8 @@ | |||
41 | 41 | ||
42 | #include "rbd_types.h" | 42 | #include "rbd_types.h" |
43 | 43 | ||
44 | #define RBD_DEBUG /* Activate rbd_assert() calls */ | ||
45 | |||
44 | /* | 46 | /* |
45 | * The basic unit of block I/O is a sector. It is interpreted in a | 47 | * The basic unit of block I/O is a sector. It is interpreted in a |
46 | * number of contexts in Linux (blk, bio, genhd), but the default is | 48 | * number of contexts in Linux (blk, bio, genhd), but the default is |
@@ -50,16 +52,24 @@ | |||
50 | #define SECTOR_SHIFT 9 | 52 | #define SECTOR_SHIFT 9 |
51 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) | 53 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) |
52 | 54 | ||
55 | /* It might be useful to have this defined elsewhere too */ | ||
56 | |||
57 | #define U64_MAX ((u64) (~0ULL)) | ||
58 | |||
53 | #define RBD_DRV_NAME "rbd" | 59 | #define RBD_DRV_NAME "rbd" |
54 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" | 60 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" |
55 | 61 | ||
56 | #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ | 62 | #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ |
57 | 63 | ||
58 | #define RBD_MAX_SNAP_NAME_LEN 32 | 64 | #define RBD_MAX_SNAP_NAME_LEN 32 |
65 | #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ | ||
59 | #define RBD_MAX_OPT_LEN 1024 | 66 | #define RBD_MAX_OPT_LEN 1024 |
60 | 67 | ||
61 | #define RBD_SNAP_HEAD_NAME "-" | 68 | #define RBD_SNAP_HEAD_NAME "-" |
62 | 69 | ||
70 | #define RBD_IMAGE_ID_LEN_MAX 64 | ||
71 | #define RBD_OBJ_PREFIX_LEN_MAX 64 | ||
72 | |||
63 | /* | 73 | /* |
64 | * An RBD device name will be "rbd#", where the "rbd" comes from | 74 | * An RBD device name will be "rbd#", where the "rbd" comes from |
65 | * RBD_DRV_NAME above, and # is a unique integer identifier. | 75 | * RBD_DRV_NAME above, and # is a unique integer identifier. |
@@ -69,21 +79,22 @@ | |||
69 | #define DEV_NAME_LEN 32 | 79 | #define DEV_NAME_LEN 32 |
70 | #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) | 80 | #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) |
71 | 81 | ||
72 | #define RBD_NOTIFY_TIMEOUT_DEFAULT 10 | 82 | #define RBD_READ_ONLY_DEFAULT false |
73 | 83 | ||
74 | /* | 84 | /* |
75 | * block device image metadata (in-memory version) | 85 | * block device image metadata (in-memory version) |
76 | */ | 86 | */ |
77 | struct rbd_image_header { | 87 | struct rbd_image_header { |
78 | u64 image_size; | 88 | /* These four fields never change for a given rbd image */ |
79 | char *object_prefix; | 89 | char *object_prefix; |
90 | u64 features; | ||
80 | __u8 obj_order; | 91 | __u8 obj_order; |
81 | __u8 crypt_type; | 92 | __u8 crypt_type; |
82 | __u8 comp_type; | 93 | __u8 comp_type; |
83 | struct ceph_snap_context *snapc; | ||
84 | size_t snap_names_len; | ||
85 | u32 total_snaps; | ||
86 | 94 | ||
95 | /* The remaining fields need to be updated occasionally */ | ||
96 | u64 image_size; | ||
97 | struct ceph_snap_context *snapc; | ||
87 | char *snap_names; | 98 | char *snap_names; |
88 | u64 *snap_sizes; | 99 | u64 *snap_sizes; |
89 | 100 | ||
@@ -91,7 +102,7 @@ struct rbd_image_header { | |||
91 | }; | 102 | }; |
92 | 103 | ||
93 | struct rbd_options { | 104 | struct rbd_options { |
94 | int notify_timeout; | 105 | bool read_only; |
95 | }; | 106 | }; |
96 | 107 | ||
97 | /* | 108 | /* |
@@ -99,7 +110,6 @@ struct rbd_options { | |||
99 | */ | 110 | */ |
100 | struct rbd_client { | 111 | struct rbd_client { |
101 | struct ceph_client *client; | 112 | struct ceph_client *client; |
102 | struct rbd_options *rbd_opts; | ||
103 | struct kref kref; | 113 | struct kref kref; |
104 | struct list_head node; | 114 | struct list_head node; |
105 | }; | 115 | }; |
@@ -141,6 +151,16 @@ struct rbd_snap { | |||
141 | u64 size; | 151 | u64 size; |
142 | struct list_head node; | 152 | struct list_head node; |
143 | u64 id; | 153 | u64 id; |
154 | u64 features; | ||
155 | }; | ||
156 | |||
157 | struct rbd_mapping { | ||
158 | char *snap_name; | ||
159 | u64 snap_id; | ||
160 | u64 size; | ||
161 | u64 features; | ||
162 | bool snap_exists; | ||
163 | bool read_only; | ||
144 | }; | 164 | }; |
145 | 165 | ||
146 | /* | 166 | /* |
@@ -151,8 +171,9 @@ struct rbd_device { | |||
151 | 171 | ||
152 | int major; /* blkdev assigned major */ | 172 | int major; /* blkdev assigned major */ |
153 | struct gendisk *disk; /* blkdev's gendisk and rq */ | 173 | struct gendisk *disk; /* blkdev's gendisk and rq */ |
154 | struct request_queue *q; | ||
155 | 174 | ||
175 | u32 image_format; /* Either 1 or 2 */ | ||
176 | struct rbd_options rbd_opts; | ||
156 | struct rbd_client *rbd_client; | 177 | struct rbd_client *rbd_client; |
157 | 178 | ||
158 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ | 179 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ |
@@ -160,6 +181,8 @@ struct rbd_device { | |||
160 | spinlock_t lock; /* queue lock */ | 181 | spinlock_t lock; /* queue lock */ |
161 | 182 | ||
162 | struct rbd_image_header header; | 183 | struct rbd_image_header header; |
184 | char *image_id; | ||
185 | size_t image_id_len; | ||
163 | char *image_name; | 186 | char *image_name; |
164 | size_t image_name_len; | 187 | size_t image_name_len; |
165 | char *header_name; | 188 | char *header_name; |
@@ -171,13 +194,8 @@ struct rbd_device { | |||
171 | 194 | ||
172 | /* protects updating the header */ | 195 | /* protects updating the header */ |
173 | struct rw_semaphore header_rwsem; | 196 | struct rw_semaphore header_rwsem; |
174 | /* name of the snapshot this device reads from */ | 197 | |
175 | char *snap_name; | 198 | struct rbd_mapping mapping; |
176 | /* id of the snapshot this device reads from */ | ||
177 | u64 snap_id; /* current snapshot id */ | ||
178 | /* whether the snap_id this device reads from still exists */ | ||
179 | bool snap_exists; | ||
180 | int read_only; | ||
181 | 199 | ||
182 | struct list_head node; | 200 | struct list_head node; |
183 | 201 | ||
@@ -196,12 +214,10 @@ static DEFINE_SPINLOCK(rbd_dev_list_lock); | |||
196 | static LIST_HEAD(rbd_client_list); /* clients */ | 214 | static LIST_HEAD(rbd_client_list); /* clients */ |
197 | static DEFINE_SPINLOCK(rbd_client_list_lock); | 215 | static DEFINE_SPINLOCK(rbd_client_list_lock); |
198 | 216 | ||
199 | static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); | 217 | static int rbd_dev_snaps_update(struct rbd_device *rbd_dev); |
218 | static int rbd_dev_snaps_register(struct rbd_device *rbd_dev); | ||
219 | |||
200 | static void rbd_dev_release(struct device *dev); | 220 | static void rbd_dev_release(struct device *dev); |
201 | static ssize_t rbd_snap_add(struct device *dev, | ||
202 | struct device_attribute *attr, | ||
203 | const char *buf, | ||
204 | size_t count); | ||
205 | static void __rbd_remove_snap_dev(struct rbd_snap *snap); | 221 | static void __rbd_remove_snap_dev(struct rbd_snap *snap); |
206 | 222 | ||
207 | static ssize_t rbd_add(struct bus_type *bus, const char *buf, | 223 | static ssize_t rbd_add(struct bus_type *bus, const char *buf, |
@@ -229,6 +245,18 @@ static struct device rbd_root_dev = { | |||
229 | .release = rbd_root_dev_release, | 245 | .release = rbd_root_dev_release, |
230 | }; | 246 | }; |
231 | 247 | ||
248 | #ifdef RBD_DEBUG | ||
249 | #define rbd_assert(expr) \ | ||
250 | if (unlikely(!(expr))) { \ | ||
251 | printk(KERN_ERR "\nAssertion failure in %s() " \ | ||
252 | "at line %d:\n\n" \ | ||
253 | "\trbd_assert(%s);\n\n", \ | ||
254 | __func__, __LINE__, #expr); \ | ||
255 | BUG(); \ | ||
256 | } | ||
257 | #else /* !RBD_DEBUG */ | ||
258 | # define rbd_assert(expr) ((void) 0) | ||
259 | #endif /* !RBD_DEBUG */ | ||
232 | 260 | ||
233 | static struct device *rbd_get_dev(struct rbd_device *rbd_dev) | 261 | static struct device *rbd_get_dev(struct rbd_device *rbd_dev) |
234 | { | 262 | { |
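
A userspace rendering of the rbd_assert() macro added above, handy for poking at the message it prints. One hedged deviation: the body is wrapped in do { } while (0), which keeps the macro safe next to a dangling else, something the bare-if kernel version is not; abort() stands in for BUG():

#include <stdio.h>
#include <stdlib.h>

#define RBD_DEBUG

#ifdef RBD_DEBUG
#define rbd_assert(expr)						\
	do {								\
		if (!(expr)) {						\
			fprintf(stderr, "\nAssertion failure in %s() "	\
				"at line %d:\n\n\trbd_assert(%s);\n\n",	\
				__func__, __LINE__, #expr);		\
			abort();					\
		}							\
	} while (0)
#else
#define rbd_assert(expr) ((void) 0)
#endif

int main(void)
{
	int snap_count = 1;

	rbd_assert(snap_count > 0);	/* passes silently */
	rbd_assert(snap_count == 0);	/* prints the message and aborts */
	return 0;
}
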
@@ -246,13 +274,12 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) | |||
246 | { | 274 | { |
247 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; | 275 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; |
248 | 276 | ||
249 | rbd_get_dev(rbd_dev); | 277 | if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) |
250 | |||
251 | set_device_ro(bdev, rbd_dev->read_only); | ||
252 | |||
253 | if ((mode & FMODE_WRITE) && rbd_dev->read_only) | ||
254 | return -EROFS; | 278 | return -EROFS; |
255 | 279 | ||
280 | rbd_get_dev(rbd_dev); | ||
281 | set_device_ro(bdev, rbd_dev->mapping.read_only); | ||
282 | |||
256 | return 0; | 283 | return 0; |
257 | } | 284 | } |
258 | 285 | ||
@@ -275,8 +302,7 @@ static const struct block_device_operations rbd_bd_ops = { | |||
275 | * Initialize an rbd client instance. | 302 | * Initialize an rbd client instance. |
276 | * We own *ceph_opts. | 303 | * We own *ceph_opts. |
277 | */ | 304 | */ |
278 | static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts, | 305 | static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) |
279 | struct rbd_options *rbd_opts) | ||
280 | { | 306 | { |
281 | struct rbd_client *rbdc; | 307 | struct rbd_client *rbdc; |
282 | int ret = -ENOMEM; | 308 | int ret = -ENOMEM; |
@@ -300,8 +326,6 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts, | |||
300 | if (ret < 0) | 326 | if (ret < 0) |
301 | goto out_err; | 327 | goto out_err; |
302 | 328 | ||
303 | rbdc->rbd_opts = rbd_opts; | ||
304 | |||
305 | spin_lock(&rbd_client_list_lock); | 329 | spin_lock(&rbd_client_list_lock); |
306 | list_add_tail(&rbdc->node, &rbd_client_list); | 330 | list_add_tail(&rbdc->node, &rbd_client_list); |
307 | spin_unlock(&rbd_client_list_lock); | 331 | spin_unlock(&rbd_client_list_lock); |
@@ -323,36 +347,52 @@ out_opt: | |||
323 | } | 347 | } |
324 | 348 | ||
325 | /* | 349 | /* |
326 | * Find a ceph client with specific addr and configuration. | 350 | * Find a ceph client with specific addr and configuration. If |
351 | * found, bump its reference count. | ||
327 | */ | 352 | */ |
328 | static struct rbd_client *__rbd_client_find(struct ceph_options *ceph_opts) | 353 | static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) |
329 | { | 354 | { |
330 | struct rbd_client *client_node; | 355 | struct rbd_client *client_node; |
356 | bool found = false; | ||
331 | 357 | ||
332 | if (ceph_opts->flags & CEPH_OPT_NOSHARE) | 358 | if (ceph_opts->flags & CEPH_OPT_NOSHARE) |
333 | return NULL; | 359 | return NULL; |
334 | 360 | ||
335 | list_for_each_entry(client_node, &rbd_client_list, node) | 361 | spin_lock(&rbd_client_list_lock); |
336 | if (!ceph_compare_options(ceph_opts, client_node->client)) | 362 | list_for_each_entry(client_node, &rbd_client_list, node) { |
337 | return client_node; | 363 | if (!ceph_compare_options(ceph_opts, client_node->client)) { |
338 | return NULL; | 364 | kref_get(&client_node->kref); |
365 | found = true; | ||
366 | break; | ||
367 | } | ||
368 | } | ||
369 | spin_unlock(&rbd_client_list_lock); | ||
370 | |||
371 | return found ? client_node : NULL; | ||
339 | } | 372 | } |
340 | 373 | ||
341 | /* | 374 | /* |
342 | * mount options | 375 | * mount options |
343 | */ | 376 | */ |
344 | enum { | 377 | enum { |
345 | Opt_notify_timeout, | ||
346 | Opt_last_int, | 378 | Opt_last_int, |
347 | /* int args above */ | 379 | /* int args above */ |
348 | Opt_last_string, | 380 | Opt_last_string, |
349 | /* string args above */ | 381 | /* string args above */ |
382 | Opt_read_only, | ||
383 | Opt_read_write, | ||
384 | /* Boolean args above */ | ||
385 | Opt_last_bool, | ||
350 | }; | 386 | }; |
351 | 387 | ||
352 | static match_table_t rbd_opts_tokens = { | 388 | static match_table_t rbd_opts_tokens = { |
353 | {Opt_notify_timeout, "notify_timeout=%d"}, | ||
354 | /* int args above */ | 389 | /* int args above */ |
355 | /* string args above */ | 390 | /* string args above */ |
391 | {Opt_read_only, "mapping.read_only"}, | ||
392 | {Opt_read_only, "ro"}, /* Alternate spelling */ | ||
393 | {Opt_read_write, "read_write"}, | ||
394 | {Opt_read_write, "rw"}, /* Alternate spelling */ | ||
395 | /* Boolean args above */ | ||
356 | {-1, NULL} | 396 | {-1, NULL} |
357 | }; | 397 | }; |
358 | 398 | ||
@@ -377,16 +417,22 @@ static int parse_rbd_opts_token(char *c, void *private) | |||
377 | } else if (token > Opt_last_int && token < Opt_last_string) { | 417 | } else if (token > Opt_last_int && token < Opt_last_string) { |
378 | dout("got string token %d val %s\n", token, | 418 | dout("got string token %d val %s\n", token, |
379 | argstr[0].from); | 419 | argstr[0].from); |
420 | } else if (token > Opt_last_string && token < Opt_last_bool) { | ||
421 | dout("got Boolean token %d\n", token); | ||
380 | } else { | 422 | } else { |
381 | dout("got token %d\n", token); | 423 | dout("got token %d\n", token); |
382 | } | 424 | } |
383 | 425 | ||
384 | switch (token) { | 426 | switch (token) { |
385 | case Opt_notify_timeout: | 427 | case Opt_read_only: |
386 | rbd_opts->notify_timeout = intval; | 428 | rbd_opts->read_only = true; |
429 | break; | ||
430 | case Opt_read_write: | ||
431 | rbd_opts->read_only = false; | ||
387 | break; | 432 | break; |
388 | default: | 433 | default: |
389 | BUG_ON(token); | 434 | rbd_assert(false); |
435 | break; | ||
390 | } | 436 | } |
391 | return 0; | 437 | return 0; |
392 | } | 438 | } |
@@ -395,48 +441,33 @@ static int parse_rbd_opts_token(char *c, void *private) | |||
395 | * Get a ceph client with specific addr and configuration, if one does | 441 | * Get a ceph client with specific addr and configuration, if one does |
396 | * not exist create it. | 442 | * not exist create it. |
397 | */ | 443 | */ |
398 | static struct rbd_client *rbd_get_client(const char *mon_addr, | 444 | static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, |
399 | size_t mon_addr_len, | 445 | size_t mon_addr_len, char *options) |
400 | char *options) | ||
401 | { | 446 | { |
402 | struct rbd_client *rbdc; | 447 | struct rbd_options *rbd_opts = &rbd_dev->rbd_opts; |
403 | struct ceph_options *ceph_opts; | 448 | struct ceph_options *ceph_opts; |
404 | struct rbd_options *rbd_opts; | 449 | struct rbd_client *rbdc; |
405 | |||
406 | rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); | ||
407 | if (!rbd_opts) | ||
408 | return ERR_PTR(-ENOMEM); | ||
409 | 450 | ||
410 | rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; | 451 | rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; |
411 | 452 | ||
412 | ceph_opts = ceph_parse_options(options, mon_addr, | 453 | ceph_opts = ceph_parse_options(options, mon_addr, |
413 | mon_addr + mon_addr_len, | 454 | mon_addr + mon_addr_len, |
414 | parse_rbd_opts_token, rbd_opts); | 455 | parse_rbd_opts_token, rbd_opts); |
415 | if (IS_ERR(ceph_opts)) { | 456 | if (IS_ERR(ceph_opts)) |
416 | kfree(rbd_opts); | 457 | return PTR_ERR(ceph_opts); |
417 | return ERR_CAST(ceph_opts); | ||
418 | } | ||
419 | 458 | ||
420 | spin_lock(&rbd_client_list_lock); | 459 | rbdc = rbd_client_find(ceph_opts); |
421 | rbdc = __rbd_client_find(ceph_opts); | ||
422 | if (rbdc) { | 460 | if (rbdc) { |
423 | /* using an existing client */ | 461 | /* using an existing client */ |
424 | kref_get(&rbdc->kref); | ||
425 | spin_unlock(&rbd_client_list_lock); | ||
426 | |||
427 | ceph_destroy_options(ceph_opts); | 462 | ceph_destroy_options(ceph_opts); |
428 | kfree(rbd_opts); | 463 | } else { |
429 | 464 | rbdc = rbd_client_create(ceph_opts); | |
430 | return rbdc; | 465 | if (IS_ERR(rbdc)) |
466 | return PTR_ERR(rbdc); | ||
431 | } | 467 | } |
432 | spin_unlock(&rbd_client_list_lock); | 468 | rbd_dev->rbd_client = rbdc; |
433 | |||
434 | rbdc = rbd_client_create(ceph_opts, rbd_opts); | ||
435 | 469 | ||
436 | if (IS_ERR(rbdc)) | 470 | return 0; |
437 | kfree(rbd_opts); | ||
438 | |||
439 | return rbdc; | ||
440 | } | 471 | } |
441 | 472 | ||
442 | /* | 473 | /* |
@@ -454,7 +485,6 @@ static void rbd_client_release(struct kref *kref) | |||
454 | spin_unlock(&rbd_client_list_lock); | 485 | spin_unlock(&rbd_client_list_lock); |
455 | 486 | ||
456 | ceph_destroy_client(rbdc->client); | 487 | ceph_destroy_client(rbdc->client); |
457 | kfree(rbdc->rbd_opts); | ||
458 | kfree(rbdc); | 488 | kfree(rbdc); |
459 | } | 489 | } |
460 | 490 | ||
@@ -480,10 +510,38 @@ static void rbd_coll_release(struct kref *kref) | |||
480 | kfree(coll); | 510 | kfree(coll); |
481 | } | 511 | } |
482 | 512 | ||
513 | static bool rbd_image_format_valid(u32 image_format) | ||
514 | { | ||
515 | return image_format == 1 || image_format == 2; | ||
516 | } | ||
517 | |||
483 | static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) | 518 | static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) |
484 | { | 519 | { |
485 | return !memcmp(&ondisk->text, | 520 | size_t size; |
486 | RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)); | 521 | u32 snap_count; |
522 | |||
523 | /* The header has to start with the magic rbd header text */ | ||
524 | if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) | ||
525 | return false; | ||
526 | |||
527 | /* | ||
528 | * The size of a snapshot header has to fit in a size_t, and | ||
529 | * that limits the number of snapshots. | ||
530 | */ | ||
531 | snap_count = le32_to_cpu(ondisk->snap_count); | ||
532 | size = SIZE_MAX - sizeof (struct ceph_snap_context); | ||
533 | if (snap_count > size / sizeof (__le64)) | ||
534 | return false; | ||
535 | |||
536 | /* | ||
537 | * Not only that, but the size of the entire snapshot | ||
538 | * header must also be representable in a size_t. | ||
539 | */ | ||
540 | size -= snap_count * sizeof (__le64); | ||
541 | if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) | ||
542 | return false; | ||
543 | |||
544 | return true; | ||
487 | } | 545 | } |
488 | 546 | ||
489 | /* | 547 | /* |
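
The expanded rbd_dev_ondisk_valid() is defending against a corrupt or hostile on-disk header: the snapshot id array plus the name buffer must fit in a size_t before anything gets allocated. A userspace model of that check (the 32-byte constant stands in for sizeof(struct ceph_snap_context) and is only illustrative):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool snap_header_fits(uint32_t snap_count, uint64_t snap_names_len)
{
	size_t size = SIZE_MAX - 32;			/* room left after the context struct */

	if (snap_count > size / sizeof(uint64_t))	/* one __le64 id per snapshot */
		return false;
	size -= (size_t) snap_count * sizeof(uint64_t);
	return (uint64_t) size >= snap_names_len;	/* names must fit in what remains */
}

int main(void)
{
	printf("510 snaps, 8 KiB of names: %s\n",
	       snap_header_fits(510, 8192) ? "ok" : "rejected");
	printf("absurd names length:       %s\n",
	       snap_header_fits(1, UINT64_MAX) ? "ok" : "rejected");
	return 0;
}
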
@@ -491,179 +549,203 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) | |||
491 | * header. | 549 | * header. |
492 | */ | 550 | */ |
493 | static int rbd_header_from_disk(struct rbd_image_header *header, | 551 | static int rbd_header_from_disk(struct rbd_image_header *header, |
494 | struct rbd_image_header_ondisk *ondisk, | 552 | struct rbd_image_header_ondisk *ondisk) |
495 | u32 allocated_snaps) | ||
496 | { | 553 | { |
497 | u32 snap_count; | 554 | u32 snap_count; |
555 | size_t len; | ||
556 | size_t size; | ||
557 | u32 i; | ||
498 | 558 | ||
499 | if (!rbd_dev_ondisk_valid(ondisk)) | 559 | memset(header, 0, sizeof (*header)); |
500 | return -ENXIO; | ||
501 | 560 | ||
502 | snap_count = le32_to_cpu(ondisk->snap_count); | 561 | snap_count = le32_to_cpu(ondisk->snap_count); |
503 | if (snap_count > (SIZE_MAX - sizeof(struct ceph_snap_context)) | 562 | |
504 | / sizeof (u64)) | 563 | len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix)); |
505 | return -EINVAL; | 564 | header->object_prefix = kmalloc(len + 1, GFP_KERNEL); |
506 | header->snapc = kmalloc(sizeof(struct ceph_snap_context) + | 565 | if (!header->object_prefix) |
507 | snap_count * sizeof(u64), | ||
508 | GFP_KERNEL); | ||
509 | if (!header->snapc) | ||
510 | return -ENOMEM; | 566 | return -ENOMEM; |
567 | memcpy(header->object_prefix, ondisk->object_prefix, len); | ||
568 | header->object_prefix[len] = '\0'; | ||
511 | 569 | ||
512 | if (snap_count) { | 570 | if (snap_count) { |
513 | header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); | 571 | u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); |
514 | header->snap_names = kmalloc(header->snap_names_len, | 572 | |
515 | GFP_KERNEL); | 573 | /* Save a copy of the snapshot names */ |
574 | |||
575 | if (snap_names_len > (u64) SIZE_MAX) | ||
576 | return -EIO; | ||
577 | header->snap_names = kmalloc(snap_names_len, GFP_KERNEL); | ||
516 | if (!header->snap_names) | 578 | if (!header->snap_names) |
517 | goto err_snapc; | 579 | goto out_err; |
518 | header->snap_sizes = kmalloc(snap_count * sizeof(u64), | 580 | /* |
519 | GFP_KERNEL); | 581 | * Note that rbd_dev_v1_header_read() guarantees |
582 | * the ondisk buffer we're working with has | ||
583 | * snap_names_len bytes beyond the end of the | ||
584 | * snapshot id array, this memcpy() is safe. | ||
585 | */ | ||
586 | memcpy(header->snap_names, &ondisk->snaps[snap_count], | ||
587 | snap_names_len); | ||
588 | |||
589 | /* Record each snapshot's size */ | ||
590 | |||
591 | size = snap_count * sizeof (*header->snap_sizes); | ||
592 | header->snap_sizes = kmalloc(size, GFP_KERNEL); | ||
520 | if (!header->snap_sizes) | 593 | if (!header->snap_sizes) |
521 | goto err_names; | 594 | goto out_err; |
595 | for (i = 0; i < snap_count; i++) | ||
596 | header->snap_sizes[i] = | ||
597 | le64_to_cpu(ondisk->snaps[i].image_size); | ||
522 | } else { | 598 | } else { |
523 | WARN_ON(ondisk->snap_names_len); | 599 | WARN_ON(ondisk->snap_names_len); |
524 | header->snap_names_len = 0; | ||
525 | header->snap_names = NULL; | 600 | header->snap_names = NULL; |
526 | header->snap_sizes = NULL; | 601 | header->snap_sizes = NULL; |
527 | } | 602 | } |
528 | 603 | ||
529 | header->object_prefix = kmalloc(sizeof (ondisk->block_name) + 1, | 604 | header->features = 0; /* No features support in v1 images */ |
530 | GFP_KERNEL); | ||
531 | if (!header->object_prefix) | ||
532 | goto err_sizes; | ||
533 | |||
534 | memcpy(header->object_prefix, ondisk->block_name, | ||
535 | sizeof(ondisk->block_name)); | ||
536 | header->object_prefix[sizeof (ondisk->block_name)] = '\0'; | ||
537 | |||
538 | header->image_size = le64_to_cpu(ondisk->image_size); | ||
539 | header->obj_order = ondisk->options.order; | 605 | header->obj_order = ondisk->options.order; |
540 | header->crypt_type = ondisk->options.crypt_type; | 606 | header->crypt_type = ondisk->options.crypt_type; |
541 | header->comp_type = ondisk->options.comp_type; | 607 | header->comp_type = ondisk->options.comp_type; |
542 | 608 | ||
609 | /* Allocate and fill in the snapshot context */ | ||
610 | |||
611 | header->image_size = le64_to_cpu(ondisk->image_size); | ||
612 | size = sizeof (struct ceph_snap_context); | ||
613 | size += snap_count * sizeof (header->snapc->snaps[0]); | ||
614 | header->snapc = kzalloc(size, GFP_KERNEL); | ||
615 | if (!header->snapc) | ||
616 | goto out_err; | ||
617 | |||
543 | atomic_set(&header->snapc->nref, 1); | 618 | atomic_set(&header->snapc->nref, 1); |
544 | header->snapc->seq = le64_to_cpu(ondisk->snap_seq); | 619 | header->snapc->seq = le64_to_cpu(ondisk->snap_seq); |
545 | header->snapc->num_snaps = snap_count; | 620 | header->snapc->num_snaps = snap_count; |
546 | header->total_snaps = snap_count; | 621 | for (i = 0; i < snap_count; i++) |
547 | 622 | header->snapc->snaps[i] = | |
548 | if (snap_count && allocated_snaps == snap_count) { | 623 | le64_to_cpu(ondisk->snaps[i].id); |
549 | int i; | ||
550 | |||
551 | for (i = 0; i < snap_count; i++) { | ||
552 | header->snapc->snaps[i] = | ||
553 | le64_to_cpu(ondisk->snaps[i].id); | ||
554 | header->snap_sizes[i] = | ||
555 | le64_to_cpu(ondisk->snaps[i].image_size); | ||
556 | } | ||
557 | |||
558 | /* copy snapshot names */ | ||
559 | memcpy(header->snap_names, &ondisk->snaps[snap_count], | ||
560 | header->snap_names_len); | ||
561 | } | ||
562 | 624 | ||
563 | return 0; | 625 | return 0; |
564 | 626 | ||
565 | err_sizes: | 627 | out_err: |
566 | kfree(header->snap_sizes); | 628 | kfree(header->snap_sizes); |
567 | header->snap_sizes = NULL; | 629 | header->snap_sizes = NULL; |
568 | err_names: | ||
569 | kfree(header->snap_names); | 630 | kfree(header->snap_names); |
570 | header->snap_names = NULL; | 631 | header->snap_names = NULL; |
571 | err_snapc: | 632 | kfree(header->object_prefix); |
572 | kfree(header->snapc); | 633 | header->object_prefix = NULL; |
573 | header->snapc = NULL; | ||
574 | 634 | ||
575 | return -ENOMEM; | 635 | return -ENOMEM; |
576 | } | 636 | } |
577 | 637 | ||
578 | static int snap_by_name(struct rbd_image_header *header, const char *snap_name, | 638 | static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) |
579 | u64 *seq, u64 *size) | ||
580 | { | 639 | { |
581 | int i; | ||
582 | char *p = header->snap_names; | ||
583 | 640 | ||
584 | for (i = 0; i < header->total_snaps; i++) { | 641 | struct rbd_snap *snap; |
585 | if (!strcmp(snap_name, p)) { | ||
586 | 642 | ||
587 | /* Found it. Pass back its id and/or size */ | 643 | list_for_each_entry(snap, &rbd_dev->snaps, node) { |
644 | if (!strcmp(snap_name, snap->name)) { | ||
645 | rbd_dev->mapping.snap_id = snap->id; | ||
646 | rbd_dev->mapping.size = snap->size; | ||
647 | rbd_dev->mapping.features = snap->features; | ||
588 | 648 | ||
589 | if (seq) | 649 | return 0; |
590 | *seq = header->snapc->snaps[i]; | ||
591 | if (size) | ||
592 | *size = header->snap_sizes[i]; | ||
593 | return i; | ||
594 | } | 650 | } |
595 | p += strlen(p) + 1; /* Skip ahead to the next name */ | ||
596 | } | 651 | } |
652 | |||
597 | return -ENOENT; | 653 | return -ENOENT; |
598 | } | 654 | } |
599 | 655 | ||
600 | static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size) | 656 | static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name) |
601 | { | 657 | { |
602 | int ret; | 658 | int ret; |
603 | 659 | ||
604 | down_write(&rbd_dev->header_rwsem); | 660 | if (!memcmp(snap_name, RBD_SNAP_HEAD_NAME, |
605 | |||
606 | if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, | ||
607 | sizeof (RBD_SNAP_HEAD_NAME))) { | 661 | sizeof (RBD_SNAP_HEAD_NAME))) { |
608 | rbd_dev->snap_id = CEPH_NOSNAP; | 662 | rbd_dev->mapping.snap_id = CEPH_NOSNAP; |
609 | rbd_dev->snap_exists = false; | 663 | rbd_dev->mapping.size = rbd_dev->header.image_size; |
610 | rbd_dev->read_only = 0; | 664 | rbd_dev->mapping.features = rbd_dev->header.features; |
611 | if (size) | 665 | rbd_dev->mapping.snap_exists = false; |
612 | *size = rbd_dev->header.image_size; | 666 | rbd_dev->mapping.read_only = rbd_dev->rbd_opts.read_only; |
667 | ret = 0; | ||
613 | } else { | 668 | } else { |
614 | u64 snap_id = 0; | 669 | ret = snap_by_name(rbd_dev, snap_name); |
615 | |||
616 | ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name, | ||
617 | &snap_id, size); | ||
618 | if (ret < 0) | 670 | if (ret < 0) |
619 | goto done; | 671 | goto done; |
620 | rbd_dev->snap_id = snap_id; | 672 | rbd_dev->mapping.snap_exists = true; |
621 | rbd_dev->snap_exists = true; | 673 | rbd_dev->mapping.read_only = true; |
622 | rbd_dev->read_only = 1; | ||
623 | } | 674 | } |
624 | 675 | rbd_dev->mapping.snap_name = snap_name; | |
625 | ret = 0; | ||
626 | done: | 676 | done: |
627 | up_write(&rbd_dev->header_rwsem); | ||
628 | return ret; | 677 | return ret; |
629 | } | 678 | } |
630 | 679 | ||
631 | static void rbd_header_free(struct rbd_image_header *header) | 680 | static void rbd_header_free(struct rbd_image_header *header) |
632 | { | 681 | { |
633 | kfree(header->object_prefix); | 682 | kfree(header->object_prefix); |
683 | header->object_prefix = NULL; | ||
634 | kfree(header->snap_sizes); | 684 | kfree(header->snap_sizes); |
685 | header->snap_sizes = NULL; | ||
635 | kfree(header->snap_names); | 686 | kfree(header->snap_names); |
687 | header->snap_names = NULL; | ||
636 | ceph_put_snap_context(header->snapc); | 688 | ceph_put_snap_context(header->snapc); |
689 | header->snapc = NULL; | ||
637 | } | 690 | } |
638 | 691 | ||
639 | /* | 692 | static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) |
640 | * get the actual striped segment name, offset and length | ||
641 | */ | ||
642 | static u64 rbd_get_segment(struct rbd_image_header *header, | ||
643 | const char *object_prefix, | ||
644 | u64 ofs, u64 len, | ||
645 | char *seg_name, u64 *segofs) | ||
646 | { | 693 | { |
647 | u64 seg = ofs >> header->obj_order; | 694 | char *name; |
695 | u64 segment; | ||
696 | int ret; | ||
648 | 697 | ||
649 | if (seg_name) | 698 | name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); |
650 | snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, | 699 | if (!name) |
651 | "%s.%012llx", object_prefix, seg); | 700 | return NULL; |
701 | segment = offset >> rbd_dev->header.obj_order; | ||
702 | ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx", | ||
703 | rbd_dev->header.object_prefix, segment); | ||
704 | if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) { | ||
705 | pr_err("error formatting segment name for #%llu (%d)\n", | ||
706 | segment, ret); | ||
707 | kfree(name); | ||
708 | name = NULL; | ||
709 | } | ||
652 | 710 | ||
653 | ofs = ofs & ((1 << header->obj_order) - 1); | 711 | return name; |
654 | len = min_t(u64, len, (1 << header->obj_order) - ofs); | 712 | } |
655 | 713 | ||
656 | if (segofs) | 714 | static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) |
657 | *segofs = ofs; | 715 | { |
716 | u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; | ||
717 | |||
718 | return offset & (segment_size - 1); | ||
719 | } | ||
720 | |||
721 | static u64 rbd_segment_length(struct rbd_device *rbd_dev, | ||
722 | u64 offset, u64 length) | ||
723 | { | ||
724 | u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; | ||
658 | 725 | ||
659 | return len; | 726 | offset &= segment_size - 1; |
727 | |||
728 | rbd_assert(length <= U64_MAX - offset); | ||
729 | if (offset + length > segment_size) | ||
730 | length = segment_size - offset; | ||
731 | |||
732 | return length; | ||
660 | } | 733 | } |
661 | 734 | ||
662 | static int rbd_get_num_segments(struct rbd_image_header *header, | 735 | static int rbd_get_num_segments(struct rbd_image_header *header, |
663 | u64 ofs, u64 len) | 736 | u64 ofs, u64 len) |
664 | { | 737 | { |
665 | u64 start_seg = ofs >> header->obj_order; | 738 | u64 start_seg; |
666 | u64 end_seg = (ofs + len - 1) >> header->obj_order; | 739 | u64 end_seg; |
740 | |||
741 | if (!len) | ||
742 | return 0; | ||
743 | if (len - 1 > U64_MAX - ofs) | ||
744 | return -ERANGE; | ||
745 | |||
746 | start_seg = ofs >> header->obj_order; | ||
747 | end_seg = (ofs + len - 1) >> header->obj_order; | ||
748 | |||
667 | return end_seg - start_seg + 1; | 749 | return end_seg - start_seg + 1; |
668 | } | 750 | } |
669 | 751 | ||
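
rbd_segment_name(), rbd_segment_offset() and rbd_segment_length() split an image-relative request into per-object pieces: the object index is offset >> obj_order, the in-object offset masks off the low bits, and the length is clamped at the object boundary. A standalone walk-through with a 4 MiB object size (obj_order 22); the "rb.0.1234" prefix is a made-up example:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

static const int obj_order = 22;	/* 4 MiB objects */

int main(void)
{
	uint64_t offset = 0x00c80000;		/* 12.5 MiB into the image */
	uint64_t length = 0x00500000;		/* a 5 MiB request */
	uint64_t seg_size = (uint64_t) 1 << obj_order;
	uint64_t segment = offset >> obj_order;
	uint64_t seg_ofs = offset & (seg_size - 1);
	uint64_t seg_len = length;

	if (seg_ofs + seg_len > seg_size)
		seg_len = seg_size - seg_ofs;	/* clamp to the object boundary */

	printf("object rb.0.1234.%012" PRIx64 " ofs %" PRIu64 " len %" PRIu64 "\n",
	       segment, seg_ofs, seg_len);
	return 0;
}
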
@@ -725,7 +807,9 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next, | |||
725 | struct bio_pair **bp, | 807 | struct bio_pair **bp, |
726 | int len, gfp_t gfpmask) | 808 | int len, gfp_t gfpmask) |
727 | { | 809 | { |
728 | struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; | 810 | struct bio *old_chain = *old; |
811 | struct bio *new_chain = NULL; | ||
812 | struct bio *tail; | ||
729 | int total = 0; | 813 | int total = 0; |
730 | 814 | ||
731 | if (*bp) { | 815 | if (*bp) { |
@@ -734,9 +818,12 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next, | |||
734 | } | 818 | } |
735 | 819 | ||
736 | while (old_chain && (total < len)) { | 820 | while (old_chain && (total < len)) { |
821 | struct bio *tmp; | ||
822 | |||
737 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); | 823 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); |
738 | if (!tmp) | 824 | if (!tmp) |
739 | goto err_out; | 825 | goto err_out; |
826 | gfpmask &= ~__GFP_WAIT; /* can't wait after the first */ | ||
740 | 827 | ||
741 | if (total + old_chain->bi_size > len) { | 828 | if (total + old_chain->bi_size > len) { |
742 | struct bio_pair *bp; | 829 | struct bio_pair *bp; |
@@ -764,24 +851,18 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next, | |||
764 | } | 851 | } |
765 | 852 | ||
766 | tmp->bi_bdev = NULL; | 853 | tmp->bi_bdev = NULL; |
767 | gfpmask &= ~__GFP_WAIT; | ||
768 | tmp->bi_next = NULL; | 854 | tmp->bi_next = NULL; |
769 | 855 | if (new_chain) | |
770 | if (!new_chain) { | ||
771 | new_chain = tail = tmp; | ||
772 | } else { | ||
773 | tail->bi_next = tmp; | 856 | tail->bi_next = tmp; |
774 | tail = tmp; | 857 | else |
775 | } | 858 | new_chain = tmp; |
859 | tail = tmp; | ||
776 | old_chain = old_chain->bi_next; | 860 | old_chain = old_chain->bi_next; |
777 | 861 | ||
778 | total += tmp->bi_size; | 862 | total += tmp->bi_size; |
779 | } | 863 | } |
780 | 864 | ||
781 | BUG_ON(total < len); | 865 | rbd_assert(total == len); |
782 | |||
783 | if (tail) | ||
784 | tail->bi_next = NULL; | ||
785 | 866 | ||
786 | *old = old_chain; | 867 | *old = old_chain; |
787 | 868 | ||
@@ -939,8 +1020,9 @@ static int rbd_do_request(struct request *rq, | |||
939 | layout->fl_stripe_count = cpu_to_le32(1); | 1020 | layout->fl_stripe_count = cpu_to_le32(1); |
940 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | 1021 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); |
941 | layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); | 1022 | layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); |
942 | ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, | 1023 | ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, |
943 | req, ops); | 1024 | req, ops); |
1025 | rbd_assert(ret == 0); | ||
944 | 1026 | ||
945 | ceph_osdc_build_request(req, ofs, &len, | 1027 | ceph_osdc_build_request(req, ofs, &len, |
946 | ops, | 1028 | ops, |
@@ -1031,8 +1113,8 @@ static int rbd_req_sync_op(struct rbd_device *rbd_dev, | |||
1031 | int flags, | 1113 | int flags, |
1032 | struct ceph_osd_req_op *ops, | 1114 | struct ceph_osd_req_op *ops, |
1033 | const char *object_name, | 1115 | const char *object_name, |
1034 | u64 ofs, u64 len, | 1116 | u64 ofs, u64 inbound_size, |
1035 | char *buf, | 1117 | char *inbound, |
1036 | struct ceph_osd_request **linger_req, | 1118 | struct ceph_osd_request **linger_req, |
1037 | u64 *ver) | 1119 | u64 *ver) |
1038 | { | 1120 | { |
@@ -1040,15 +1122,15 @@ static int rbd_req_sync_op(struct rbd_device *rbd_dev, | |||
1040 | struct page **pages; | 1122 | struct page **pages; |
1041 | int num_pages; | 1123 | int num_pages; |
1042 | 1124 | ||
1043 | BUG_ON(ops == NULL); | 1125 | rbd_assert(ops != NULL); |
1044 | 1126 | ||
1045 | num_pages = calc_pages_for(ofs , len); | 1127 | num_pages = calc_pages_for(ofs, inbound_size); |
1046 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | 1128 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); |
1047 | if (IS_ERR(pages)) | 1129 | if (IS_ERR(pages)) |
1048 | return PTR_ERR(pages); | 1130 | return PTR_ERR(pages); |
1049 | 1131 | ||
1050 | ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, | 1132 | ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, |
1051 | object_name, ofs, len, NULL, | 1133 | object_name, ofs, inbound_size, NULL, |
1052 | pages, num_pages, | 1134 | pages, num_pages, |
1053 | flags, | 1135 | flags, |
1054 | ops, | 1136 | ops, |
@@ -1058,8 +1140,8 @@ static int rbd_req_sync_op(struct rbd_device *rbd_dev, | |||
1058 | if (ret < 0) | 1140 | if (ret < 0) |
1059 | goto done; | 1141 | goto done; |
1060 | 1142 | ||
1061 | if ((flags & CEPH_OSD_FLAG_READ) && buf) | 1143 | if ((flags & CEPH_OSD_FLAG_READ) && inbound) |
1062 | ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); | 1144 | ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret); |
1063 | 1145 | ||
1064 | done: | 1146 | done: |
1065 | ceph_release_page_vector(pages, num_pages); | 1147 | ceph_release_page_vector(pages, num_pages); |
@@ -1086,14 +1168,11 @@ static int rbd_do_op(struct request *rq, | |||
1086 | struct ceph_osd_req_op *ops; | 1168 | struct ceph_osd_req_op *ops; |
1087 | u32 payload_len; | 1169 | u32 payload_len; |
1088 | 1170 | ||
1089 | seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); | 1171 | seg_name = rbd_segment_name(rbd_dev, ofs); |
1090 | if (!seg_name) | 1172 | if (!seg_name) |
1091 | return -ENOMEM; | 1173 | return -ENOMEM; |
1092 | 1174 | seg_len = rbd_segment_length(rbd_dev, ofs, len); | |
1093 | seg_len = rbd_get_segment(&rbd_dev->header, | 1175 | seg_ofs = rbd_segment_offset(rbd_dev, ofs); |
1094 | rbd_dev->header.object_prefix, | ||
1095 | ofs, len, | ||
1096 | seg_name, &seg_ofs); | ||
1097 | 1176 | ||
1098 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); | 1177 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); |
1099 | 1178 | ||
@@ -1105,7 +1184,7 @@ static int rbd_do_op(struct request *rq, | |||
1105 | /* we've taken care of segment sizes earlier when we | 1184 | /* we've taken care of segment sizes earlier when we |
1106 | cloned the bios. We should never have a segment | 1185 | cloned the bios. We should never have a segment |
1107 | truncated at this point */ | 1186 | truncated at this point */ |
1108 | BUG_ON(seg_len < len); | 1187 | rbd_assert(seg_len == len); |
1109 | 1188 | ||
1110 | ret = rbd_do_request(rq, rbd_dev, snapc, snapid, | 1189 | ret = rbd_do_request(rq, rbd_dev, snapc, snapid, |
1111 | seg_name, seg_ofs, seg_len, | 1190 | seg_name, seg_ofs, seg_len, |
@@ -1307,89 +1386,36 @@ static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) | |||
1307 | return ret; | 1386 | return ret; |
1308 | } | 1387 | } |
1309 | 1388 | ||
1310 | struct rbd_notify_info { | ||
1311 | struct rbd_device *rbd_dev; | ||
1312 | }; | ||
1313 | |||
1314 | static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | ||
1315 | { | ||
1316 | struct rbd_device *rbd_dev = (struct rbd_device *)data; | ||
1317 | if (!rbd_dev) | ||
1318 | return; | ||
1319 | |||
1320 | dout("rbd_notify_cb %s notify_id=%llu opcode=%u\n", | ||
1321 | rbd_dev->header_name, (unsigned long long) notify_id, | ||
1322 | (unsigned int) opcode); | ||
1323 | } | ||
1324 | |||
1325 | /* | ||
1326 | * Request sync osd notify | ||
1327 | */ | ||
1328 | static int rbd_req_sync_notify(struct rbd_device *rbd_dev) | ||
1329 | { | ||
1330 | struct ceph_osd_req_op *ops; | ||
1331 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; | ||
1332 | struct ceph_osd_event *event; | ||
1333 | struct rbd_notify_info info; | ||
1334 | int payload_len = sizeof(u32) + sizeof(u32); | ||
1335 | int ret; | ||
1336 | |||
1337 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY, payload_len); | ||
1338 | if (!ops) | ||
1339 | return -ENOMEM; | ||
1340 | |||
1341 | info.rbd_dev = rbd_dev; | ||
1342 | |||
1343 | ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1, | ||
1344 | (void *)&info, &event); | ||
1345 | if (ret < 0) | ||
1346 | goto fail; | ||
1347 | |||
1348 | ops[0].watch.ver = 1; | ||
1349 | ops[0].watch.flag = 1; | ||
1350 | ops[0].watch.cookie = event->cookie; | ||
1351 | ops[0].watch.prot_ver = RADOS_NOTIFY_VER; | ||
1352 | ops[0].watch.timeout = 12; | ||
1353 | |||
1354 | ret = rbd_req_sync_op(rbd_dev, NULL, | ||
1355 | CEPH_NOSNAP, | ||
1356 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
1357 | ops, | ||
1358 | rbd_dev->header_name, | ||
1359 | 0, 0, NULL, NULL, NULL); | ||
1360 | if (ret < 0) | ||
1361 | goto fail_event; | ||
1362 | |||
1363 | ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT); | ||
1364 | dout("ceph_osdc_wait_event returned %d\n", ret); | ||
1365 | rbd_destroy_ops(ops); | ||
1366 | return 0; | ||
1367 | |||
1368 | fail_event: | ||
1369 | ceph_osdc_cancel_event(event); | ||
1370 | fail: | ||
1371 | rbd_destroy_ops(ops); | ||
1372 | return ret; | ||
1373 | } | ||
1374 | |||
1375 | /* | 1389 | /* |
1376 | * Request sync osd read | 1390 | * Synchronous osd object method call |
1377 | */ | 1391 | */ |
1378 | static int rbd_req_sync_exec(struct rbd_device *rbd_dev, | 1392 | static int rbd_req_sync_exec(struct rbd_device *rbd_dev, |
1379 | const char *object_name, | 1393 | const char *object_name, |
1380 | const char *class_name, | 1394 | const char *class_name, |
1381 | const char *method_name, | 1395 | const char *method_name, |
1382 | const char *data, | 1396 | const char *outbound, |
1383 | int len, | 1397 | size_t outbound_size, |
1398 | char *inbound, | ||
1399 | size_t inbound_size, | ||
1400 | int flags, | ||
1384 | u64 *ver) | 1401 | u64 *ver) |
1385 | { | 1402 | { |
1386 | struct ceph_osd_req_op *ops; | 1403 | struct ceph_osd_req_op *ops; |
1387 | int class_name_len = strlen(class_name); | 1404 | int class_name_len = strlen(class_name); |
1388 | int method_name_len = strlen(method_name); | 1405 | int method_name_len = strlen(method_name); |
1406 | int payload_size; | ||
1389 | int ret; | 1407 | int ret; |
1390 | 1408 | ||
1391 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, | 1409 | /* |
1392 | class_name_len + method_name_len + len); | 1410 | * Any input parameters required by the method we're calling |
1411 | * will be sent along with the class and method names as | ||
1412 | * part of the message payload. That data and its size are | ||
1413 | * supplied via the indata and indata_len fields (named from | ||
1414 | * the perspective of the server side) in the OSD request | ||
1415 | * operation. | ||
1416 | */ | ||
1417 | payload_size = class_name_len + method_name_len + outbound_size; | ||
1418 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); | ||
1393 | if (!ops) | 1419 | if (!ops) |
1394 | return -ENOMEM; | 1420 | return -ENOMEM; |
1395 | 1421 | ||
@@ -1398,14 +1424,14 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, | |||
1398 | ops[0].cls.method_name = method_name; | 1424 | ops[0].cls.method_name = method_name; |
1399 | ops[0].cls.method_len = (__u8) method_name_len; | 1425 | ops[0].cls.method_len = (__u8) method_name_len; |
1400 | ops[0].cls.argc = 0; | 1426 | ops[0].cls.argc = 0; |
1401 | ops[0].cls.indata = data; | 1427 | ops[0].cls.indata = outbound; |
1402 | ops[0].cls.indata_len = len; | 1428 | ops[0].cls.indata_len = outbound_size; |
1403 | 1429 | ||
1404 | ret = rbd_req_sync_op(rbd_dev, NULL, | 1430 | ret = rbd_req_sync_op(rbd_dev, NULL, |
1405 | CEPH_NOSNAP, | 1431 | CEPH_NOSNAP, |
1406 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | 1432 | flags, ops, |
1407 | ops, | 1433 | object_name, 0, inbound_size, inbound, |
1408 | object_name, 0, 0, NULL, NULL, ver); | 1434 | NULL, ver); |
1409 | 1435 | ||
1410 | rbd_destroy_ops(ops); | 1436 | rbd_destroy_ops(ops); |
1411 | 1437 | ||
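As the comment in the hunk above explains, the reworked rbd_req_sync_exec() now carries both an outbound and an inbound buffer for a class method call, and the CEPH_OSD_OP_CALL payload is sized from the class name, the method name and the outbound data. A small sketch of that sizing, with an illustrative helper name and example arguments that are not taken from the driver:

	#include <stddef.h>
	#include <stdio.h>
	#include <string.h>

	/* Sketch only: how the CALL payload size is computed. */
	static size_t call_payload_size(const char *class_name,
					const char *method_name,
					size_t outbound_size)
	{
		/* class and method names travel along with the caller's outbound data */
		return strlen(class_name) + strlen(method_name) + outbound_size;
	}

	int main(void)
	{
		/* e.g. a "rbd"/"get_size" call carrying an 8-byte snapshot id */
		printf("%zu\n", call_payload_size("rbd", "get_size", 8));  /* prints 19 */
		return 0;
	}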
@@ -1447,10 +1473,6 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1447 | struct rbd_req_coll *coll; | 1473 | struct rbd_req_coll *coll; |
1448 | struct ceph_snap_context *snapc; | 1474 | struct ceph_snap_context *snapc; |
1449 | 1475 | ||
1450 | /* peek at request from block layer */ | ||
1451 | if (!rq) | ||
1452 | break; | ||
1453 | |||
1454 | dout("fetched request\n"); | 1476 | dout("fetched request\n"); |
1455 | 1477 | ||
1456 | /* filter out block requests we don't understand */ | 1478 | /* filter out block requests we don't understand */ |
@@ -1465,7 +1487,7 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1465 | size = blk_rq_bytes(rq); | 1487 | size = blk_rq_bytes(rq); |
1466 | ofs = blk_rq_pos(rq) * SECTOR_SIZE; | 1488 | ofs = blk_rq_pos(rq) * SECTOR_SIZE; |
1467 | rq_bio = rq->bio; | 1489 | rq_bio = rq->bio; |
1468 | if (do_write && rbd_dev->read_only) { | 1490 | if (do_write && rbd_dev->mapping.read_only) { |
1469 | __blk_end_request_all(rq, -EROFS); | 1491 | __blk_end_request_all(rq, -EROFS); |
1470 | continue; | 1492 | continue; |
1471 | } | 1493 | } |
@@ -1474,7 +1496,8 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1474 | 1496 | ||
1475 | down_read(&rbd_dev->header_rwsem); | 1497 | down_read(&rbd_dev->header_rwsem); |
1476 | 1498 | ||
1477 | if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) { | 1499 | if (rbd_dev->mapping.snap_id != CEPH_NOSNAP && |
1500 | !rbd_dev->mapping.snap_exists) { | ||
1478 | up_read(&rbd_dev->header_rwsem); | 1501 | up_read(&rbd_dev->header_rwsem); |
1479 | dout("request for non-existent snapshot"); | 1502 | dout("request for non-existent snapshot"); |
1480 | spin_lock_irq(q->queue_lock); | 1503 | spin_lock_irq(q->queue_lock); |
@@ -1491,6 +1514,12 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1491 | size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); | 1514 | size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); |
1492 | 1515 | ||
1493 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); | 1516 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); |
1517 | if (num_segs <= 0) { | ||
1518 | spin_lock_irq(q->queue_lock); | ||
1519 | __blk_end_request_all(rq, num_segs); | ||
1520 | ceph_put_snap_context(snapc); | ||
1521 | continue; | ||
1522 | } | ||
1494 | coll = rbd_alloc_coll(num_segs); | 1523 | coll = rbd_alloc_coll(num_segs); |
1495 | if (!coll) { | 1524 | if (!coll) { |
1496 | spin_lock_irq(q->queue_lock); | 1525 | spin_lock_irq(q->queue_lock); |
@@ -1502,10 +1531,7 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1502 | do { | 1531 | do { |
1503 | /* a bio clone to be passed down to OSD req */ | 1532 | /* a bio clone to be passed down to OSD req */ |
1504 | dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); | 1533 | dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); |
1505 | op_size = rbd_get_segment(&rbd_dev->header, | 1534 | op_size = rbd_segment_length(rbd_dev, ofs, size); |
1506 | rbd_dev->header.object_prefix, | ||
1507 | ofs, size, | ||
1508 | NULL, NULL); | ||
1509 | kref_get(&coll->kref); | 1535 | kref_get(&coll->kref); |
1510 | bio = bio_chain_clone(&rq_bio, &next_bio, &bp, | 1536 | bio = bio_chain_clone(&rq_bio, &next_bio, &bp, |
1511 | op_size, GFP_ATOMIC); | 1537 | op_size, GFP_ATOMIC); |
@@ -1525,7 +1551,7 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1525 | coll, cur_seg); | 1551 | coll, cur_seg); |
1526 | else | 1552 | else |
1527 | rbd_req_read(rq, rbd_dev, | 1553 | rbd_req_read(rq, rbd_dev, |
1528 | rbd_dev->snap_id, | 1554 | rbd_dev->mapping.snap_id, |
1529 | ofs, | 1555 | ofs, |
1530 | op_size, bio, | 1556 | op_size, bio, |
1531 | coll, cur_seg); | 1557 | coll, cur_seg); |
@@ -1581,8 +1607,6 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) | |||
1581 | if (!disk) | 1607 | if (!disk) |
1582 | return; | 1608 | return; |
1583 | 1609 | ||
1584 | rbd_header_free(&rbd_dev->header); | ||
1585 | |||
1586 | if (disk->flags & GENHD_FL_UP) | 1610 | if (disk->flags & GENHD_FL_UP) |
1587 | del_gendisk(disk); | 1611 | del_gendisk(disk); |
1588 | if (disk->queue) | 1612 | if (disk->queue) |
@@ -1591,105 +1615,96 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) | |||
1591 | } | 1615 | } |
1592 | 1616 | ||
1593 | /* | 1617 | /* |
1594 | * reload the ondisk the header | 1618 | * Read the complete header for the given rbd device. |
1619 | * | ||
1620 | * Returns a pointer to a dynamically-allocated buffer containing | ||
1621 | * the complete and validated header. Caller can pass the address | ||
1622 | * of a variable that will be filled in with the version of the | ||
1623 | * header object at the time it was read. | ||
1624 | * | ||
1625 | * Returns a pointer-coded errno if a failure occurs. | ||
1595 | */ | 1626 | */ |
1596 | static int rbd_read_header(struct rbd_device *rbd_dev, | 1627 | static struct rbd_image_header_ondisk * |
1597 | struct rbd_image_header *header) | 1628 | rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) |
1598 | { | 1629 | { |
1599 | ssize_t rc; | 1630 | struct rbd_image_header_ondisk *ondisk = NULL; |
1600 | struct rbd_image_header_ondisk *dh; | ||
1601 | u32 snap_count = 0; | 1631 | u32 snap_count = 0; |
1602 | u64 ver; | 1632 | u64 names_size = 0; |
1603 | size_t len; | 1633 | u32 want_count; |
1634 | int ret; | ||
1604 | 1635 | ||
1605 | /* | 1636 | /* |
1606 | * First reads the fixed-size header to determine the number | 1637 | * The complete header will include an array of its 64-bit |
1607 | * of snapshots, then re-reads it, along with all snapshot | 1638 | * snapshot ids, followed by the names of those snapshots as |
1608 | * records as well as their stored names. | 1639 | * a contiguous block of NUL-terminated strings. Note that |
1640 | * the number of snapshots could change by the time we read | ||
1641 | * it in, in which case we re-read it. | ||
1609 | */ | 1642 | */ |
1610 | len = sizeof (*dh); | 1643 | do { |
1611 | while (1) { | 1644 | size_t size; |
1612 | dh = kmalloc(len, GFP_KERNEL); | 1645 | |
1613 | if (!dh) | 1646 | kfree(ondisk); |
1614 | return -ENOMEM; | 1647 | |
1615 | 1648 | size = sizeof (*ondisk); | |
1616 | rc = rbd_req_sync_read(rbd_dev, | 1649 | size += snap_count * sizeof (struct rbd_image_snap_ondisk); |
1617 | CEPH_NOSNAP, | 1650 | size += names_size; |
1651 | ondisk = kmalloc(size, GFP_KERNEL); | ||
1652 | if (!ondisk) | ||
1653 | return ERR_PTR(-ENOMEM); | ||
1654 | |||
1655 | ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, | ||
1618 | rbd_dev->header_name, | 1656 | rbd_dev->header_name, |
1619 | 0, len, | 1657 | 0, size, |
1620 | (char *)dh, &ver); | 1658 | (char *) ondisk, version); |
1621 | if (rc < 0) | 1659 | |
1622 | goto out_dh; | 1660 | if (ret < 0) |
1623 | 1661 | goto out_err; | |
1624 | rc = rbd_header_from_disk(header, dh, snap_count); | 1662 | if (WARN_ON((size_t) ret < size)) { |
1625 | if (rc < 0) { | 1663 | ret = -ENXIO; |
1626 | if (rc == -ENXIO) | 1664 | pr_warning("short header read for image %s" |
1627 | pr_warning("unrecognized header format" | 1665 | " (want %zd got %d)\n", |
1628 | " for image %s\n", | 1666 | rbd_dev->image_name, size, ret); |
1629 | rbd_dev->image_name); | 1667 | goto out_err; |
1630 | goto out_dh; | 1668 | } |
1669 | if (!rbd_dev_ondisk_valid(ondisk)) { | ||
1670 | ret = -ENXIO; | ||
1671 | pr_warning("invalid header for image %s\n", | ||
1672 | rbd_dev->image_name); | ||
1673 | goto out_err; | ||
1631 | } | 1674 | } |
1632 | 1675 | ||
1633 | if (snap_count == header->total_snaps) | 1676 | names_size = le64_to_cpu(ondisk->snap_names_len); |
1634 | break; | 1677 | want_count = snap_count; |
1678 | snap_count = le32_to_cpu(ondisk->snap_count); | ||
1679 | } while (snap_count != want_count); | ||
1635 | 1680 | ||
1636 | snap_count = header->total_snaps; | 1681 | return ondisk; |
1637 | len = sizeof (*dh) + | ||
1638 | snap_count * sizeof(struct rbd_image_snap_ondisk) + | ||
1639 | header->snap_names_len; | ||
1640 | 1682 | ||
1641 | rbd_header_free(header); | 1683 | out_err: |
1642 | kfree(dh); | 1684 | kfree(ondisk); |
1643 | } | ||
1644 | header->obj_version = ver; | ||
1645 | 1685 | ||
1646 | out_dh: | 1686 | return ERR_PTR(ret); |
1647 | kfree(dh); | ||
1648 | return rc; | ||
1649 | } | 1687 | } |
1650 | 1688 | ||
1651 | /* | 1689 | /* |
1652 | * create a snapshot | 1690 | * reload the ondisk the header |
1652 | * create a snapshot | 1690 | * reload the on-disk header |
1653 | */ | 1691 | */ |
1654 | static int rbd_header_add_snap(struct rbd_device *rbd_dev, | 1692 | static int rbd_read_header(struct rbd_device *rbd_dev, |
1655 | const char *snap_name, | 1693 | struct rbd_image_header *header) |
1656 | gfp_t gfp_flags) | ||
1657 | { | 1694 | { |
1658 | int name_len = strlen(snap_name); | 1695 | struct rbd_image_header_ondisk *ondisk; |
1659 | u64 new_snapid; | 1696 | u64 ver = 0; |
1660 | int ret; | 1697 | int ret; |
1661 | void *data, *p, *e; | ||
1662 | struct ceph_mon_client *monc; | ||
1663 | 1698 | ||
1664 | /* we should create a snapshot only if we're pointing at the head */ | 1699 | ondisk = rbd_dev_v1_header_read(rbd_dev, &ver); |
1665 | if (rbd_dev->snap_id != CEPH_NOSNAP) | 1700 | if (IS_ERR(ondisk)) |
1666 | return -EINVAL; | 1701 | return PTR_ERR(ondisk); |
1702 | ret = rbd_header_from_disk(header, ondisk); | ||
1703 | if (ret >= 0) | ||
1704 | header->obj_version = ver; | ||
1705 | kfree(ondisk); | ||
1667 | 1706 | ||
1668 | monc = &rbd_dev->rbd_client->client->monc; | 1707 | return ret; |
1669 | ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid); | ||
1670 | dout("created snapid=%llu\n", (unsigned long long) new_snapid); | ||
1671 | if (ret < 0) | ||
1672 | return ret; | ||
1673 | |||
1674 | data = kmalloc(name_len + 16, gfp_flags); | ||
1675 | if (!data) | ||
1676 | return -ENOMEM; | ||
1677 | |||
1678 | p = data; | ||
1679 | e = data + name_len + 16; | ||
1680 | |||
1681 | ceph_encode_string_safe(&p, e, snap_name, name_len, bad); | ||
1682 | ceph_encode_64_safe(&p, e, new_snapid, bad); | ||
1683 | |||
1684 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | ||
1685 | "rbd", "snap_add", | ||
1686 | data, p - data, NULL); | ||
1687 | |||
1688 | kfree(data); | ||
1689 | |||
1690 | return ret < 0 ? ret : 0; | ||
1691 | bad: | ||
1692 | return -ERANGE; | ||
1693 | } | 1708 | } |
1694 | 1709 | ||
1695 | static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) | 1710 | static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) |
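In the new rbd_dev_v1_header_read() above, the buffer size depends on values (the snapshot count and the length of the name block) that are only known after the object has been read, so the read is retried until the reported snapshot count matches the count the buffer was sized for. A self-contained sketch of that loop follows; fetch_header() and the record sizes are made up stand-ins for rbd_req_sync_read() and the on-disk layout:

	#include <stdio.h>
	#include <stdint.h>

	struct fake_header { uint32_t snap_count; uint64_t names_size; };

	/* Simulates an image whose snapshot count settles at 2. */
	static struct fake_header fetch_header(size_t bufsize)
	{
		struct fake_header h = { 2, 16 };
		printf("read %zu bytes\n", bufsize);
		return h;
	}

	int main(void)
	{
		uint32_t snap_count = 0, want_count;
		uint64_t names_size = 0;

		do {
			/* assumed sizes: 64-byte fixed part, 16 bytes per snapshot record */
			size_t size = 64 + snap_count * 16 + names_size;
			struct fake_header h = fetch_header(size);

			want_count = snap_count;     /* what the buffer was sized for */
			names_size = h.names_size;
			snap_count = h.snap_count;
		} while (snap_count != want_count);  /* re-read if the count changed */

		printf("settled on %u snapshots\n", snap_count);
		return 0;
	}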
@@ -1716,11 +1731,15 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) | |||
1716 | down_write(&rbd_dev->header_rwsem); | 1731 | down_write(&rbd_dev->header_rwsem); |
1717 | 1732 | ||
1718 | /* resized? */ | 1733 | /* resized? */ |
1719 | if (rbd_dev->snap_id == CEPH_NOSNAP) { | 1734 | if (rbd_dev->mapping.snap_id == CEPH_NOSNAP) { |
1720 | sector_t size = (sector_t) h.image_size / SECTOR_SIZE; | 1735 | sector_t size = (sector_t) h.image_size / SECTOR_SIZE; |
1721 | 1736 | ||
1722 | dout("setting size to %llu sectors", (unsigned long long) size); | 1737 | if (size != (sector_t) rbd_dev->mapping.size) { |
1723 | set_capacity(rbd_dev->disk, size); | 1738 | dout("setting size to %llu sectors", |
1739 | (unsigned long long) size); | ||
1740 | rbd_dev->mapping.size = (u64) size; | ||
1741 | set_capacity(rbd_dev->disk, size); | ||
1742 | } | ||
1724 | } | 1743 | } |
1725 | 1744 | ||
1726 | /* rbd_dev->header.object_prefix shouldn't change */ | 1745 | /* rbd_dev->header.object_prefix shouldn't change */ |
@@ -1733,16 +1752,16 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) | |||
1733 | *hver = h.obj_version; | 1752 | *hver = h.obj_version; |
1734 | rbd_dev->header.obj_version = h.obj_version; | 1753 | rbd_dev->header.obj_version = h.obj_version; |
1735 | rbd_dev->header.image_size = h.image_size; | 1754 | rbd_dev->header.image_size = h.image_size; |
1736 | rbd_dev->header.total_snaps = h.total_snaps; | ||
1737 | rbd_dev->header.snapc = h.snapc; | 1755 | rbd_dev->header.snapc = h.snapc; |
1738 | rbd_dev->header.snap_names = h.snap_names; | 1756 | rbd_dev->header.snap_names = h.snap_names; |
1739 | rbd_dev->header.snap_names_len = h.snap_names_len; | ||
1740 | rbd_dev->header.snap_sizes = h.snap_sizes; | 1757 | rbd_dev->header.snap_sizes = h.snap_sizes; |
1741 | /* Free the extra copy of the object prefix */ | 1758 | /* Free the extra copy of the object prefix */ |
1742 | WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); | 1759 | WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); |
1743 | kfree(h.object_prefix); | 1760 | kfree(h.object_prefix); |
1744 | 1761 | ||
1745 | ret = __rbd_init_snaps_header(rbd_dev); | 1762 | ret = rbd_dev_snaps_update(rbd_dev); |
1763 | if (!ret) | ||
1764 | ret = rbd_dev_snaps_register(rbd_dev); | ||
1746 | 1765 | ||
1747 | up_write(&rbd_dev->header_rwsem); | 1766 | up_write(&rbd_dev->header_rwsem); |
1748 | 1767 | ||
@@ -1764,29 +1783,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1764 | { | 1783 | { |
1765 | struct gendisk *disk; | 1784 | struct gendisk *disk; |
1766 | struct request_queue *q; | 1785 | struct request_queue *q; |
1767 | int rc; | ||
1768 | u64 segment_size; | 1786 | u64 segment_size; |
1769 | u64 total_size = 0; | ||
1770 | |||
1771 | /* contact OSD, request size info about the object being mapped */ | ||
1772 | rc = rbd_read_header(rbd_dev, &rbd_dev->header); | ||
1773 | if (rc) | ||
1774 | return rc; | ||
1775 | |||
1776 | /* no need to lock here, as rbd_dev is not registered yet */ | ||
1777 | rc = __rbd_init_snaps_header(rbd_dev); | ||
1778 | if (rc) | ||
1779 | return rc; | ||
1780 | |||
1781 | rc = rbd_header_set_snap(rbd_dev, &total_size); | ||
1782 | if (rc) | ||
1783 | return rc; | ||
1784 | 1787 | ||
1785 | /* create gendisk info */ | 1788 | /* create gendisk info */ |
1786 | rc = -ENOMEM; | ||
1787 | disk = alloc_disk(RBD_MINORS_PER_MAJOR); | 1789 | disk = alloc_disk(RBD_MINORS_PER_MAJOR); |
1788 | if (!disk) | 1790 | if (!disk) |
1789 | goto out; | 1791 | return -ENOMEM; |
1790 | 1792 | ||
1791 | snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", | 1793 | snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", |
1792 | rbd_dev->dev_id); | 1794 | rbd_dev->dev_id); |
@@ -1796,7 +1798,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1796 | disk->private_data = rbd_dev; | 1798 | disk->private_data = rbd_dev; |
1797 | 1799 | ||
1798 | /* init rq */ | 1800 | /* init rq */ |
1799 | rc = -ENOMEM; | ||
1800 | q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); | 1801 | q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); |
1801 | if (!q) | 1802 | if (!q) |
1802 | goto out_disk; | 1803 | goto out_disk; |
@@ -1817,20 +1818,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1817 | q->queuedata = rbd_dev; | 1818 | q->queuedata = rbd_dev; |
1818 | 1819 | ||
1819 | rbd_dev->disk = disk; | 1820 | rbd_dev->disk = disk; |
1820 | rbd_dev->q = q; | ||
1821 | 1821 | ||
1822 | /* finally, announce the disk to the world */ | 1822 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); |
1823 | set_capacity(disk, total_size / SECTOR_SIZE); | ||
1824 | add_disk(disk); | ||
1825 | 1823 | ||
1826 | pr_info("%s: added with size 0x%llx\n", | ||
1827 | disk->disk_name, (unsigned long long)total_size); | ||
1828 | return 0; | 1824 | return 0; |
1829 | |||
1830 | out_disk: | 1825 | out_disk: |
1831 | put_disk(disk); | 1826 | put_disk(disk); |
1832 | out: | 1827 | |
1833 | return rc; | 1828 | return -ENOMEM; |
1834 | } | 1829 | } |
1835 | 1830 | ||
1836 | /* | 1831 | /* |
@@ -1855,6 +1850,19 @@ static ssize_t rbd_size_show(struct device *dev, | |||
1855 | return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); | 1850 | return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); |
1856 | } | 1851 | } |
1857 | 1852 | ||
1853 | /* | ||
1854 | * Note this shows the features for whatever's mapped, which is not | ||
1855 | * necessarily the base image. | ||
1856 | */ | ||
1857 | static ssize_t rbd_features_show(struct device *dev, | ||
1858 | struct device_attribute *attr, char *buf) | ||
1859 | { | ||
1860 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | ||
1861 | |||
1862 | return sprintf(buf, "0x%016llx\n", | ||
1863 | (unsigned long long) rbd_dev->mapping.features); | ||
1864 | } | ||
1865 | |||
1858 | static ssize_t rbd_major_show(struct device *dev, | 1866 | static ssize_t rbd_major_show(struct device *dev, |
1859 | struct device_attribute *attr, char *buf) | 1867 | struct device_attribute *attr, char *buf) |
1860 | { | 1868 | { |
@@ -1896,13 +1904,25 @@ static ssize_t rbd_name_show(struct device *dev, | |||
1896 | return sprintf(buf, "%s\n", rbd_dev->image_name); | 1904 | return sprintf(buf, "%s\n", rbd_dev->image_name); |
1897 | } | 1905 | } |
1898 | 1906 | ||
1907 | static ssize_t rbd_image_id_show(struct device *dev, | ||
1908 | struct device_attribute *attr, char *buf) | ||
1909 | { | ||
1910 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | ||
1911 | |||
1912 | return sprintf(buf, "%s\n", rbd_dev->image_id); | ||
1913 | } | ||
1914 | |||
1915 | /* | ||
1916 | * Shows the name of the currently-mapped snapshot (or | ||
1917 | * RBD_SNAP_HEAD_NAME for the base image). | ||
1918 | */ | ||
1899 | static ssize_t rbd_snap_show(struct device *dev, | 1919 | static ssize_t rbd_snap_show(struct device *dev, |
1900 | struct device_attribute *attr, | 1920 | struct device_attribute *attr, |
1901 | char *buf) | 1921 | char *buf) |
1902 | { | 1922 | { |
1903 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | 1923 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1904 | 1924 | ||
1905 | return sprintf(buf, "%s\n", rbd_dev->snap_name); | 1925 | return sprintf(buf, "%s\n", rbd_dev->mapping.snap_name); |
1906 | } | 1926 | } |
1907 | 1927 | ||
1908 | static ssize_t rbd_image_refresh(struct device *dev, | 1928 | static ssize_t rbd_image_refresh(struct device *dev, |
@@ -1919,25 +1939,27 @@ static ssize_t rbd_image_refresh(struct device *dev, | |||
1919 | } | 1939 | } |
1920 | 1940 | ||
1921 | static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); | 1941 | static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); |
1942 | static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL); | ||
1922 | static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); | 1943 | static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); |
1923 | static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); | 1944 | static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); |
1924 | static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); | 1945 | static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); |
1925 | static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); | 1946 | static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); |
1926 | static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); | 1947 | static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); |
1948 | static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); | ||
1927 | static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); | 1949 | static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); |
1928 | static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); | 1950 | static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); |
1929 | static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add); | ||
1930 | 1951 | ||
1931 | static struct attribute *rbd_attrs[] = { | 1952 | static struct attribute *rbd_attrs[] = { |
1932 | &dev_attr_size.attr, | 1953 | &dev_attr_size.attr, |
1954 | &dev_attr_features.attr, | ||
1933 | &dev_attr_major.attr, | 1955 | &dev_attr_major.attr, |
1934 | &dev_attr_client_id.attr, | 1956 | &dev_attr_client_id.attr, |
1935 | &dev_attr_pool.attr, | 1957 | &dev_attr_pool.attr, |
1936 | &dev_attr_pool_id.attr, | 1958 | &dev_attr_pool_id.attr, |
1937 | &dev_attr_name.attr, | 1959 | &dev_attr_name.attr, |
1960 | &dev_attr_image_id.attr, | ||
1938 | &dev_attr_current_snap.attr, | 1961 | &dev_attr_current_snap.attr, |
1939 | &dev_attr_refresh.attr, | 1962 | &dev_attr_refresh.attr, |
1940 | &dev_attr_create_snap.attr, | ||
1941 | NULL | 1963 | NULL |
1942 | }; | 1964 | }; |
1943 | 1965 | ||
@@ -1983,12 +2005,24 @@ static ssize_t rbd_snap_id_show(struct device *dev, | |||
1983 | return sprintf(buf, "%llu\n", (unsigned long long)snap->id); | 2005 | return sprintf(buf, "%llu\n", (unsigned long long)snap->id); |
1984 | } | 2006 | } |
1985 | 2007 | ||
2008 | static ssize_t rbd_snap_features_show(struct device *dev, | ||
2009 | struct device_attribute *attr, | ||
2010 | char *buf) | ||
2011 | { | ||
2012 | struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); | ||
2013 | |||
2014 | return sprintf(buf, "0x%016llx\n", | ||
2015 | (unsigned long long) snap->features); | ||
2016 | } | ||
2017 | |||
1986 | static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); | 2018 | static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); |
1987 | static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); | 2019 | static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); |
2020 | static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL); | ||
1988 | 2021 | ||
1989 | static struct attribute *rbd_snap_attrs[] = { | 2022 | static struct attribute *rbd_snap_attrs[] = { |
1990 | &dev_attr_snap_size.attr, | 2023 | &dev_attr_snap_size.attr, |
1991 | &dev_attr_snap_id.attr, | 2024 | &dev_attr_snap_id.attr, |
2025 | &dev_attr_snap_features.attr, | ||
1992 | NULL, | 2026 | NULL, |
1993 | }; | 2027 | }; |
1994 | 2028 | ||
@@ -2013,10 +2047,21 @@ static struct device_type rbd_snap_device_type = { | |||
2013 | .release = rbd_snap_dev_release, | 2047 | .release = rbd_snap_dev_release, |
2014 | }; | 2048 | }; |
2015 | 2049 | ||
2050 | static bool rbd_snap_registered(struct rbd_snap *snap) | ||
2051 | { | ||
2052 | bool ret = snap->dev.type == &rbd_snap_device_type; | ||
2053 | bool reg = device_is_registered(&snap->dev); | ||
2054 | |||
2055 | rbd_assert(!ret ^ reg); | ||
2056 | |||
2057 | return ret; | ||
2058 | } | ||
2059 | |||
2016 | static void __rbd_remove_snap_dev(struct rbd_snap *snap) | 2060 | static void __rbd_remove_snap_dev(struct rbd_snap *snap) |
2017 | { | 2061 | { |
2018 | list_del(&snap->node); | 2062 | list_del(&snap->node); |
2019 | device_unregister(&snap->dev); | 2063 | if (device_is_registered(&snap->dev)) |
2064 | device_unregister(&snap->dev); | ||
2020 | } | 2065 | } |
2021 | 2066 | ||
2022 | static int rbd_register_snap_dev(struct rbd_snap *snap, | 2067 | static int rbd_register_snap_dev(struct rbd_snap *snap, |
@@ -2029,13 +2074,17 @@ static int rbd_register_snap_dev(struct rbd_snap *snap, | |||
2029 | dev->parent = parent; | 2074 | dev->parent = parent; |
2030 | dev->release = rbd_snap_dev_release; | 2075 | dev->release = rbd_snap_dev_release; |
2031 | dev_set_name(dev, "snap_%s", snap->name); | 2076 | dev_set_name(dev, "snap_%s", snap->name); |
2077 | dout("%s: registering device for snapshot %s\n", __func__, snap->name); | ||
2078 | |||
2032 | ret = device_register(dev); | 2079 | ret = device_register(dev); |
2033 | 2080 | ||
2034 | return ret; | 2081 | return ret; |
2035 | } | 2082 | } |
2036 | 2083 | ||
2037 | static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, | 2084 | static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, |
2038 | int i, const char *name) | 2085 | const char *snap_name, |
2086 | u64 snap_id, u64 snap_size, | ||
2087 | u64 snap_features) | ||
2039 | { | 2088 | { |
2040 | struct rbd_snap *snap; | 2089 | struct rbd_snap *snap; |
2041 | int ret; | 2090 | int ret; |
@@ -2045,17 +2094,13 @@ static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, | |||
2045 | return ERR_PTR(-ENOMEM); | 2094 | return ERR_PTR(-ENOMEM); |
2046 | 2095 | ||
2047 | ret = -ENOMEM; | 2096 | ret = -ENOMEM; |
2048 | snap->name = kstrdup(name, GFP_KERNEL); | 2097 | snap->name = kstrdup(snap_name, GFP_KERNEL); |
2049 | if (!snap->name) | 2098 | if (!snap->name) |
2050 | goto err; | 2099 | goto err; |
2051 | 2100 | ||
2052 | snap->size = rbd_dev->header.snap_sizes[i]; | 2101 | snap->id = snap_id; |
2053 | snap->id = rbd_dev->header.snapc->snaps[i]; | 2102 | snap->size = snap_size; |
2054 | if (device_is_registered(&rbd_dev->dev)) { | 2103 | snap->features = snap_features; |
2055 | ret = rbd_register_snap_dev(snap, &rbd_dev->dev); | ||
2056 | if (ret < 0) | ||
2057 | goto err; | ||
2058 | } | ||
2059 | 2104 | ||
2060 | return snap; | 2105 | return snap; |
2061 | 2106 | ||
@@ -2066,128 +2111,439 @@ err: | |||
2066 | return ERR_PTR(ret); | 2111 | return ERR_PTR(ret); |
2067 | } | 2112 | } |
2068 | 2113 | ||
2114 | static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which, | ||
2115 | u64 *snap_size, u64 *snap_features) | ||
2116 | { | ||
2117 | char *snap_name; | ||
2118 | |||
2119 | rbd_assert(which < rbd_dev->header.snapc->num_snaps); | ||
2120 | |||
2121 | *snap_size = rbd_dev->header.snap_sizes[which]; | ||
2122 | *snap_features = 0; /* No features for v1 */ | ||
2123 | |||
2124 | /* Skip over names until we find the one we are looking for */ | ||
2125 | |||
2126 | snap_name = rbd_dev->header.snap_names; | ||
2127 | while (which--) | ||
2128 | snap_name += strlen(snap_name) + 1; | ||
2129 | |||
2130 | return snap_name; | ||
2131 | } | ||
2132 | |||
2069 | /* | 2133 | /* |
2070 | * search for the previous snap in a null delimited string list | 2134 | * Get the size and object order for an image snapshot, or if |
2135 | * snap_id is CEPH_NOSNAP, gets this information for the base | ||
2136 | * image. | ||
2071 | */ | 2137 | */ |
2072 | const char *rbd_prev_snap_name(const char *name, const char *start) | 2138 | static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, |
2139 | u8 *order, u64 *snap_size) | ||
2073 | { | 2140 | { |
2074 | if (name < start + 2) | 2141 | __le64 snapid = cpu_to_le64(snap_id); |
2075 | return NULL; | 2142 | int ret; |
2143 | struct { | ||
2144 | u8 order; | ||
2145 | __le64 size; | ||
2146 | } __attribute__ ((packed)) size_buf = { 0 }; | ||
2147 | |||
2148 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | ||
2149 | "rbd", "get_size", | ||
2150 | (char *) &snapid, sizeof (snapid), | ||
2151 | (char *) &size_buf, sizeof (size_buf), | ||
2152 | CEPH_OSD_FLAG_READ, NULL); | ||
2153 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2154 | if (ret < 0) | ||
2155 | return ret; | ||
2156 | |||
2157 | *order = size_buf.order; | ||
2158 | *snap_size = le64_to_cpu(size_buf.size); | ||
2076 | 2159 | ||
2077 | name -= 2; | 2160 | dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n", |
2078 | while (*name) { | 2161 | (unsigned long long) snap_id, (unsigned int) *order, |
2079 | if (name == start) | 2162 | (unsigned long long) *snap_size); |
2080 | return start; | 2163 | |
2081 | name--; | 2164 | return 0; |
2165 | } | ||
2166 | |||
2167 | static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) | ||
2168 | { | ||
2169 | return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, | ||
2170 | &rbd_dev->header.obj_order, | ||
2171 | &rbd_dev->header.image_size); | ||
2172 | } | ||
2173 | |||
2174 | static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) | ||
2175 | { | ||
2176 | void *reply_buf; | ||
2177 | int ret; | ||
2178 | void *p; | ||
2179 | |||
2180 | reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL); | ||
2181 | if (!reply_buf) | ||
2182 | return -ENOMEM; | ||
2183 | |||
2184 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | ||
2185 | "rbd", "get_object_prefix", | ||
2186 | NULL, 0, | ||
2187 | reply_buf, RBD_OBJ_PREFIX_LEN_MAX, | ||
2188 | CEPH_OSD_FLAG_READ, NULL); | ||
2189 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2190 | if (ret < 0) | ||
2191 | goto out; | ||
2192 | |||
2193 | p = reply_buf; | ||
2194 | rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, | ||
2195 | p + RBD_OBJ_PREFIX_LEN_MAX, | ||
2196 | NULL, GFP_NOIO); | ||
2197 | |||
2198 | if (IS_ERR(rbd_dev->header.object_prefix)) { | ||
2199 | ret = PTR_ERR(rbd_dev->header.object_prefix); | ||
2200 | rbd_dev->header.object_prefix = NULL; | ||
2201 | } else { | ||
2202 | dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); | ||
2082 | } | 2203 | } |
2083 | return name + 1; | 2204 | |
2205 | out: | ||
2206 | kfree(reply_buf); | ||
2207 | |||
2208 | return ret; | ||
2084 | } | 2209 | } |
2085 | 2210 | ||
2086 | /* | 2211 | static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, |
2087 | * compare the old list of snapshots that we have to what's in the header | 2212 | u64 *snap_features) |
2088 | * and update it accordingly. Note that the header holds the snapshots | ||
2089 | * in a reverse order (from newest to oldest) and we need to go from | ||
2090 | * older to new so that we don't get a duplicate snap name when | ||
2091 | * doing the process (e.g., removed snapshot and recreated a new | ||
2092 | * one with the same name. | ||
2093 | */ | ||
2094 | static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) | ||
2095 | { | 2213 | { |
2096 | const char *name, *first_name; | 2214 | __le64 snapid = cpu_to_le64(snap_id); |
2097 | int i = rbd_dev->header.total_snaps; | 2215 | struct { |
2098 | struct rbd_snap *snap, *old_snap = NULL; | 2216 | __le64 features; |
2099 | struct list_head *p, *n; | 2217 | __le64 incompat; |
2218 | } features_buf = { 0 }; | ||
2219 | int ret; | ||
2100 | 2220 | ||
2101 | first_name = rbd_dev->header.snap_names; | 2221 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, |
2102 | name = first_name + rbd_dev->header.snap_names_len; | 2222 | "rbd", "get_features", |
2223 | (char *) &snapid, sizeof (snapid), | ||
2224 | (char *) &features_buf, sizeof (features_buf), | ||
2225 | CEPH_OSD_FLAG_READ, NULL); | ||
2226 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2227 | if (ret < 0) | ||
2228 | return ret; | ||
2229 | *snap_features = le64_to_cpu(features_buf.features); | ||
2103 | 2230 | ||
2104 | list_for_each_prev_safe(p, n, &rbd_dev->snaps) { | 2231 | dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", |
2105 | u64 cur_id; | 2232 | (unsigned long long) snap_id, |
2233 | (unsigned long long) *snap_features, | ||
2234 | (unsigned long long) le64_to_cpu(features_buf.incompat)); | ||
2106 | 2235 | ||
2107 | old_snap = list_entry(p, struct rbd_snap, node); | 2236 | return 0; |
2237 | } | ||
2108 | 2238 | ||
2109 | if (i) | 2239 | static int rbd_dev_v2_features(struct rbd_device *rbd_dev) |
2110 | cur_id = rbd_dev->header.snapc->snaps[i - 1]; | 2240 | { |
2241 | return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, | ||
2242 | &rbd_dev->header.features); | ||
2243 | } | ||
2111 | 2244 | ||
2112 | if (!i || old_snap->id < cur_id) { | 2245 | static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) |
2113 | /* | 2246 | { |
2114 | * old_snap->id was skipped, thus was | 2247 | size_t size; |
2115 | * removed. If this rbd_dev is mapped to | 2248 | int ret; |
2116 | * the removed snapshot, record that it no | 2249 | void *reply_buf; |
2117 | * longer exists, to prevent further I/O. | 2250 | void *p; |
2118 | */ | 2251 | void *end; |
2119 | if (rbd_dev->snap_id == old_snap->id) | 2252 | u64 seq; |
2120 | rbd_dev->snap_exists = false; | 2253 | u32 snap_count; |
2121 | __rbd_remove_snap_dev(old_snap); | 2254 | struct ceph_snap_context *snapc; |
2122 | continue; | 2255 | u32 i; |
2123 | } | 2256 | |
2124 | if (old_snap->id == cur_id) { | 2257 | /* |
2125 | /* we have this snapshot already */ | 2258 | * We'll need room for the seq value (maximum snapshot id), |
2126 | i--; | 2259 | * snapshot count, and array of that many snapshot ids. |
2127 | name = rbd_prev_snap_name(name, first_name); | 2260 | * For now we have a fixed upper limit on the number we're |
2261 | * prepared to receive. | ||
2262 | */ | ||
2263 | size = sizeof (__le64) + sizeof (__le32) + | ||
2264 | RBD_MAX_SNAP_COUNT * sizeof (__le64); | ||
2265 | reply_buf = kzalloc(size, GFP_KERNEL); | ||
2266 | if (!reply_buf) | ||
2267 | return -ENOMEM; | ||
2268 | |||
2269 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | ||
2270 | "rbd", "get_snapcontext", | ||
2271 | NULL, 0, | ||
2272 | reply_buf, size, | ||
2273 | CEPH_OSD_FLAG_READ, ver); | ||
2274 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2275 | if (ret < 0) | ||
2276 | goto out; | ||
2277 | |||
2278 | ret = -ERANGE; | ||
2279 | p = reply_buf; | ||
2280 | end = (char *) reply_buf + size; | ||
2281 | ceph_decode_64_safe(&p, end, seq, out); | ||
2282 | ceph_decode_32_safe(&p, end, snap_count, out); | ||
2283 | |||
2284 | /* | ||
2285 | * Make sure the reported number of snapshot ids wouldn't go | ||
2286 | * beyond the end of our buffer. But before checking that, | ||
2287 | * make sure the computed size of the snapshot context we | ||
2288 | * allocate is representable in a size_t. | ||
2289 | */ | ||
2290 | if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context)) | ||
2291 | / sizeof (u64)) { | ||
2292 | ret = -EINVAL; | ||
2293 | goto out; | ||
2294 | } | ||
2295 | if (!ceph_has_room(&p, end, snap_count * sizeof (__le64))) | ||
2296 | goto out; | ||
2297 | |||
2298 | size = sizeof (struct ceph_snap_context) + | ||
2299 | snap_count * sizeof (snapc->snaps[0]); | ||
2300 | snapc = kmalloc(size, GFP_KERNEL); | ||
2301 | if (!snapc) { | ||
2302 | ret = -ENOMEM; | ||
2303 | goto out; | ||
2304 | } | ||
2305 | |||
2306 | atomic_set(&snapc->nref, 1); | ||
2307 | snapc->seq = seq; | ||
2308 | snapc->num_snaps = snap_count; | ||
2309 | for (i = 0; i < snap_count; i++) | ||
2310 | snapc->snaps[i] = ceph_decode_64(&p); | ||
2311 | |||
2312 | rbd_dev->header.snapc = snapc; | ||
2313 | |||
2314 | dout(" snap context seq = %llu, snap_count = %u\n", | ||
2315 | (unsigned long long) seq, (unsigned int) snap_count); | ||
2316 | |||
2317 | out: | ||
2318 | kfree(reply_buf); | ||
2319 | |||
2320 | return 0; | ||
2321 | } | ||
2322 | |||
2323 | static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) | ||
2324 | { | ||
2325 | size_t size; | ||
2326 | void *reply_buf; | ||
2327 | __le64 snap_id; | ||
2328 | int ret; | ||
2329 | void *p; | ||
2330 | void *end; | ||
2331 | size_t snap_name_len; | ||
2332 | char *snap_name; | ||
2333 | |||
2334 | size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN; | ||
2335 | reply_buf = kmalloc(size, GFP_KERNEL); | ||
2336 | if (!reply_buf) | ||
2337 | return ERR_PTR(-ENOMEM); | ||
2338 | |||
2339 | snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); | ||
2340 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | ||
2341 | "rbd", "get_snapshot_name", | ||
2342 | (char *) &snap_id, sizeof (snap_id), | ||
2343 | reply_buf, size, | ||
2344 | CEPH_OSD_FLAG_READ, NULL); | ||
2345 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2346 | if (ret < 0) | ||
2347 | goto out; | ||
2348 | |||
2349 | p = reply_buf; | ||
2350 | end = (char *) reply_buf + size; | ||
2351 | snap_name_len = 0; | ||
2352 | snap_name = ceph_extract_encoded_string(&p, end, &snap_name_len, | ||
2353 | GFP_KERNEL); | ||
2354 | if (IS_ERR(snap_name)) { | ||
2355 | ret = PTR_ERR(snap_name); | ||
2356 | goto out; | ||
2357 | } else { | ||
2358 | dout(" snap_id 0x%016llx snap_name = %s\n", | ||
2359 | (unsigned long long) le64_to_cpu(snap_id), snap_name); | ||
2360 | } | ||
2361 | kfree(reply_buf); | ||
2362 | |||
2363 | return snap_name; | ||
2364 | out: | ||
2365 | kfree(reply_buf); | ||
2366 | |||
2367 | return ERR_PTR(ret); | ||
2368 | } | ||
2369 | |||
2370 | static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, | ||
2371 | u64 *snap_size, u64 *snap_features) | ||
2372 | { | ||
2373 | __le64 snap_id; | ||
2374 | u8 order; | ||
2375 | int ret; | ||
2376 | |||
2377 | snap_id = rbd_dev->header.snapc->snaps[which]; | ||
2378 | ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size); | ||
2379 | if (ret) | ||
2380 | return ERR_PTR(ret); | ||
2381 | ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features); | ||
2382 | if (ret) | ||
2383 | return ERR_PTR(ret); | ||
2384 | |||
2385 | return rbd_dev_v2_snap_name(rbd_dev, which); | ||
2386 | } | ||
2387 | |||
2388 | static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, | ||
2389 | u64 *snap_size, u64 *snap_features) | ||
2390 | { | ||
2391 | if (rbd_dev->image_format == 1) | ||
2392 | return rbd_dev_v1_snap_info(rbd_dev, which, | ||
2393 | snap_size, snap_features); | ||
2394 | if (rbd_dev->image_format == 2) | ||
2395 | return rbd_dev_v2_snap_info(rbd_dev, which, | ||
2396 | snap_size, snap_features); | ||
2397 | return ERR_PTR(-EINVAL); | ||
2398 | } | ||
2399 | |||
2400 | /* | ||
2401 | * Scan the rbd device's current snapshot list and compare it to the | ||
2402 | * newly-received snapshot context. Remove any existing snapshots | ||
2403 | * not present in the new snapshot context. Add a new snapshot for | ||
2404 | * any snapshots in the snapshot context not in the current list. | ||
2405 | * And verify there are no changes to snapshots we already know | ||
2406 | * about. | ||
2407 | * | ||
2408 | * Assumes the snapshots in the snapshot context are sorted by | ||
2409 | * snapshot id, highest id first. (Snapshots in the rbd_dev's list | ||
2410 | * are also maintained in that order.) | ||
2411 | */ | ||
2412 | static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) | ||
2413 | { | ||
2414 | struct ceph_snap_context *snapc = rbd_dev->header.snapc; | ||
2415 | const u32 snap_count = snapc->num_snaps; | ||
2416 | struct list_head *head = &rbd_dev->snaps; | ||
2417 | struct list_head *links = head->next; | ||
2418 | u32 index = 0; | ||
2419 | |||
2420 | dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count); | ||
2421 | while (index < snap_count || links != head) { | ||
2422 | u64 snap_id; | ||
2423 | struct rbd_snap *snap; | ||
2424 | char *snap_name; | ||
2425 | u64 snap_size = 0; | ||
2426 | u64 snap_features = 0; | ||
2427 | |||
2428 | snap_id = index < snap_count ? snapc->snaps[index] | ||
2429 | : CEPH_NOSNAP; | ||
2430 | snap = links != head ? list_entry(links, struct rbd_snap, node) | ||
2431 | : NULL; | ||
2432 | rbd_assert(!snap || snap->id != CEPH_NOSNAP); | ||
2433 | |||
2434 | if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { | ||
2435 | struct list_head *next = links->next; | ||
2436 | |||
2437 | /* Existing snapshot not in the new snap context */ | ||
2438 | |||
2439 | if (rbd_dev->mapping.snap_id == snap->id) | ||
2440 | rbd_dev->mapping.snap_exists = false; | ||
2441 | __rbd_remove_snap_dev(snap); | ||
2442 | dout("%ssnap id %llu has been removed\n", | ||
2443 | rbd_dev->mapping.snap_id == snap->id ? | ||
2444 | "mapped " : "", | ||
2445 | (unsigned long long) snap->id); | ||
2446 | |||
2447 | /* Done with this list entry; advance */ | ||
2448 | |||
2449 | links = next; | ||
2128 | continue; | 2450 | continue; |
2129 | } | 2451 | } |
2130 | for (; i > 0; | 2452 | |
2131 | i--, name = rbd_prev_snap_name(name, first_name)) { | 2453 | snap_name = rbd_dev_snap_info(rbd_dev, index, |
2132 | if (!name) { | 2454 | &snap_size, &snap_features); |
2133 | WARN_ON(1); | 2455 | if (IS_ERR(snap_name)) |
2134 | return -EINVAL; | 2456 | return PTR_ERR(snap_name); |
2457 | |||
2458 | dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count, | ||
2459 | (unsigned long long) snap_id); | ||
2460 | if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) { | ||
2461 | struct rbd_snap *new_snap; | ||
2462 | |||
2463 | /* We haven't seen this snapshot before */ | ||
2464 | |||
2465 | new_snap = __rbd_add_snap_dev(rbd_dev, snap_name, | ||
2466 | snap_id, snap_size, snap_features); | ||
2467 | if (IS_ERR(new_snap)) { | ||
2468 | int err = PTR_ERR(new_snap); | ||
2469 | |||
2470 | dout(" failed to add dev, error %d\n", err); | ||
2471 | |||
2472 | return err; | ||
2135 | } | 2473 | } |
2136 | cur_id = rbd_dev->header.snapc->snaps[i]; | 2474 | |
2137 | /* snapshot removal? handle it above */ | 2475 | /* New goes before existing, or at end of list */ |
2138 | if (cur_id >= old_snap->id) | 2476 | |
2139 | break; | 2477 | dout(" added dev%s\n", snap ? "" : " at end\n"); |
2140 | /* a new snapshot */ | 2478 | if (snap) |
2141 | snap = __rbd_add_snap_dev(rbd_dev, i - 1, name); | 2479 | list_add_tail(&new_snap->node, &snap->node); |
2142 | if (IS_ERR(snap)) | 2480 | else |
2143 | return PTR_ERR(snap); | 2481 | list_add_tail(&new_snap->node, head); |
2144 | 2482 | } else { | |
2145 | /* note that we add it backward so using n and not p */ | 2483 | /* Already have this one */ |
2146 | list_add(&snap->node, n); | 2484 | |
2147 | p = &snap->node; | 2485 | dout(" already present\n"); |
2486 | |||
2487 | rbd_assert(snap->size == snap_size); | ||
2488 | rbd_assert(!strcmp(snap->name, snap_name)); | ||
2489 | rbd_assert(snap->features == snap_features); | ||
2490 | |||
2491 | /* Done with this list entry; advance */ | ||
2492 | |||
2493 | links = links->next; | ||
2148 | } | 2494 | } |
2495 | |||
2496 | /* Advance to the next entry in the snapshot context */ | ||
2497 | |||
2498 | index++; | ||
2149 | } | 2499 | } |
2150 | /* we're done going over the old snap list, just add what's left */ | 2500 | dout("%s: done\n", __func__); |
2151 | for (; i > 0; i--) { | 2501 | |
2152 | name = rbd_prev_snap_name(name, first_name); | 2502 | return 0; |
2153 | if (!name) { | 2503 | } |
2154 | WARN_ON(1); | 2504 | |
2155 | return -EINVAL; | 2505 | /* |
2506 | * Scan the list of snapshots and register the devices for any that | ||
2507 | * have not already been registered. | ||
2508 | */ | ||
2509 | static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) | ||
2510 | { | ||
2511 | struct rbd_snap *snap; | ||
2512 | int ret = 0; | ||
2513 | |||
2514 | dout("%s called\n", __func__); | ||
2515 | if (WARN_ON(!device_is_registered(&rbd_dev->dev))) | ||
2516 | return -EIO; | ||
2517 | |||
2518 | list_for_each_entry(snap, &rbd_dev->snaps, node) { | ||
2519 | if (!rbd_snap_registered(snap)) { | ||
2520 | ret = rbd_register_snap_dev(snap, &rbd_dev->dev); | ||
2521 | if (ret < 0) | ||
2522 | break; | ||
2156 | } | 2523 | } |
2157 | snap = __rbd_add_snap_dev(rbd_dev, i - 1, name); | ||
2158 | if (IS_ERR(snap)) | ||
2159 | return PTR_ERR(snap); | ||
2160 | list_add(&snap->node, &rbd_dev->snaps); | ||
2161 | } | 2524 | } |
2525 | dout("%s: returning %d\n", __func__, ret); | ||
2162 | 2526 | ||
2163 | return 0; | 2527 | return ret; |
2164 | } | 2528 | } |
2165 | 2529 | ||
2166 | static int rbd_bus_add_dev(struct rbd_device *rbd_dev) | 2530 | static int rbd_bus_add_dev(struct rbd_device *rbd_dev) |
2167 | { | 2531 | { |
2168 | int ret; | ||
2169 | struct device *dev; | 2532 | struct device *dev; |
2170 | struct rbd_snap *snap; | 2533 | int ret; |
2171 | 2534 | ||
2172 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 2535 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
2173 | dev = &rbd_dev->dev; | ||
2174 | 2536 | ||
2537 | dev = &rbd_dev->dev; | ||
2175 | dev->bus = &rbd_bus_type; | 2538 | dev->bus = &rbd_bus_type; |
2176 | dev->type = &rbd_device_type; | 2539 | dev->type = &rbd_device_type; |
2177 | dev->parent = &rbd_root_dev; | 2540 | dev->parent = &rbd_root_dev; |
2178 | dev->release = rbd_dev_release; | 2541 | dev->release = rbd_dev_release; |
2179 | dev_set_name(dev, "%d", rbd_dev->dev_id); | 2542 | dev_set_name(dev, "%d", rbd_dev->dev_id); |
2180 | ret = device_register(dev); | 2543 | ret = device_register(dev); |
2181 | if (ret < 0) | ||
2182 | goto out; | ||
2183 | 2544 | ||
2184 | list_for_each_entry(snap, &rbd_dev->snaps, node) { | ||
2185 | ret = rbd_register_snap_dev(snap, &rbd_dev->dev); | ||
2186 | if (ret < 0) | ||
2187 | break; | ||
2188 | } | ||
2189 | out: | ||
2190 | mutex_unlock(&ctl_mutex); | 2545 | mutex_unlock(&ctl_mutex); |
2546 | |||
2191 | return ret; | 2547 | return ret; |
2192 | } | 2548 | } |
2193 | 2549 | ||
@@ -2212,33 +2568,37 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) | |||
2212 | return ret; | 2568 | return ret; |
2213 | } | 2569 | } |
2214 | 2570 | ||
2215 | static atomic64_t rbd_id_max = ATOMIC64_INIT(0); | 2571 | static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); |
2216 | 2572 | ||
2217 | /* | 2573 | /* |
2218 | * Get a unique rbd identifier for the given new rbd_dev, and add | 2574 | * Get a unique rbd identifier for the given new rbd_dev, and add |
2219 | * the rbd_dev to the global list. The minimum rbd id is 1. | 2575 | * the rbd_dev to the global list. The minimum rbd id is 1. |
2220 | */ | 2576 | */ |
2221 | static void rbd_id_get(struct rbd_device *rbd_dev) | 2577 | static void rbd_dev_id_get(struct rbd_device *rbd_dev) |
2222 | { | 2578 | { |
2223 | rbd_dev->dev_id = atomic64_inc_return(&rbd_id_max); | 2579 | rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max); |
2224 | 2580 | ||
2225 | spin_lock(&rbd_dev_list_lock); | 2581 | spin_lock(&rbd_dev_list_lock); |
2226 | list_add_tail(&rbd_dev->node, &rbd_dev_list); | 2582 | list_add_tail(&rbd_dev->node, &rbd_dev_list); |
2227 | spin_unlock(&rbd_dev_list_lock); | 2583 | spin_unlock(&rbd_dev_list_lock); |
2584 | dout("rbd_dev %p given dev id %llu\n", rbd_dev, | ||
2585 | (unsigned long long) rbd_dev->dev_id); | ||
2228 | } | 2586 | } |
2229 | 2587 | ||
2230 | /* | 2588 | /* |
2231 | * Remove an rbd_dev from the global list, and record that its | 2589 | * Remove an rbd_dev from the global list, and record that its |
2232 | * identifier is no longer in use. | 2590 | * identifier is no longer in use. |
2233 | */ | 2591 | */ |
2234 | static void rbd_id_put(struct rbd_device *rbd_dev) | 2592 | static void rbd_dev_id_put(struct rbd_device *rbd_dev) |
2235 | { | 2593 | { |
2236 | struct list_head *tmp; | 2594 | struct list_head *tmp; |
2237 | int rbd_id = rbd_dev->dev_id; | 2595 | int rbd_id = rbd_dev->dev_id; |
2238 | int max_id; | 2596 | int max_id; |
2239 | 2597 | ||
2240 | BUG_ON(rbd_id < 1); | 2598 | rbd_assert(rbd_id > 0); |
2241 | 2599 | ||
2600 | dout("rbd_dev %p released dev id %llu\n", rbd_dev, | ||
2601 | (unsigned long long) rbd_dev->dev_id); | ||
2242 | spin_lock(&rbd_dev_list_lock); | 2602 | spin_lock(&rbd_dev_list_lock); |
2243 | list_del_init(&rbd_dev->node); | 2603 | list_del_init(&rbd_dev->node); |
2244 | 2604 | ||
@@ -2246,7 +2606,7 @@ static void rbd_id_put(struct rbd_device *rbd_dev) | |||
2246 | * If the id being "put" is not the current maximum, there | 2606 | * If the id being "put" is not the current maximum, there |
2247 | * is nothing special we need to do. | 2607 | * is nothing special we need to do. |
2248 | */ | 2608 | */ |
2249 | if (rbd_id != atomic64_read(&rbd_id_max)) { | 2609 | if (rbd_id != atomic64_read(&rbd_dev_id_max)) { |
2250 | spin_unlock(&rbd_dev_list_lock); | 2610 | spin_unlock(&rbd_dev_list_lock); |
2251 | return; | 2611 | return; |
2252 | } | 2612 | } |
@@ -2267,12 +2627,13 @@ static void rbd_id_put(struct rbd_device *rbd_dev) | |||
2267 | spin_unlock(&rbd_dev_list_lock); | 2627 | spin_unlock(&rbd_dev_list_lock); |
2268 | 2628 | ||
2269 | /* | 2629 | /* |
2270 | * The max id could have been updated by rbd_id_get(), in | 2630 | * The max id could have been updated by rbd_dev_id_get(), in |
2271 | * which case it now accurately reflects the new maximum. | 2631 | * which case it now accurately reflects the new maximum. |
2272 | * Be careful not to overwrite the maximum value in that | 2632 | * Be careful not to overwrite the maximum value in that |
2273 | * case. | 2633 | * case. |
2274 | */ | 2634 | */ |
2275 | atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id); | 2635 | atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id); |
2636 | dout(" max dev id has been reset\n"); | ||
2276 | } | 2637 | } |
2277 | 2638 | ||
2278 | /* | 2639 | /* |
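A note on the id scheme renamed above (rbd_id_get()/rbd_id_put() becoming rbd_dev_id_get()/rbd_dev_id_put()): ids come from a monotonically increasing atomic64 counter, and the counter is only lowered again when the id being released happens to be the current maximum. A minimal stand-alone sketch of the same idea, using hypothetical my_dev/my_id_* names rather than the driver's structures:

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_dev {
	struct list_head node;
	u64 dev_id;
};

static atomic64_t my_id_max = ATOMIC64_INIT(0);
static LIST_HEAD(my_dev_list);
static DEFINE_SPINLOCK(my_dev_list_lock);

static void my_id_get(struct my_dev *d)
{
	d->dev_id = atomic64_inc_return(&my_id_max);	/* minimum id is 1 */

	spin_lock(&my_dev_list_lock);
	list_add_tail(&d->node, &my_dev_list);
	spin_unlock(&my_dev_list_lock);
}

static void my_id_put(struct my_dev *d)
{
	u64 max_id = 0;
	struct my_dev *cur;

	spin_lock(&my_dev_list_lock);
	list_del_init(&d->node);

	/* Only if we released the current maximum must it be recomputed. */
	if (d->dev_id == atomic64_read(&my_id_max)) {
		list_for_each_entry(cur, &my_dev_list, node)
			if (cur->dev_id > max_id)
				max_id = cur->dev_id;
		/*
		 * A concurrent my_id_get() may already have raised the
		 * maximum; cmpxchg lowers it only if it is still ours.
		 */
		atomic64_cmpxchg(&my_id_max, d->dev_id, max_id);
	}
	spin_unlock(&my_dev_list_lock);
}

(The sketch holds the list lock across the cmpxchg for simplicity; the driver drops it first, which is safe for the same reason the comment in the hunk above gives.)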
@@ -2361,28 +2722,31 @@ static inline char *dup_token(const char **buf, size_t *lenp) | |||
2361 | } | 2722 | } |
2362 | 2723 | ||
2363 | /* | 2724 | /* |
2364 | * This fills in the pool_name, image_name, image_name_len, snap_name, | 2725 | * This fills in the pool_name, image_name, image_name_len, rbd_dev, |
2365 | * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based | 2726 | * rbd_md_name, and name fields of the given rbd_dev, based on the |
2366 | * on the list of monitor addresses and other options provided via | 2727 | * list of monitor addresses and other options provided via |
2367 | * /sys/bus/rbd/add. | 2728 | * /sys/bus/rbd/add. Returns a pointer to a dynamically-allocated |
2729 | * copy of the snapshot name to map if successful, or a | ||
2730 | * pointer-coded error otherwise. | ||
2368 | * | 2731 | * |
2369 | * Note: rbd_dev is assumed to have been initially zero-filled. | 2732 | * Note: rbd_dev is assumed to have been initially zero-filled. |
2370 | */ | 2733 | */ |
2371 | static int rbd_add_parse_args(struct rbd_device *rbd_dev, | 2734 | static char *rbd_add_parse_args(struct rbd_device *rbd_dev, |
2372 | const char *buf, | 2735 | const char *buf, |
2373 | const char **mon_addrs, | 2736 | const char **mon_addrs, |
2374 | size_t *mon_addrs_size, | 2737 | size_t *mon_addrs_size, |
2375 | char *options, | 2738 | char *options, |
2376 | size_t options_size) | 2739 | size_t options_size) |
2377 | { | 2740 | { |
2378 | size_t len; | 2741 | size_t len; |
2379 | int ret; | 2742 | char *err_ptr = ERR_PTR(-EINVAL); |
2743 | char *snap_name; | ||
2380 | 2744 | ||
2381 | /* The first four tokens are required */ | 2745 | /* The first four tokens are required */ |
2382 | 2746 | ||
2383 | len = next_token(&buf); | 2747 | len = next_token(&buf); |
2384 | if (!len) | 2748 | if (!len) |
2385 | return -EINVAL; | 2749 | return err_ptr; |
2386 | *mon_addrs_size = len + 1; | 2750 | *mon_addrs_size = len + 1; |
2387 | *mon_addrs = buf; | 2751 | *mon_addrs = buf; |
2388 | 2752 | ||
@@ -2390,9 +2754,9 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, | |||
2390 | 2754 | ||
2391 | len = copy_token(&buf, options, options_size); | 2755 | len = copy_token(&buf, options, options_size); |
2392 | if (!len || len >= options_size) | 2756 | if (!len || len >= options_size) |
2393 | return -EINVAL; | 2757 | return err_ptr; |
2394 | 2758 | ||
2395 | ret = -ENOMEM; | 2759 | err_ptr = ERR_PTR(-ENOMEM); |
2396 | rbd_dev->pool_name = dup_token(&buf, NULL); | 2760 | rbd_dev->pool_name = dup_token(&buf, NULL); |
2397 | if (!rbd_dev->pool_name) | 2761 | if (!rbd_dev->pool_name) |
2398 | goto out_err; | 2762 | goto out_err; |
@@ -2401,41 +2765,227 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, | |||
2401 | if (!rbd_dev->image_name) | 2765 | if (!rbd_dev->image_name) |
2402 | goto out_err; | 2766 | goto out_err; |
2403 | 2767 | ||
2404 | /* Create the name of the header object */ | 2768 | /* Snapshot name is optional */ |
2769 | len = next_token(&buf); | ||
2770 | if (!len) { | ||
2771 | buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ | ||
2772 | len = sizeof (RBD_SNAP_HEAD_NAME) - 1; | ||
2773 | } | ||
2774 | snap_name = kmalloc(len + 1, GFP_KERNEL); | ||
2775 | if (!snap_name) | ||
2776 | goto out_err; | ||
2777 | memcpy(snap_name, buf, len); | ||
2778 | *(snap_name + len) = '\0'; | ||
2405 | 2779 | ||
2406 | rbd_dev->header_name = kmalloc(rbd_dev->image_name_len | 2780 | dout(" SNAP_NAME is <%s>, len is %zd\n", snap_name, len); |
2407 | + sizeof (RBD_SUFFIX), | 2781 | |
2408 | GFP_KERNEL); | 2782 | return snap_name; |
2409 | if (!rbd_dev->header_name) | 2783 | |
2784 | out_err: | ||
2785 | kfree(rbd_dev->image_name); | ||
2786 | rbd_dev->image_name = NULL; | ||
2787 | rbd_dev->image_name_len = 0; | ||
2788 | kfree(rbd_dev->pool_name); | ||
2789 | rbd_dev->pool_name = NULL; | ||
2790 | |||
2791 | return err_ptr; | ||
2792 | } | ||
2793 | |||
2794 | /* | ||
2795 | * An rbd format 2 image has a unique identifier, distinct from the | ||
2796 | * name given to it by the user. Internally, that identifier is | ||
2797 | * what's used to specify the names of objects related to the image. | ||
2798 | * | ||
2799 | * A special "rbd id" object is used to map an rbd image name to its | ||
2800 | * id. If that object doesn't exist, then there is no v2 rbd image | ||
2801 | * with the supplied name. | ||
2802 | * | ||
2803 | * This function will record the given rbd_dev's image_id field if | ||
2804 | * it can be determined, and in that case will return 0. If any | ||
2805 | * errors occur a negative errno will be returned and the rbd_dev's | ||
2806 | * image_id field will be unchanged (and should be NULL). | ||
2807 | */ | ||
2808 | static int rbd_dev_image_id(struct rbd_device *rbd_dev) | ||
2809 | { | ||
2810 | int ret; | ||
2811 | size_t size; | ||
2812 | char *object_name; | ||
2813 | void *response; | ||
2814 | void *p; | ||
2815 | |||
2816 | /* | ||
2817 | * First, see if the format 2 image id file exists, and if | ||
2818 | * so, get the image's persistent id from it. | ||
2819 | */ | ||
2820 | size = sizeof (RBD_ID_PREFIX) + rbd_dev->image_name_len; | ||
2821 | object_name = kmalloc(size, GFP_NOIO); | ||
2822 | if (!object_name) | ||
2823 | return -ENOMEM; | ||
2824 | sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->image_name); | ||
2825 | dout("rbd id object name is %s\n", object_name); | ||
2826 | |||
2827 | /* Response will be an encoded string, which includes a length */ | ||
2828 | |||
2829 | size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX; | ||
2830 | response = kzalloc(size, GFP_NOIO); | ||
2831 | if (!response) { | ||
2832 | ret = -ENOMEM; | ||
2833 | goto out; | ||
2834 | } | ||
2835 | |||
2836 | ret = rbd_req_sync_exec(rbd_dev, object_name, | ||
2837 | "rbd", "get_id", | ||
2838 | NULL, 0, | ||
2839 | response, RBD_IMAGE_ID_LEN_MAX, | ||
2840 | CEPH_OSD_FLAG_READ, NULL); | ||
2841 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2842 | if (ret < 0) | ||
2843 | goto out; | ||
2844 | |||
2845 | p = response; | ||
2846 | rbd_dev->image_id = ceph_extract_encoded_string(&p, | ||
2847 | p + RBD_IMAGE_ID_LEN_MAX, | ||
2848 | &rbd_dev->image_id_len, | ||
2849 | GFP_NOIO); | ||
2850 | if (IS_ERR(rbd_dev->image_id)) { | ||
2851 | ret = PTR_ERR(rbd_dev->image_id); | ||
2852 | rbd_dev->image_id = NULL; | ||
2853 | } else { | ||
2854 | dout("image_id is %s\n", rbd_dev->image_id); | ||
2855 | } | ||
2856 | out: | ||
2857 | kfree(response); | ||
2858 | kfree(object_name); | ||
2859 | |||
2860 | return ret; | ||
2861 | } | ||
2862 | |||
2863 | static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) | ||
2864 | { | ||
2865 | int ret; | ||
2866 | size_t size; | ||
2867 | |||
2868 | /* Version 1 images have no id; empty string is used */ | ||
2869 | |||
2870 | rbd_dev->image_id = kstrdup("", GFP_KERNEL); | ||
2871 | if (!rbd_dev->image_id) | ||
2872 | return -ENOMEM; | ||
2873 | rbd_dev->image_id_len = 0; | ||
2874 | |||
2875 | /* Record the header object name for this rbd image. */ | ||
2876 | |||
2877 | size = rbd_dev->image_name_len + sizeof (RBD_SUFFIX); | ||
2878 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); | ||
2879 | if (!rbd_dev->header_name) { | ||
2880 | ret = -ENOMEM; | ||
2410 | goto out_err; | 2881 | goto out_err; |
2882 | } | ||
2411 | sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); | 2883 | sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); |
2412 | 2884 | ||
2885 | /* Populate rbd image metadata */ | ||
2886 | |||
2887 | ret = rbd_read_header(rbd_dev, &rbd_dev->header); | ||
2888 | if (ret < 0) | ||
2889 | goto out_err; | ||
2890 | rbd_dev->image_format = 1; | ||
2891 | |||
2892 | dout("discovered version 1 image, header name is %s\n", | ||
2893 | rbd_dev->header_name); | ||
2894 | |||
2895 | return 0; | ||
2896 | |||
2897 | out_err: | ||
2898 | kfree(rbd_dev->header_name); | ||
2899 | rbd_dev->header_name = NULL; | ||
2900 | kfree(rbd_dev->image_id); | ||
2901 | rbd_dev->image_id = NULL; | ||
2902 | |||
2903 | return ret; | ||
2904 | } | ||
2905 | |||
2906 | static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) | ||
2907 | { | ||
2908 | size_t size; | ||
2909 | int ret; | ||
2910 | u64 ver = 0; | ||
2911 | |||
2413 | /* | 2912 | /* |
2414 | * The snapshot name is optional. If none is supplied, | 2913 | * Image id was filled in by the caller. Record the header |
2415 | * we use the default value. | 2914 | * object name for this rbd image. |
2416 | */ | 2915 | */ |
2417 | rbd_dev->snap_name = dup_token(&buf, &len); | 2916 | size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->image_id_len; |
2418 | if (!rbd_dev->snap_name) | 2917 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); |
2918 | if (!rbd_dev->header_name) | ||
2919 | return -ENOMEM; | ||
2920 | sprintf(rbd_dev->header_name, "%s%s", | ||
2921 | RBD_HEADER_PREFIX, rbd_dev->image_id); | ||
2922 | |||
2923 | /* Get the size and object order for the image */ | ||
2924 | |||
2925 | ret = rbd_dev_v2_image_size(rbd_dev); | ||
2926 | if (ret < 0) | ||
2419 | goto out_err; | 2927 | goto out_err; |
2420 | if (!len) { | ||
2421 | /* Replace the empty name with the default */ | ||
2422 | kfree(rbd_dev->snap_name); | ||
2423 | rbd_dev->snap_name | ||
2424 | = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL); | ||
2425 | if (!rbd_dev->snap_name) | ||
2426 | goto out_err; | ||
2427 | 2928 | ||
2428 | memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, | 2929 | /* Get the object prefix (a.k.a. block_name) for the image */ |
2429 | sizeof (RBD_SNAP_HEAD_NAME)); | ||
2430 | } | ||
2431 | 2930 | ||
2432 | return 0; | 2931 | ret = rbd_dev_v2_object_prefix(rbd_dev); |
2932 | if (ret < 0) | ||
2933 | goto out_err; | ||
2934 | |||
2935 | /* Get the features for the image */ | ||
2433 | 2936 | ||
2937 | ret = rbd_dev_v2_features(rbd_dev); | ||
2938 | if (ret < 0) | ||
2939 | goto out_err; | ||
2940 | |||
2941 | /* crypto and compression type aren't (yet) supported for v2 images */ | ||
2942 | |||
2943 | rbd_dev->header.crypt_type = 0; | ||
2944 | rbd_dev->header.comp_type = 0; | ||
2945 | |||
2946 | /* Get the snapshot context, plus the header version */ | ||
2947 | |||
2948 | ret = rbd_dev_v2_snap_context(rbd_dev, &ver); | ||
2949 | if (ret) | ||
2950 | goto out_err; | ||
2951 | rbd_dev->header.obj_version = ver; | ||
2952 | |||
2953 | rbd_dev->image_format = 2; | ||
2954 | |||
2955 | dout("discovered version 2 image, header name is %s\n", | ||
2956 | rbd_dev->header_name); | ||
2957 | |||
2958 | return -ENOTSUPP; | ||
2434 | out_err: | 2959 | out_err: |
2435 | kfree(rbd_dev->header_name); | 2960 | kfree(rbd_dev->header_name); |
2436 | kfree(rbd_dev->image_name); | 2961 | rbd_dev->header_name = NULL; |
2437 | kfree(rbd_dev->pool_name); | 2962 | kfree(rbd_dev->header.object_prefix); |
2438 | rbd_dev->pool_name = NULL; | 2963 | rbd_dev->header.object_prefix = NULL; |
2964 | |||
2965 | return ret; | ||
2966 | } | ||
2967 | |||
2968 | /* | ||
2969 | * Probe for the existence of the header object for the given rbd | ||
2970 | * device. For format 2 images this includes determining the image | ||
2971 | * id. | ||
2972 | */ | ||
2973 | static int rbd_dev_probe(struct rbd_device *rbd_dev) | ||
2974 | { | ||
2975 | int ret; | ||
2976 | |||
2977 | /* | ||
2978 | * Get the id from the image id object. If it's not a | ||
2979 | * format 2 image, we'll get ENOENT back, and we'll assume | ||
2980 | * it's a format 1 image. | ||
2981 | */ | ||
2982 | ret = rbd_dev_image_id(rbd_dev); | ||
2983 | if (ret) | ||
2984 | ret = rbd_dev_v1_probe(rbd_dev); | ||
2985 | else | ||
2986 | ret = rbd_dev_v2_probe(rbd_dev); | ||
2987 | if (ret) | ||
2988 | dout("probe failed, returning %d\n", ret); | ||
2439 | 2989 | ||
2440 | return ret; | 2990 | return ret; |
2441 | } | 2991 | } |
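As the comment in rbd_dev_image_id() above notes, the "get_id" reply is an encoded string: a little-endian 32-bit length followed by that many bytes. The driver leaves the real decoding to ceph_extract_encoded_string(); the following is only a rough, self-contained sketch of that framing, with a hypothetical decode_lenstr() helper:

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
#include <asm/unaligned.h>

/*
 * Decode a length-prefixed string from *p (bounded by end).
 * Returns a NUL-terminated copy, or an ERR_PTR() on failure.
 */
static char *decode_lenstr(void **p, void *end, size_t *lenp, gfp_t gfp)
{
	u32 len;
	char *s;

	if (*p + sizeof(__le32) > end)
		return ERR_PTR(-ERANGE);
	len = get_unaligned_le32(*p);
	*p += sizeof(__le32);
	if (*p + len > end)
		return ERR_PTR(-ERANGE);

	s = kmalloc(len + 1, gfp);
	if (!s)
		return ERR_PTR(-ENOMEM);
	memcpy(s, *p, len);
	s[len] = '\0';
	*p += len;

	if (lenp)
		*lenp = len;
	return s;
}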
@@ -2450,16 +3000,17 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
2450 | size_t mon_addrs_size = 0; | 3000 | size_t mon_addrs_size = 0; |
2451 | struct ceph_osd_client *osdc; | 3001 | struct ceph_osd_client *osdc; |
2452 | int rc = -ENOMEM; | 3002 | int rc = -ENOMEM; |
3003 | char *snap_name; | ||
2453 | 3004 | ||
2454 | if (!try_module_get(THIS_MODULE)) | 3005 | if (!try_module_get(THIS_MODULE)) |
2455 | return -ENODEV; | 3006 | return -ENODEV; |
2456 | 3007 | ||
2457 | options = kmalloc(count, GFP_KERNEL); | 3008 | options = kmalloc(count, GFP_KERNEL); |
2458 | if (!options) | 3009 | if (!options) |
2459 | goto err_nomem; | 3010 | goto err_out_mem; |
2460 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); | 3011 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); |
2461 | if (!rbd_dev) | 3012 | if (!rbd_dev) |
2462 | goto err_nomem; | 3013 | goto err_out_mem; |
2463 | 3014 | ||
2464 | /* static rbd_device initialization */ | 3015 | /* static rbd_device initialization */ |
2465 | spin_lock_init(&rbd_dev->lock); | 3016 | spin_lock_init(&rbd_dev->lock); |
@@ -2467,27 +3018,18 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
2467 | INIT_LIST_HEAD(&rbd_dev->snaps); | 3018 | INIT_LIST_HEAD(&rbd_dev->snaps); |
2468 | init_rwsem(&rbd_dev->header_rwsem); | 3019 | init_rwsem(&rbd_dev->header_rwsem); |
2469 | 3020 | ||
2470 | /* generate unique id: find highest unique id, add one */ | ||
2471 | rbd_id_get(rbd_dev); | ||
2472 | |||
2473 | /* Fill in the device name, now that we have its id. */ | ||
2474 | BUILD_BUG_ON(DEV_NAME_LEN | ||
2475 | < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); | ||
2476 | sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); | ||
2477 | |||
2478 | /* parse add command */ | 3021 | /* parse add command */ |
2479 | rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, | 3022 | snap_name = rbd_add_parse_args(rbd_dev, buf, |
2480 | options, count); | 3023 | &mon_addrs, &mon_addrs_size, options, count); |
2481 | if (rc) | 3024 | if (IS_ERR(snap_name)) { |
2482 | goto err_put_id; | 3025 | rc = PTR_ERR(snap_name); |
2483 | 3026 | goto err_out_mem; | |
2484 | rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1, | ||
2485 | options); | ||
2486 | if (IS_ERR(rbd_dev->rbd_client)) { | ||
2487 | rc = PTR_ERR(rbd_dev->rbd_client); | ||
2488 | goto err_put_id; | ||
2489 | } | 3027 | } |
2490 | 3028 | ||
3029 | rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options); | ||
3030 | if (rc < 0) | ||
3031 | goto err_out_args; | ||
3032 | |||
2491 | /* pick the pool */ | 3033 | /* pick the pool */ |
2492 | osdc = &rbd_dev->rbd_client->client->osdc; | 3034 | osdc = &rbd_dev->rbd_client->client->osdc; |
2493 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); | 3035 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); |
@@ -2495,23 +3037,53 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
2495 | goto err_out_client; | 3037 | goto err_out_client; |
2496 | rbd_dev->pool_id = rc; | 3038 | rbd_dev->pool_id = rc; |
2497 | 3039 | ||
2498 | /* register our block device */ | 3040 | rc = rbd_dev_probe(rbd_dev); |
2499 | rc = register_blkdev(0, rbd_dev->name); | ||
2500 | if (rc < 0) | 3041 | if (rc < 0) |
2501 | goto err_out_client; | 3042 | goto err_out_client; |
3043 | rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); | ||
3044 | |||
3045 | /* no need to lock here, as rbd_dev is not registered yet */ | ||
3046 | rc = rbd_dev_snaps_update(rbd_dev); | ||
3047 | if (rc) | ||
3048 | goto err_out_header; | ||
3049 | |||
3050 | rc = rbd_dev_set_mapping(rbd_dev, snap_name); | ||
3051 | if (rc) | ||
3052 | goto err_out_header; | ||
3053 | |||
3054 | /* generate unique id: find highest unique id, add one */ | ||
3055 | rbd_dev_id_get(rbd_dev); | ||
3056 | |||
3057 | /* Fill in the device name, now that we have its id. */ | ||
3058 | BUILD_BUG_ON(DEV_NAME_LEN | ||
3059 | < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); | ||
3060 | sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); | ||
3061 | |||
3062 | /* Get our block major device number. */ | ||
3063 | |||
3064 | rc = register_blkdev(0, rbd_dev->name); | ||
3065 | if (rc < 0) | ||
3066 | goto err_out_id; | ||
2502 | rbd_dev->major = rc; | 3067 | rbd_dev->major = rc; |
2503 | 3068 | ||
2504 | rc = rbd_bus_add_dev(rbd_dev); | 3069 | /* Set up the blkdev mapping. */ |
3070 | |||
3071 | rc = rbd_init_disk(rbd_dev); | ||
2505 | if (rc) | 3072 | if (rc) |
2506 | goto err_out_blkdev; | 3073 | goto err_out_blkdev; |
2507 | 3074 | ||
3075 | rc = rbd_bus_add_dev(rbd_dev); | ||
3076 | if (rc) | ||
3077 | goto err_out_disk; | ||
3078 | |||
2508 | /* | 3079 | /* |
2509 | * At this point cleanup in the event of an error is the job | 3080 | * At this point cleanup in the event of an error is the job |
2510 | * of the sysfs code (initiated by rbd_bus_del_dev()). | 3081 | * of the sysfs code (initiated by rbd_bus_del_dev()). |
2511 | * | ||
2512 | * Set up and announce blkdev mapping. | ||
2513 | */ | 3082 | */ |
2514 | rc = rbd_init_disk(rbd_dev); | 3083 | |
3084 | down_write(&rbd_dev->header_rwsem); | ||
3085 | rc = rbd_dev_snaps_register(rbd_dev); | ||
3086 | up_write(&rbd_dev->header_rwsem); | ||
2515 | if (rc) | 3087 | if (rc) |
2516 | goto err_out_bus; | 3088 | goto err_out_bus; |
2517 | 3089 | ||
@@ -2519,6 +3091,13 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
2519 | if (rc) | 3091 | if (rc) |
2520 | goto err_out_bus; | 3092 | goto err_out_bus; |
2521 | 3093 | ||
3094 | /* Everything's ready. Announce the disk to the world. */ | ||
3095 | |||
3096 | add_disk(rbd_dev->disk); | ||
3097 | |||
3098 | pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, | ||
3099 | (unsigned long long) rbd_dev->mapping.size); | ||
3100 | |||
2522 | return count; | 3101 | return count; |
2523 | 3102 | ||
2524 | err_out_bus: | 3103 | err_out_bus: |
@@ -2528,19 +3107,23 @@ err_out_bus: | |||
2528 | kfree(options); | 3107 | kfree(options); |
2529 | return rc; | 3108 | return rc; |
2530 | 3109 | ||
3110 | err_out_disk: | ||
3111 | rbd_free_disk(rbd_dev); | ||
2531 | err_out_blkdev: | 3112 | err_out_blkdev: |
2532 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | 3113 | unregister_blkdev(rbd_dev->major, rbd_dev->name); |
3114 | err_out_id: | ||
3115 | rbd_dev_id_put(rbd_dev); | ||
3116 | err_out_header: | ||
3117 | rbd_header_free(&rbd_dev->header); | ||
2533 | err_out_client: | 3118 | err_out_client: |
3119 | kfree(rbd_dev->header_name); | ||
2534 | rbd_put_client(rbd_dev); | 3120 | rbd_put_client(rbd_dev); |
2535 | err_put_id: | 3121 | kfree(rbd_dev->image_id); |
2536 | if (rbd_dev->pool_name) { | 3122 | err_out_args: |
2537 | kfree(rbd_dev->snap_name); | 3123 | kfree(rbd_dev->mapping.snap_name); |
2538 | kfree(rbd_dev->header_name); | 3124 | kfree(rbd_dev->image_name); |
2539 | kfree(rbd_dev->image_name); | 3125 | kfree(rbd_dev->pool_name); |
2540 | kfree(rbd_dev->pool_name); | 3126 | err_out_mem: |
2541 | } | ||
2542 | rbd_id_put(rbd_dev); | ||
2543 | err_nomem: | ||
2544 | kfree(rbd_dev); | 3127 | kfree(rbd_dev); |
2545 | kfree(options); | 3128 | kfree(options); |
2546 | 3129 | ||
@@ -2586,12 +3169,16 @@ static void rbd_dev_release(struct device *dev) | |||
2586 | rbd_free_disk(rbd_dev); | 3169 | rbd_free_disk(rbd_dev); |
2587 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | 3170 | unregister_blkdev(rbd_dev->major, rbd_dev->name); |
2588 | 3171 | ||
3172 | /* release allocated disk header fields */ | ||
3173 | rbd_header_free(&rbd_dev->header); | ||
3174 | |||
2589 | /* done with the id, and with the rbd_dev */ | 3175 | /* done with the id, and with the rbd_dev */ |
2590 | kfree(rbd_dev->snap_name); | 3176 | kfree(rbd_dev->mapping.snap_name); |
3177 | kfree(rbd_dev->image_id); | ||
2591 | kfree(rbd_dev->header_name); | 3178 | kfree(rbd_dev->header_name); |
2592 | kfree(rbd_dev->pool_name); | 3179 | kfree(rbd_dev->pool_name); |
2593 | kfree(rbd_dev->image_name); | 3180 | kfree(rbd_dev->image_name); |
2594 | rbd_id_put(rbd_dev); | 3181 | rbd_dev_id_put(rbd_dev); |
2595 | kfree(rbd_dev); | 3182 | kfree(rbd_dev); |
2596 | 3183 | ||
2597 | /* release module ref */ | 3184 | /* release module ref */ |
@@ -2629,47 +3216,7 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
2629 | 3216 | ||
2630 | done: | 3217 | done: |
2631 | mutex_unlock(&ctl_mutex); | 3218 | mutex_unlock(&ctl_mutex); |
2632 | return ret; | ||
2633 | } | ||
2634 | 3219 | ||
2635 | static ssize_t rbd_snap_add(struct device *dev, | ||
2636 | struct device_attribute *attr, | ||
2637 | const char *buf, | ||
2638 | size_t count) | ||
2639 | { | ||
2640 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | ||
2641 | int ret; | ||
2642 | char *name = kmalloc(count + 1, GFP_KERNEL); | ||
2643 | if (!name) | ||
2644 | return -ENOMEM; | ||
2645 | |||
2646 | snprintf(name, count, "%s", buf); | ||
2647 | |||
2648 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
2649 | |||
2650 | ret = rbd_header_add_snap(rbd_dev, | ||
2651 | name, GFP_KERNEL); | ||
2652 | if (ret < 0) | ||
2653 | goto err_unlock; | ||
2654 | |||
2655 | ret = __rbd_refresh_header(rbd_dev, NULL); | ||
2656 | if (ret < 0) | ||
2657 | goto err_unlock; | ||
2658 | |||
2659 | /* shouldn't hold ctl_mutex when notifying.. notify might | ||
2660 | trigger a watch callback that would need to get that mutex */ | ||
2661 | mutex_unlock(&ctl_mutex); | ||
2662 | |||
2663 | /* make a best effort, don't error if failed */ | ||
2664 | rbd_req_sync_notify(rbd_dev); | ||
2665 | |||
2666 | ret = count; | ||
2667 | kfree(name); | ||
2668 | return ret; | ||
2669 | |||
2670 | err_unlock: | ||
2671 | mutex_unlock(&ctl_mutex); | ||
2672 | kfree(name); | ||
2673 | return ret; | 3220 | return ret; |
2674 | } | 3221 | } |
2675 | 3222 | ||
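The reworked rbd_add() above is a good illustration of the kernel's goto-unwind idiom: each completed setup step gets a matching error label, and the labels run in reverse order so teardown mirrors construction (err_out_disk, err_out_blkdev, err_out_id, and so on). A condensed sketch of the pattern with a hypothetical thing_setup() helper, not the driver's code:

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>

struct thing {
	char *name;
	char *header;
};

static int thing_setup(struct thing *t, const char *name)
{
	int rc = -ENOMEM;

	t->name = kstrdup(name, GFP_KERNEL);
	if (!t->name)
		goto err_out;

	t->header = kasprintf(GFP_KERNEL, "%s.rbd", t->name);
	if (!t->header)
		goto err_out_name;

	return 0;			/* success: nothing to unwind */

	/* Unwind in strict reverse order of construction. */
err_out_name:
	kfree(t->name);
	t->name = NULL;
err_out:
	return rc;
}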
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index 0924e9e41a60..cbe77fa105ba 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h | |||
@@ -15,15 +15,30 @@ | |||
15 | 15 | ||
16 | #include <linux/types.h> | 16 | #include <linux/types.h> |
17 | 17 | ||
18 | /* For format version 2, rbd image 'foo' consists of objects | ||
19 | * rbd_id.foo - id of image | ||
20 | * rbd_header.<id> - image metadata | ||
21 | * rbd_data.<id>.0000000000000000 | ||
22 | * rbd_data.<id>.0000000000000001 | ||
23 | * ... - data | ||
24 | * Clients do not access header data directly in rbd format 2. | ||
25 | */ | ||
26 | |||
27 | #define RBD_HEADER_PREFIX "rbd_header." | ||
28 | #define RBD_DATA_PREFIX "rbd_data." | ||
29 | #define RBD_ID_PREFIX "rbd_id." | ||
30 | |||
18 | /* | 31 | /* |
19 | * rbd image 'foo' consists of objects | 32 | * For format version 1, rbd image 'foo' consists of objects |
20 | * foo.rbd - image metadata | 33 | * foo.rbd - image metadata |
21 | * foo.00000000 | 34 | * rb.<idhi>.<idlo>.00000000 |
22 | * foo.00000001 | 35 | * rb.<idhi>.<idlo>.00000001 |
23 | * ... - data | 36 | * ... - data |
37 | * There is no notion of a persistent image id in rbd format 1. | ||
24 | */ | 38 | */ |
25 | 39 | ||
26 | #define RBD_SUFFIX ".rbd" | 40 | #define RBD_SUFFIX ".rbd" |
41 | |||
27 | #define RBD_DIRECTORY "rbd_directory" | 42 | #define RBD_DIRECTORY "rbd_directory" |
28 | #define RBD_INFO "rbd_info" | 43 | #define RBD_INFO "rbd_info" |
29 | 44 | ||
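Taken together with the prefixes added above, the probe code earlier in this diff builds the header object name per format: "<image_name>.rbd" for format 1, "rbd_header.<image_id>" for format 2. A small sketch of that composition (hypothetical helper; the driver open-codes the equivalent sprintf() calls in rbd_dev_v1_probe() and rbd_dev_v2_probe()):

#include <linux/kernel.h>
#include <linux/slab.h>

#define RBD_SUFFIX		".rbd"
#define RBD_HEADER_PREFIX	"rbd_header."

/* Returns a freshly allocated header object name, or NULL on -ENOMEM. */
static char *rbd_header_object_name(int image_format,
				    const char *image_name,
				    const char *image_id)
{
	if (image_format == 1)
		return kasprintf(GFP_KERNEL, "%s%s", image_name, RBD_SUFFIX);

	return kasprintf(GFP_KERNEL, "%s%s", RBD_HEADER_PREFIX, image_id);
}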
@@ -47,7 +62,7 @@ struct rbd_image_snap_ondisk { | |||
47 | 62 | ||
48 | struct rbd_image_header_ondisk { | 63 | struct rbd_image_header_ondisk { |
49 | char text[40]; | 64 | char text[40]; |
50 | char block_name[24]; | 65 | char object_prefix[24]; |
51 | char signature[4]; | 66 | char signature[4]; |
52 | char version[8]; | 67 | char version[8]; |
53 | struct { | 68 | struct { |
diff --git a/drivers/block/ub.c b/drivers/block/ub.c deleted file mode 100644 index fcec0225ac76..000000000000 --- a/drivers/block/ub.c +++ /dev/null | |||
@@ -1,2474 +0,0 @@ | |||
1 | /* | ||
2 | * The low performance USB storage driver (ub). | ||
3 | * | ||
4 | * Copyright (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net) | ||
5 | * Copyright (C) 2004 Pete Zaitcev (zaitcev@yahoo.com) | ||
6 | * | ||
7 | * This work is a part of Linux kernel, is derived from it, | ||
8 | * and is not licensed separately. See file COPYING for details. | ||
9 | * | ||
10 | * TODO (sorted by decreasing priority) | ||
11 | * -- Return sense now that rq allows it (we always auto-sense anyway). | ||
12 | * -- set readonly flag for CDs, set removable flag for CF readers | ||
13 | * -- do inquiry and verify we got a disk and not a tape (for LUN mismatch) | ||
14 | * -- verify the 13 conditions and do bulk resets | ||
15 | * -- highmem | ||
16 | * -- move top_sense and work_bcs into separate allocations (if they survive) | ||
17 | * for cache purists and esoteric architectures. | ||
18 | * -- Allocate structure for LUN 0 before the first ub_sync_tur, avoid NULL. ? | ||
19 | * -- prune comments, they are too voluminous | ||
20 | * -- Resolve XXX's | ||
21 | * -- CLEAR, CLR2STS, CLRRS seem to be ripe for refactoring. | ||
22 | */ | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/usb.h> | ||
26 | #include <linux/usb_usual.h> | ||
27 | #include <linux/blkdev.h> | ||
28 | #include <linux/timer.h> | ||
29 | #include <linux/scatterlist.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/mutex.h> | ||
32 | #include <scsi/scsi.h> | ||
33 | |||
34 | #define DRV_NAME "ub" | ||
35 | |||
36 | #define UB_MAJOR 180 | ||
37 | |||
38 | /* | ||
39 | * The command state machine is the key model for understanding of this driver. | ||
40 | * | ||
41 | * The general rule is that all transitions are done towards the bottom | ||
42 | * of the diagram, thus preventing any loops. | ||
43 | * | ||
44 | * An exception to that is how the STAT state is handled. A counter allows it | ||
45 | * to be re-entered along the path marked with [C]. | ||
46 | * | ||
47 | * +--------+ | ||
48 | * ! INIT ! | ||
49 | * +--------+ | ||
50 | * ! | ||
51 | * ub_scsi_cmd_start fails ->--------------------------------------\ | ||
52 | * ! ! | ||
53 | * V ! | ||
54 | * +--------+ ! | ||
55 | * ! CMD ! ! | ||
56 | * +--------+ ! | ||
57 | * ! +--------+ ! | ||
58 | * was -EPIPE -->-------------------------------->! CLEAR ! ! | ||
59 | * ! +--------+ ! | ||
60 | * ! ! ! | ||
61 | * was error -->------------------------------------- ! --------->\ | ||
62 | * ! ! ! | ||
63 | * /--<-- cmd->dir == NONE ? ! ! | ||
64 | * ! ! ! ! | ||
65 | * ! V ! ! | ||
66 | * ! +--------+ ! ! | ||
67 | * ! ! DATA ! ! ! | ||
68 | * ! +--------+ ! ! | ||
69 | * ! ! +---------+ ! ! | ||
70 | * ! was -EPIPE -->--------------->! CLR2STS ! ! ! | ||
71 | * ! ! +---------+ ! ! | ||
72 | * ! ! ! ! ! | ||
73 | * ! ! was error -->---- ! --------->\ | ||
74 | * ! was error -->--------------------- ! ------------- ! --------->\ | ||
75 | * ! ! ! ! ! | ||
76 | * ! V ! ! ! | ||
77 | * \--->+--------+ ! ! ! | ||
78 | * ! STAT !<--------------------------/ ! ! | ||
79 | * /--->+--------+ ! ! | ||
80 | * ! ! ! ! | ||
81 | * [C] was -EPIPE -->-----------\ ! ! | ||
82 | * ! ! ! ! ! | ||
83 | * +<---- len == 0 ! ! ! | ||
84 | * ! ! ! ! ! | ||
85 | * ! was error -->--------------------------------------!---------->\ | ||
86 | * ! ! ! ! ! | ||
87 | * +<---- bad CSW ! ! ! | ||
88 | * +<---- bad tag ! ! ! | ||
89 | * ! ! V ! ! | ||
90 | * ! ! +--------+ ! ! | ||
91 | * ! ! ! CLRRS ! ! ! | ||
92 | * ! ! +--------+ ! ! | ||
93 | * ! ! ! ! ! | ||
94 | * \------- ! --------------------[C]--------\ ! ! | ||
95 | * ! ! ! ! | ||
96 | * cmd->error---\ +--------+ ! ! | ||
97 | * ! +--------------->! SENSE !<----------/ ! | ||
98 | * STAT_FAIL----/ +--------+ ! | ||
99 | * ! ! V | ||
100 | * ! V +--------+ | ||
101 | * \--------------------------------\--------------------->! DONE ! | ||
102 | * +--------+ | ||
103 | */ | ||
104 | |||
105 | /* | ||
106 | * This many LUNs per USB device. | ||
107 | * Every one of them takes a host, see UB_MAX_HOSTS. | ||
108 | */ | ||
109 | #define UB_MAX_LUNS 9 | ||
110 | |||
111 | /* | ||
112 | */ | ||
113 | |||
114 | #define UB_PARTS_PER_LUN 8 | ||
115 | |||
116 | #define UB_MAX_CDB_SIZE 16 /* Corresponds to Bulk */ | ||
117 | |||
118 | #define UB_SENSE_SIZE 18 | ||
119 | |||
120 | /* | ||
121 | */ | ||
122 | struct ub_dev; | ||
123 | |||
124 | #define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */ | ||
125 | #define UB_MAX_SECTORS 64 | ||
126 | |||
127 | /* | ||
128 | * A second is more than enough for a 32K transfer (UB_MAX_SECTORS) | ||
129 | * even if a webcam hogs the bus, but some devices need time to spin up. | ||
130 | */ | ||
131 | #define UB_URB_TIMEOUT (HZ*2) | ||
132 | #define UB_DATA_TIMEOUT (HZ*5) /* ZIP does spin-ups in the data phase */ | ||
133 | #define UB_STAT_TIMEOUT (HZ*5) /* Same spinups and eject for a dataless cmd. */ | ||
134 | #define UB_CTRL_TIMEOUT (HZ/2) /* 500ms ought to be enough to clear a stall */ | ||
135 | |||
136 | /* | ||
137 | * An instance of a SCSI command in transit. | ||
138 | */ | ||
139 | #define UB_DIR_NONE 0 | ||
140 | #define UB_DIR_READ 1 | ||
141 | #define UB_DIR_ILLEGAL2 2 | ||
142 | #define UB_DIR_WRITE 3 | ||
143 | |||
144 | #define UB_DIR_CHAR(c) (((c)==UB_DIR_WRITE)? 'w': \ | ||
145 | (((c)==UB_DIR_READ)? 'r': 'n')) | ||
146 | |||
147 | enum ub_scsi_cmd_state { | ||
148 | UB_CMDST_INIT, /* Initial state */ | ||
149 | UB_CMDST_CMD, /* Command submitted */ | ||
150 | UB_CMDST_DATA, /* Data phase */ | ||
151 | UB_CMDST_CLR2STS, /* Clearing before requesting status */ | ||
152 | UB_CMDST_STAT, /* Status phase */ | ||
153 | UB_CMDST_CLEAR, /* Clearing a stall (halt, actually) */ | ||
154 | UB_CMDST_CLRRS, /* Clearing before retrying status */ | ||
155 | UB_CMDST_SENSE, /* Sending Request Sense */ | ||
156 | UB_CMDST_DONE /* Final state */ | ||
157 | }; | ||
158 | |||
159 | struct ub_scsi_cmd { | ||
160 | unsigned char cdb[UB_MAX_CDB_SIZE]; | ||
161 | unsigned char cdb_len; | ||
162 | |||
163 | unsigned char dir; /* 0 - none, 1 - read, 3 - write. */ | ||
164 | enum ub_scsi_cmd_state state; | ||
165 | unsigned int tag; | ||
166 | struct ub_scsi_cmd *next; | ||
167 | |||
168 | int error; /* Return code - valid upon done */ | ||
169 | unsigned int act_len; /* Return size */ | ||
170 | unsigned char key, asc, ascq; /* May be valid if error==-EIO */ | ||
171 | |||
172 | int stat_count; /* Retries getting status. */ | ||
173 | unsigned int timeo; /* jiffies until rq->timeout changes */ | ||
174 | |||
175 | unsigned int len; /* Requested length */ | ||
176 | unsigned int current_sg; | ||
177 | unsigned int nsg; /* sgv[nsg] */ | ||
178 | struct scatterlist sgv[UB_MAX_REQ_SG]; | ||
179 | |||
180 | struct ub_lun *lun; | ||
181 | void (*done)(struct ub_dev *, struct ub_scsi_cmd *); | ||
182 | void *back; | ||
183 | }; | ||
184 | |||
185 | struct ub_request { | ||
186 | struct request *rq; | ||
187 | unsigned int current_try; | ||
188 | unsigned int nsg; /* sgv[nsg] */ | ||
189 | struct scatterlist sgv[UB_MAX_REQ_SG]; | ||
190 | }; | ||
191 | |||
192 | /* | ||
193 | */ | ||
194 | struct ub_capacity { | ||
195 | unsigned long nsec; /* Linux size - 512 byte sectors */ | ||
196 | unsigned int bsize; /* Linux hardsect_size */ | ||
197 | unsigned int bshift; /* Shift between 512 and hard sects */ | ||
198 | }; | ||
199 | |||
200 | /* | ||
201 | * This is a direct take-off from linux/include/completion.h | ||
202 | * The difference is that I do not wait on this thing, just poll. | ||
203 | * When I want to wait (ub_probe), I just use the stock completion. | ||
204 | * | ||
205 | * Note that INIT_COMPLETION takes no lock. It is correct. But why | ||
206 | * in the bloody hell that thing takes struct instead of pointer to struct | ||
207 | * is quite beyond me. I just copied it from the stock completion. | ||
208 | */ | ||
209 | struct ub_completion { | ||
210 | unsigned int done; | ||
211 | spinlock_t lock; | ||
212 | }; | ||
213 | |||
214 | static DEFINE_MUTEX(ub_mutex); | ||
215 | static inline void ub_init_completion(struct ub_completion *x) | ||
216 | { | ||
217 | x->done = 0; | ||
218 | spin_lock_init(&x->lock); | ||
219 | } | ||
220 | |||
221 | #define UB_INIT_COMPLETION(x) ((x).done = 0) | ||
222 | |||
223 | static void ub_complete(struct ub_completion *x) | ||
224 | { | ||
225 | unsigned long flags; | ||
226 | |||
227 | spin_lock_irqsave(&x->lock, flags); | ||
228 | x->done++; | ||
229 | spin_unlock_irqrestore(&x->lock, flags); | ||
230 | } | ||
231 | |||
232 | static int ub_is_completed(struct ub_completion *x) | ||
233 | { | ||
234 | unsigned long flags; | ||
235 | int ret; | ||
236 | |||
237 | spin_lock_irqsave(&x->lock, flags); | ||
238 | ret = x->done; | ||
239 | spin_unlock_irqrestore(&x->lock, flags); | ||
240 | return ret; | ||
241 | } | ||
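The ub_completion above is deliberately poll-only: the URB completion handler bumps ->done from interrupt context, and the tasklet checks it with ub_is_completed() rather than sleeping. A minimal sketch of how the three helpers fit together, assuming the ub_complete()/ub_is_completed()/UB_INIT_COMPLETION definitions above; my_urb_done() and my_poll_step() are hypothetical:

#include <linux/errno.h>
#include <linux/usb.h>

/* Producer side: a URB completion handler, possibly in IRQ context. */
static void my_urb_done(struct urb *urb)
{
	struct ub_completion *done = urb->context;

	ub_complete(done);		/* just bumps ->done under its lock */
}

/* Consumer side: a tasklet step that polls and never sleeps. */
static int my_poll_step(struct ub_completion *done)
{
	if (!ub_is_completed(done))
		return -EBUSY;		/* try again on the next tasklet run */

	/* Work finished; re-arm for the next command. */
	UB_INIT_COMPLETION(*done);
	return 0;
}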
242 | |||
243 | /* | ||
244 | */ | ||
245 | struct ub_scsi_cmd_queue { | ||
246 | int qlen, qmax; | ||
247 | struct ub_scsi_cmd *head, *tail; | ||
248 | }; | ||
249 | |||
250 | /* | ||
251 | * The block device instance (one per LUN). | ||
252 | */ | ||
253 | struct ub_lun { | ||
254 | struct ub_dev *udev; | ||
255 | struct list_head link; | ||
256 | struct gendisk *disk; | ||
257 | int id; /* Host index */ | ||
258 | int num; /* LUN number */ | ||
259 | char name[16]; | ||
260 | |||
261 | int changed; /* Media was changed */ | ||
262 | int removable; | ||
263 | int readonly; | ||
264 | |||
265 | struct ub_request urq; | ||
266 | |||
267 | /* Use Ingo's mempool if or when we have more than one command. */ | ||
268 | /* | ||
269 | * Currently we never need more than one command for the whole device. | ||
270 | * However, giving every LUN a command is a cheap and automatic way | ||
271 | * to enforce fairness between them. | ||
272 | */ | ||
273 | int cmda[1]; | ||
274 | struct ub_scsi_cmd cmdv[1]; | ||
275 | |||
276 | struct ub_capacity capacity; | ||
277 | }; | ||
278 | |||
279 | /* | ||
280 | * The USB device instance. | ||
281 | */ | ||
282 | struct ub_dev { | ||
283 | spinlock_t *lock; | ||
284 | atomic_t poison; /* The USB device is disconnected */ | ||
285 | int openc; /* protected by ub_lock! */ | ||
286 | /* kref is too implicit for our taste */ | ||
287 | int reset; /* Reset is running */ | ||
288 | int bad_resid; | ||
289 | unsigned int tagcnt; | ||
290 | char name[12]; | ||
291 | struct usb_device *dev; | ||
292 | struct usb_interface *intf; | ||
293 | |||
294 | struct list_head luns; | ||
295 | |||
296 | unsigned int send_bulk_pipe; /* cached pipe values */ | ||
297 | unsigned int recv_bulk_pipe; | ||
298 | unsigned int send_ctrl_pipe; | ||
299 | unsigned int recv_ctrl_pipe; | ||
300 | |||
301 | struct tasklet_struct tasklet; | ||
302 | |||
303 | struct ub_scsi_cmd_queue cmd_queue; | ||
304 | struct ub_scsi_cmd top_rqs_cmd; /* REQUEST SENSE */ | ||
305 | unsigned char top_sense[UB_SENSE_SIZE]; | ||
306 | |||
307 | struct ub_completion work_done; | ||
308 | struct urb work_urb; | ||
309 | struct timer_list work_timer; | ||
310 | int last_pipe; /* What might need clearing */ | ||
311 | __le32 signature; /* Learned signature */ | ||
312 | struct bulk_cb_wrap work_bcb; | ||
313 | struct bulk_cs_wrap work_bcs; | ||
314 | struct usb_ctrlrequest work_cr; | ||
315 | |||
316 | struct work_struct reset_work; | ||
317 | wait_queue_head_t reset_wait; | ||
318 | }; | ||
319 | |||
320 | /* | ||
321 | */ | ||
322 | static void ub_cleanup(struct ub_dev *sc); | ||
323 | static int ub_request_fn_1(struct ub_lun *lun, struct request *rq); | ||
324 | static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, | ||
325 | struct ub_scsi_cmd *cmd, struct ub_request *urq); | ||
326 | static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun, | ||
327 | struct ub_scsi_cmd *cmd, struct ub_request *urq); | ||
328 | static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd); | ||
329 | static void ub_end_rq(struct request *rq, unsigned int status); | ||
330 | static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun, | ||
331 | struct ub_request *urq, struct ub_scsi_cmd *cmd); | ||
332 | static int ub_submit_scsi(struct ub_dev *sc, struct ub_scsi_cmd *cmd); | ||
333 | static void ub_urb_complete(struct urb *urb); | ||
334 | static void ub_scsi_action(unsigned long _dev); | ||
335 | static void ub_scsi_dispatch(struct ub_dev *sc); | ||
336 | static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd); | ||
337 | static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd); | ||
338 | static void ub_state_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd, int rc); | ||
339 | static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd); | ||
340 | static void ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd); | ||
341 | static void ub_state_stat_counted(struct ub_dev *sc, struct ub_scsi_cmd *cmd); | ||
342 | static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd); | ||
343 | static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd, | ||
344 | int stalled_pipe); | ||
345 | static void ub_top_sense_done(struct ub_dev *sc, struct ub_scsi_cmd *scmd); | ||
346 | static void ub_reset_enter(struct ub_dev *sc, int try); | ||
347 | static void ub_reset_task(struct work_struct *work); | ||
348 | static int ub_sync_tur(struct ub_dev *sc, struct ub_lun *lun); | ||
349 | static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun, | ||
350 | struct ub_capacity *ret); | ||
351 | static int ub_sync_reset(struct ub_dev *sc); | ||
352 | static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe); | ||
353 | static int ub_probe_lun(struct ub_dev *sc, int lnum); | ||
354 | |||
355 | /* | ||
356 | */ | ||
357 | #ifdef CONFIG_USB_LIBUSUAL | ||
358 | |||
359 | #define ub_usb_ids usb_storage_usb_ids | ||
360 | #else | ||
361 | |||
362 | static const struct usb_device_id ub_usb_ids[] = { | ||
363 | { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, USB_SC_SCSI, USB_PR_BULK) }, | ||
364 | { } | ||
365 | }; | ||
366 | |||
367 | MODULE_DEVICE_TABLE(usb, ub_usb_ids); | ||
368 | #endif /* CONFIG_USB_LIBUSUAL */ | ||
369 | |||
370 | /* | ||
371 | * Find me a way to identify "next free minor" for add_disk(), | ||
372 | * and the array disappears the next day. However, the number of | ||
373 | * hosts has something to do with the naming and /proc/partitions. | ||
374 | * This has to be thought out in detail before changing. | ||
375 | * If UB_MAX_HOST was 1000, we'd use a bitmap. Or a better data structure. | ||
376 | */ | ||
377 | #define UB_MAX_HOSTS 26 | ||
378 | static char ub_hostv[UB_MAX_HOSTS]; | ||
379 | |||
380 | #define UB_QLOCK_NUM 5 | ||
381 | static spinlock_t ub_qlockv[UB_QLOCK_NUM]; | ||
382 | static int ub_qlock_next = 0; | ||
383 | |||
384 | static DEFINE_SPINLOCK(ub_lock); /* Locks globals and ->openc */ | ||
385 | |||
386 | /* | ||
387 | * The id allocator. | ||
388 | * | ||
389 | * This also stores the host for indexing by minor, which is somewhat dirty. | ||
390 | */ | ||
391 | static int ub_id_get(void) | ||
392 | { | ||
393 | unsigned long flags; | ||
394 | int i; | ||
395 | |||
396 | spin_lock_irqsave(&ub_lock, flags); | ||
397 | for (i = 0; i < UB_MAX_HOSTS; i++) { | ||
398 | if (ub_hostv[i] == 0) { | ||
399 | ub_hostv[i] = 1; | ||
400 | spin_unlock_irqrestore(&ub_lock, flags); | ||
401 | return i; | ||
402 | } | ||
403 | } | ||
404 | spin_unlock_irqrestore(&ub_lock, flags); | ||
405 | return -1; | ||
406 | } | ||
407 | |||
408 | static void ub_id_put(int id) | ||
409 | { | ||
410 | unsigned long flags; | ||
411 | |||
412 | if (id < 0 || id >= UB_MAX_HOSTS) { | ||
413 | printk(KERN_ERR DRV_NAME ": bad host ID %d\n", id); | ||
414 | return; | ||
415 | } | ||
416 | |||
417 | spin_lock_irqsave(&ub_lock, flags); | ||
418 | if (ub_hostv[id] == 0) { | ||
419 | spin_unlock_irqrestore(&ub_lock, flags); | ||
420 | printk(KERN_ERR DRV_NAME ": freeing free host ID %d\n", id); | ||
421 | return; | ||
422 | } | ||
423 | ub_hostv[id] = 0; | ||
424 | spin_unlock_irqrestore(&ub_lock, flags); | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * This is necessitated by the fact that blk_cleanup_queue does not | ||
429 | * necessarily destroy the queue. Instead, it may merely decrease q->refcnt. | ||
430 | * Since our blk_init_queue() passes a spinlock common with ub_dev, | ||
431 | * we have life time issues when ub_cleanup frees ub_dev. | ||
432 | */ | ||
433 | static spinlock_t *ub_next_lock(void) | ||
434 | { | ||
435 | unsigned long flags; | ||
436 | spinlock_t *ret; | ||
437 | |||
438 | spin_lock_irqsave(&ub_lock, flags); | ||
439 | ret = &ub_qlockv[ub_qlock_next]; | ||
440 | ub_qlock_next = (ub_qlock_next + 1) % UB_QLOCK_NUM; | ||
441 | spin_unlock_irqrestore(&ub_lock, flags); | ||
442 | return ret; | ||
443 | } | ||
444 | |||
445 | /* | ||
446 | * Downcount for deallocation. This rides on two assumptions: | ||
447 | * - once something is poisoned, its refcount cannot grow | ||
448 | * - opens cannot happen at this time (del_gendisk was done) | ||
449 | * If the above is true, we can drop the lock, which we need for | ||
450 | * blk_cleanup_queue(): the silly thing may attempt to sleep. | ||
451 | * [Actually, it never needs to sleep for us, but it calls might_sleep()] | ||
452 | */ | ||
453 | static void ub_put(struct ub_dev *sc) | ||
454 | { | ||
455 | unsigned long flags; | ||
456 | |||
457 | spin_lock_irqsave(&ub_lock, flags); | ||
458 | --sc->openc; | ||
459 | if (sc->openc == 0 && atomic_read(&sc->poison)) { | ||
460 | spin_unlock_irqrestore(&ub_lock, flags); | ||
461 | ub_cleanup(sc); | ||
462 | } else { | ||
463 | spin_unlock_irqrestore(&ub_lock, flags); | ||
464 | } | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * Final cleanup and deallocation. | ||
469 | */ | ||
470 | static void ub_cleanup(struct ub_dev *sc) | ||
471 | { | ||
472 | struct list_head *p; | ||
473 | struct ub_lun *lun; | ||
474 | struct request_queue *q; | ||
475 | |||
476 | while (!list_empty(&sc->luns)) { | ||
477 | p = sc->luns.next; | ||
478 | lun = list_entry(p, struct ub_lun, link); | ||
479 | list_del(p); | ||
480 | |||
481 | /* I don't think queue can be NULL. But... Stolen from sx8.c */ | ||
482 | if ((q = lun->disk->queue) != NULL) | ||
483 | blk_cleanup_queue(q); | ||
484 | /* | ||
485 | * If we zero disk->private_data BEFORE put_disk, we have | ||
486 | * to check for NULL all over the place in open, release, | ||
487 | * check_media and revalidate, because the block level | ||
488 | * semaphore is well inside the put_disk. | ||
489 | * But we cannot zero after the call, because *disk is gone. | ||
490 | * The sd.c is blatantly racy in this area. | ||
491 | */ | ||
492 | /* disk->private_data = NULL; */ | ||
493 | put_disk(lun->disk); | ||
494 | lun->disk = NULL; | ||
495 | |||
496 | ub_id_put(lun->id); | ||
497 | kfree(lun); | ||
498 | } | ||
499 | |||
500 | usb_set_intfdata(sc->intf, NULL); | ||
501 | usb_put_intf(sc->intf); | ||
502 | usb_put_dev(sc->dev); | ||
503 | kfree(sc); | ||
504 | } | ||
505 | |||
506 | /* | ||
507 | * The "command allocator". | ||
508 | */ | ||
509 | static struct ub_scsi_cmd *ub_get_cmd(struct ub_lun *lun) | ||
510 | { | ||
511 | struct ub_scsi_cmd *ret; | ||
512 | |||
513 | if (lun->cmda[0]) | ||
514 | return NULL; | ||
515 | ret = &lun->cmdv[0]; | ||
516 | lun->cmda[0] = 1; | ||
517 | return ret; | ||
518 | } | ||
519 | |||
520 | static void ub_put_cmd(struct ub_lun *lun, struct ub_scsi_cmd *cmd) | ||
521 | { | ||
522 | if (cmd != &lun->cmdv[0]) { | ||
523 | printk(KERN_WARNING "%s: releasing a foreign cmd %p\n", | ||
524 | lun->name, cmd); | ||
525 | return; | ||
526 | } | ||
527 | if (!lun->cmda[0]) { | ||
528 | printk(KERN_WARNING "%s: releasing a free cmd\n", lun->name); | ||
529 | return; | ||
530 | } | ||
531 | lun->cmda[0] = 0; | ||
532 | } | ||
533 | |||
534 | /* | ||
535 | * The command queue. | ||
536 | */ | ||
537 | static void ub_cmdq_add(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
538 | { | ||
539 | struct ub_scsi_cmd_queue *t = &sc->cmd_queue; | ||
540 | |||
541 | if (t->qlen++ == 0) { | ||
542 | t->head = cmd; | ||
543 | t->tail = cmd; | ||
544 | } else { | ||
545 | t->tail->next = cmd; | ||
546 | t->tail = cmd; | ||
547 | } | ||
548 | |||
549 | if (t->qlen > t->qmax) | ||
550 | t->qmax = t->qlen; | ||
551 | } | ||
552 | |||
553 | static void ub_cmdq_insert(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
554 | { | ||
555 | struct ub_scsi_cmd_queue *t = &sc->cmd_queue; | ||
556 | |||
557 | if (t->qlen++ == 0) { | ||
558 | t->head = cmd; | ||
559 | t->tail = cmd; | ||
560 | } else { | ||
561 | cmd->next = t->head; | ||
562 | t->head = cmd; | ||
563 | } | ||
564 | |||
565 | if (t->qlen > t->qmax) | ||
566 | t->qmax = t->qlen; | ||
567 | } | ||
568 | |||
569 | static struct ub_scsi_cmd *ub_cmdq_pop(struct ub_dev *sc) | ||
570 | { | ||
571 | struct ub_scsi_cmd_queue *t = &sc->cmd_queue; | ||
572 | struct ub_scsi_cmd *cmd; | ||
573 | |||
574 | if (t->qlen == 0) | ||
575 | return NULL; | ||
576 | if (--t->qlen == 0) | ||
577 | t->tail = NULL; | ||
578 | cmd = t->head; | ||
579 | t->head = cmd->next; | ||
580 | cmd->next = NULL; | ||
581 | return cmd; | ||
582 | } | ||
583 | |||
584 | #define ub_cmdq_peek(sc) ((sc)->cmd_queue.head) | ||
585 | |||
586 | /* | ||
587 | * The request function is our main entry point | ||
588 | */ | ||
589 | |||
590 | static void ub_request_fn(struct request_queue *q) | ||
591 | { | ||
592 | struct ub_lun *lun = q->queuedata; | ||
593 | struct request *rq; | ||
594 | |||
595 | while ((rq = blk_peek_request(q)) != NULL) { | ||
596 | if (ub_request_fn_1(lun, rq) != 0) { | ||
597 | blk_stop_queue(q); | ||
598 | break; | ||
599 | } | ||
600 | } | ||
601 | } | ||
602 | |||
603 | static int ub_request_fn_1(struct ub_lun *lun, struct request *rq) | ||
604 | { | ||
605 | struct ub_dev *sc = lun->udev; | ||
606 | struct ub_scsi_cmd *cmd; | ||
607 | struct ub_request *urq; | ||
608 | int n_elem; | ||
609 | |||
610 | if (atomic_read(&sc->poison)) { | ||
611 | blk_start_request(rq); | ||
612 | ub_end_rq(rq, DID_NO_CONNECT << 16); | ||
613 | return 0; | ||
614 | } | ||
615 | |||
616 | if (lun->changed && rq->cmd_type != REQ_TYPE_BLOCK_PC) { | ||
617 | blk_start_request(rq); | ||
618 | ub_end_rq(rq, SAM_STAT_CHECK_CONDITION); | ||
619 | return 0; | ||
620 | } | ||
621 | |||
622 | if (lun->urq.rq != NULL) | ||
623 | return -1; | ||
624 | if ((cmd = ub_get_cmd(lun)) == NULL) | ||
625 | return -1; | ||
626 | memset(cmd, 0, sizeof(struct ub_scsi_cmd)); | ||
627 | |||
628 | blk_start_request(rq); | ||
629 | |||
630 | urq = &lun->urq; | ||
631 | memset(urq, 0, sizeof(struct ub_request)); | ||
632 | urq->rq = rq; | ||
633 | |||
634 | /* | ||
635 | * get scatterlist from block layer | ||
636 | */ | ||
637 | sg_init_table(&urq->sgv[0], UB_MAX_REQ_SG); | ||
638 | n_elem = blk_rq_map_sg(lun->disk->queue, rq, &urq->sgv[0]); | ||
639 | if (n_elem < 0) { | ||
640 | /* Impossible, because blk_rq_map_sg should not hit ENOMEM. */ | ||
641 | printk(KERN_INFO "%s: failed request map (%d)\n", | ||
642 | lun->name, n_elem); | ||
643 | goto drop; | ||
644 | } | ||
645 | if (n_elem > UB_MAX_REQ_SG) { /* Paranoia */ | ||
646 | printk(KERN_WARNING "%s: request with %d segments\n", | ||
647 | lun->name, n_elem); | ||
648 | goto drop; | ||
649 | } | ||
650 | urq->nsg = n_elem; | ||
651 | |||
652 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | ||
653 | ub_cmd_build_packet(sc, lun, cmd, urq); | ||
654 | } else { | ||
655 | ub_cmd_build_block(sc, lun, cmd, urq); | ||
656 | } | ||
657 | cmd->state = UB_CMDST_INIT; | ||
658 | cmd->lun = lun; | ||
659 | cmd->done = ub_rw_cmd_done; | ||
660 | cmd->back = urq; | ||
661 | |||
662 | cmd->tag = sc->tagcnt++; | ||
663 | if (ub_submit_scsi(sc, cmd) != 0) | ||
664 | goto drop; | ||
665 | |||
666 | return 0; | ||
667 | |||
668 | drop: | ||
669 | ub_put_cmd(lun, cmd); | ||
670 | ub_end_rq(rq, DID_ERROR << 16); | ||
671 | return 0; | ||
672 | } | ||
673 | |||
674 | static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, | ||
675 | struct ub_scsi_cmd *cmd, struct ub_request *urq) | ||
676 | { | ||
677 | struct request *rq = urq->rq; | ||
678 | unsigned int block, nblks; | ||
679 | |||
680 | if (rq_data_dir(rq) == WRITE) | ||
681 | cmd->dir = UB_DIR_WRITE; | ||
682 | else | ||
683 | cmd->dir = UB_DIR_READ; | ||
684 | |||
685 | cmd->nsg = urq->nsg; | ||
686 | memcpy(cmd->sgv, urq->sgv, sizeof(struct scatterlist) * cmd->nsg); | ||
687 | |||
688 | /* | ||
689 | * build the command | ||
690 | * | ||
691 | * The call to blk_queue_logical_block_size() guarantees that request | ||
692 | * is aligned, but it is given in terms of 512 byte units, always. | ||
693 | */ | ||
694 | block = blk_rq_pos(rq) >> lun->capacity.bshift; | ||
695 | nblks = blk_rq_sectors(rq) >> lun->capacity.bshift; | ||
696 | |||
697 | cmd->cdb[0] = (cmd->dir == UB_DIR_READ)? READ_10: WRITE_10; | ||
698 | /* 10-byte uses 4 bytes of LBA: 2147483648KB, 2097152MB, 2048GB */ | ||
699 | cmd->cdb[2] = block >> 24; | ||
700 | cmd->cdb[3] = block >> 16; | ||
701 | cmd->cdb[4] = block >> 8; | ||
702 | cmd->cdb[5] = block; | ||
703 | cmd->cdb[7] = nblks >> 8; | ||
704 | cmd->cdb[8] = nblks; | ||
705 | cmd->cdb_len = 10; | ||
706 | |||
707 | cmd->len = blk_rq_bytes(rq); | ||
708 | } | ||
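ub_cmd_build_block() above packs the 10-byte READ/WRITE CDB by hand: a 32-bit big-endian LBA in bytes 2..5 and a 16-bit big-endian transfer length in bytes 7..8. The same packing written with the unaligned big-endian helpers is shown below as a sketch; ub itself keeps the explicit shifts, and build_rw10_cdb() is a hypothetical name:

#include <linux/string.h>
#include <linux/types.h>
#include <asm/unaligned.h>
#include <scsi/scsi.h>		/* READ_10 / WRITE_10 opcodes */

/* Fill a 10-byte CDB for a read or write of nblks blocks at lba. */
static void build_rw10_cdb(u8 *cdb, bool is_write, u32 lba, u16 nblks)
{
	memset(cdb, 0, 10);
	cdb[0] = is_write ? WRITE_10 : READ_10;
	put_unaligned_be32(lba, &cdb[2]);	/* bytes 2..5: LBA */
	put_unaligned_be16(nblks, &cdb[7]);	/* bytes 7..8: length */
}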
709 | |||
710 | static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun, | ||
711 | struct ub_scsi_cmd *cmd, struct ub_request *urq) | ||
712 | { | ||
713 | struct request *rq = urq->rq; | ||
714 | |||
715 | if (blk_rq_bytes(rq) == 0) { | ||
716 | cmd->dir = UB_DIR_NONE; | ||
717 | } else { | ||
718 | if (rq_data_dir(rq) == WRITE) | ||
719 | cmd->dir = UB_DIR_WRITE; | ||
720 | else | ||
721 | cmd->dir = UB_DIR_READ; | ||
722 | } | ||
723 | |||
724 | cmd->nsg = urq->nsg; | ||
725 | memcpy(cmd->sgv, urq->sgv, sizeof(struct scatterlist) * cmd->nsg); | ||
726 | |||
727 | memcpy(&cmd->cdb, rq->cmd, rq->cmd_len); | ||
728 | cmd->cdb_len = rq->cmd_len; | ||
729 | |||
730 | cmd->len = blk_rq_bytes(rq); | ||
731 | |||
732 | /* | ||
733 | * To reapply this to every URB is not as incorrect as it looks. | ||
734 | * In return, we avoid any complicated tracking calculations. | ||
735 | */ | ||
736 | cmd->timeo = rq->timeout; | ||
737 | } | ||
738 | |||
739 | static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
740 | { | ||
741 | struct ub_lun *lun = cmd->lun; | ||
742 | struct ub_request *urq = cmd->back; | ||
743 | struct request *rq; | ||
744 | unsigned int scsi_status; | ||
745 | |||
746 | rq = urq->rq; | ||
747 | |||
748 | if (cmd->error == 0) { | ||
749 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | ||
750 | if (cmd->act_len >= rq->resid_len) | ||
751 | rq->resid_len = 0; | ||
752 | else | ||
753 | rq->resid_len -= cmd->act_len; | ||
754 | scsi_status = 0; | ||
755 | } else { | ||
756 | if (cmd->act_len != cmd->len) { | ||
757 | scsi_status = SAM_STAT_CHECK_CONDITION; | ||
758 | } else { | ||
759 | scsi_status = 0; | ||
760 | } | ||
761 | } | ||
762 | } else { | ||
763 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | ||
764 | /* UB_SENSE_SIZE is smaller than SCSI_SENSE_BUFFERSIZE */ | ||
765 | memcpy(rq->sense, sc->top_sense, UB_SENSE_SIZE); | ||
766 | rq->sense_len = UB_SENSE_SIZE; | ||
767 | if (sc->top_sense[0] != 0) | ||
768 | scsi_status = SAM_STAT_CHECK_CONDITION; | ||
769 | else | ||
770 | scsi_status = DID_ERROR << 16; | ||
771 | } else { | ||
772 | if (cmd->error == -EIO && | ||
773 | (cmd->key == 0 || | ||
774 | cmd->key == MEDIUM_ERROR || | ||
775 | cmd->key == UNIT_ATTENTION)) { | ||
776 | if (ub_rw_cmd_retry(sc, lun, urq, cmd) == 0) | ||
777 | return; | ||
778 | } | ||
779 | scsi_status = SAM_STAT_CHECK_CONDITION; | ||
780 | } | ||
781 | } | ||
782 | |||
783 | urq->rq = NULL; | ||
784 | |||
785 | ub_put_cmd(lun, cmd); | ||
786 | ub_end_rq(rq, scsi_status); | ||
787 | blk_start_queue(lun->disk->queue); | ||
788 | } | ||
789 | |||
790 | static void ub_end_rq(struct request *rq, unsigned int scsi_status) | ||
791 | { | ||
792 | int error; | ||
793 | |||
794 | if (scsi_status == 0) { | ||
795 | error = 0; | ||
796 | } else { | ||
797 | error = -EIO; | ||
798 | rq->errors = scsi_status; | ||
799 | } | ||
800 | __blk_end_request_all(rq, error); | ||
801 | } | ||
802 | |||
803 | static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun, | ||
804 | struct ub_request *urq, struct ub_scsi_cmd *cmd) | ||
805 | { | ||
806 | |||
807 | if (atomic_read(&sc->poison)) | ||
808 | return -ENXIO; | ||
809 | |||
810 | ub_reset_enter(sc, urq->current_try); | ||
811 | |||
812 | if (urq->current_try >= 3) | ||
813 | return -EIO; | ||
814 | urq->current_try++; | ||
815 | |||
816 | /* Remove this if anyone complains of flooding. */ | ||
817 | printk(KERN_DEBUG "%s: dir %c len/act %d/%d " | ||
818 | "[sense %x %02x %02x] retry %d\n", | ||
819 | sc->name, UB_DIR_CHAR(cmd->dir), cmd->len, cmd->act_len, | ||
820 | cmd->key, cmd->asc, cmd->ascq, urq->current_try); | ||
821 | |||
822 | memset(cmd, 0, sizeof(struct ub_scsi_cmd)); | ||
823 | ub_cmd_build_block(sc, lun, cmd, urq); | ||
824 | |||
825 | cmd->state = UB_CMDST_INIT; | ||
826 | cmd->lun = lun; | ||
827 | cmd->done = ub_rw_cmd_done; | ||
828 | cmd->back = urq; | ||
829 | |||
830 | cmd->tag = sc->tagcnt++; | ||
831 | |||
832 | #if 0 /* Wasteful */ | ||
833 | return ub_submit_scsi(sc, cmd); | ||
834 | #else | ||
835 | ub_cmdq_add(sc, cmd); | ||
836 | return 0; | ||
837 | #endif | ||
838 | } | ||
839 | |||
840 | /* | ||
841 | * Submit a regular SCSI operation (not an auto-sense). | ||
842 | * | ||
843 | * The Iron Law of Good Submit Routine is: | ||
844 | * Zero return - callback is done, Nonzero return - callback is not done. | ||
845 | * No exceptions. | ||
846 | * | ||
847 | * Host is assumed locked. | ||
848 | */ | ||
849 | static int ub_submit_scsi(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
850 | { | ||
851 | |||
852 | if (cmd->state != UB_CMDST_INIT || | ||
853 | (cmd->dir != UB_DIR_NONE && cmd->len == 0)) { | ||
854 | return -EINVAL; | ||
855 | } | ||
856 | |||
857 | ub_cmdq_add(sc, cmd); | ||
858 | /* | ||
859 | * We can call ub_scsi_dispatch(sc) right away here, but it's a little | ||
860 | * safer to jump to a tasklet, in case upper layers do something silly. | ||
861 | */ | ||
862 | tasklet_schedule(&sc->tasklet); | ||
863 | return 0; | ||
864 | } | ||
865 | |||
866 | /* | ||
867 | * Submit the first URB for the queued command. | ||
868 | * This function does not deal with queueing in any way. | ||
869 | */ | ||
870 | static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
871 | { | ||
872 | struct bulk_cb_wrap *bcb; | ||
873 | int rc; | ||
874 | |||
875 | bcb = &sc->work_bcb; | ||
876 | |||
877 | /* | ||
878 | * ``If the allocation length is eighteen or greater, and a device | ||
879 | * server returns less than eighteen bytes of data, the application | ||
880 | * client should assume that the bytes not transferred would have been | ||
881 | * zeroes had the device server returned those bytes.'' | ||
882 | * | ||
883 | * We zero sense for all commands so that when a packet request | ||
884 | * fails it does not return a stale sense. | ||
885 | */ | ||
886 | memset(&sc->top_sense, 0, UB_SENSE_SIZE); | ||
887 | |||
888 | /* set up the command wrapper */ | ||
889 | bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); | ||
890 | bcb->Tag = cmd->tag; /* Endianness is not important */ | ||
891 | bcb->DataTransferLength = cpu_to_le32(cmd->len); | ||
892 | bcb->Flags = (cmd->dir == UB_DIR_READ) ? 0x80 : 0; | ||
893 | bcb->Lun = (cmd->lun != NULL) ? cmd->lun->num : 0; | ||
894 | bcb->Length = cmd->cdb_len; | ||
895 | |||
896 | /* copy the command payload */ | ||
897 | memcpy(bcb->CDB, cmd->cdb, UB_MAX_CDB_SIZE); | ||
898 | |||
899 | UB_INIT_COMPLETION(sc->work_done); | ||
900 | |||
901 | sc->last_pipe = sc->send_bulk_pipe; | ||
902 | usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->send_bulk_pipe, | ||
903 | bcb, US_BULK_CB_WRAP_LEN, ub_urb_complete, sc); | ||
904 | |||
905 | if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { | ||
906 | /* XXX Clear stalls */ | ||
907 | ub_complete(&sc->work_done); | ||
908 | return rc; | ||
909 | } | ||
910 | |||
911 | sc->work_timer.expires = jiffies + UB_URB_TIMEOUT; | ||
912 | add_timer(&sc->work_timer); | ||
913 | |||
914 | cmd->state = UB_CMDST_CMD; | ||
915 | return 0; | ||
916 | } | ||
917 | |||
918 | /* | ||
919 | * Timeout handler. | ||
920 | */ | ||
921 | static void ub_urb_timeout(unsigned long arg) | ||
922 | { | ||
923 | struct ub_dev *sc = (struct ub_dev *) arg; | ||
924 | unsigned long flags; | ||
925 | |||
926 | spin_lock_irqsave(sc->lock, flags); | ||
927 | if (!ub_is_completed(&sc->work_done)) | ||
928 | usb_unlink_urb(&sc->work_urb); | ||
929 | spin_unlock_irqrestore(sc->lock, flags); | ||
930 | } | ||
931 | |||
932 | /* | ||
933 | * Completion routine for the work URB. | ||
934 | * | ||
935 | * This can be called directly from usb_submit_urb (while we have | ||
936 | * the sc->lock taken) and from an interrupt (while we do NOT have | ||
937 | * the sc->lock taken). Therefore, bounce this off to a tasklet. | ||
938 | */ | ||
939 | static void ub_urb_complete(struct urb *urb) | ||
940 | { | ||
941 | struct ub_dev *sc = urb->context; | ||
942 | |||
943 | ub_complete(&sc->work_done); | ||
944 | tasklet_schedule(&sc->tasklet); | ||
945 | } | ||
946 | |||
947 | static void ub_scsi_action(unsigned long _dev) | ||
948 | { | ||
949 | struct ub_dev *sc = (struct ub_dev *) _dev; | ||
950 | unsigned long flags; | ||
951 | |||
952 | spin_lock_irqsave(sc->lock, flags); | ||
953 | ub_scsi_dispatch(sc); | ||
954 | spin_unlock_irqrestore(sc->lock, flags); | ||
955 | } | ||
956 | |||
957 | static void ub_scsi_dispatch(struct ub_dev *sc) | ||
958 | { | ||
959 | struct ub_scsi_cmd *cmd; | ||
960 | int rc; | ||
961 | |||
962 | while (!sc->reset && (cmd = ub_cmdq_peek(sc)) != NULL) { | ||
963 | if (cmd->state == UB_CMDST_DONE) { | ||
964 | ub_cmdq_pop(sc); | ||
965 | (*cmd->done)(sc, cmd); | ||
966 | } else if (cmd->state == UB_CMDST_INIT) { | ||
967 | if ((rc = ub_scsi_cmd_start(sc, cmd)) == 0) | ||
968 | break; | ||
969 | cmd->error = rc; | ||
970 | cmd->state = UB_CMDST_DONE; | ||
971 | } else { | ||
972 | if (!ub_is_completed(&sc->work_done)) | ||
973 | break; | ||
974 | del_timer(&sc->work_timer); | ||
975 | ub_scsi_urb_compl(sc, cmd); | ||
976 | } | ||
977 | } | ||
978 | } | ||
979 | |||
980 | static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
981 | { | ||
982 | struct urb *urb = &sc->work_urb; | ||
983 | struct bulk_cs_wrap *bcs; | ||
984 | int endp; | ||
985 | int len; | ||
986 | int rc; | ||
987 | |||
988 | if (atomic_read(&sc->poison)) { | ||
989 | ub_state_done(sc, cmd, -ENODEV); | ||
990 | return; | ||
991 | } | ||
992 | |||
993 | endp = usb_pipeendpoint(sc->last_pipe); | ||
994 | if (usb_pipein(sc->last_pipe)) | ||
995 | endp |= USB_DIR_IN; | ||
996 | |||
997 | if (cmd->state == UB_CMDST_CLEAR) { | ||
998 | if (urb->status == -EPIPE) { | ||
999 | /* | ||
1000 | * STALL while clearing STALL. | ||
1001 | * The control pipe clears itself - nothing to do. | ||
1002 | */ | ||
1003 | printk(KERN_NOTICE "%s: stall on control pipe\n", | ||
1004 | sc->name); | ||
1005 | goto Bad_End; | ||
1006 | } | ||
1007 | |||
1008 | /* | ||
1009 | * We ignore the result for the halt clear. | ||
1010 | */ | ||
1011 | |||
1012 | usb_reset_endpoint(sc->dev, endp); | ||
1013 | |||
1014 | ub_state_sense(sc, cmd); | ||
1015 | |||
1016 | } else if (cmd->state == UB_CMDST_CLR2STS) { | ||
1017 | if (urb->status == -EPIPE) { | ||
1018 | printk(KERN_NOTICE "%s: stall on control pipe\n", | ||
1019 | sc->name); | ||
1020 | goto Bad_End; | ||
1021 | } | ||
1022 | |||
1023 | /* | ||
1024 | * We ignore the result for the halt clear. | ||
1025 | */ | ||
1026 | |||
1027 | usb_reset_endpoint(sc->dev, endp); | ||
1028 | |||
1029 | ub_state_stat(sc, cmd); | ||
1030 | |||
1031 | } else if (cmd->state == UB_CMDST_CLRRS) { | ||
1032 | if (urb->status == -EPIPE) { | ||
1033 | printk(KERN_NOTICE "%s: stall on control pipe\n", | ||
1034 | sc->name); | ||
1035 | goto Bad_End; | ||
1036 | } | ||
1037 | |||
1038 | /* | ||
1039 | * We ignore the result for the halt clear. | ||
1040 | */ | ||
1041 | |||
1042 | usb_reset_endpoint(sc->dev, endp); | ||
1043 | |||
1044 | ub_state_stat_counted(sc, cmd); | ||
1045 | |||
1046 | } else if (cmd->state == UB_CMDST_CMD) { | ||
1047 | switch (urb->status) { | ||
1048 | case 0: | ||
1049 | break; | ||
1050 | case -EOVERFLOW: | ||
1051 | goto Bad_End; | ||
1052 | case -EPIPE: | ||
1053 | rc = ub_submit_clear_stall(sc, cmd, sc->last_pipe); | ||
1054 | if (rc != 0) { | ||
1055 | printk(KERN_NOTICE "%s: " | ||
1056 | "unable to submit clear (%d)\n", | ||
1057 | sc->name, rc); | ||
1058 | /* | ||
1059 | * This is typically ENOMEM or some other such shit. | ||
1060 | * Retrying is pointless. Just do Bad End on it... | ||
1061 | */ | ||
1062 | ub_state_done(sc, cmd, rc); | ||
1063 | return; | ||
1064 | } | ||
1065 | cmd->state = UB_CMDST_CLEAR; | ||
1066 | return; | ||
1067 | case -ESHUTDOWN: /* unplug */ | ||
1068 | case -EILSEQ: /* unplug timeout on uhci */ | ||
1069 | ub_state_done(sc, cmd, -ENODEV); | ||
1070 | return; | ||
1071 | default: | ||
1072 | goto Bad_End; | ||
1073 | } | ||
1074 | if (urb->actual_length != US_BULK_CB_WRAP_LEN) { | ||
1075 | goto Bad_End; | ||
1076 | } | ||
1077 | |||
1078 | if (cmd->dir == UB_DIR_NONE || cmd->nsg < 1) { | ||
1079 | ub_state_stat(sc, cmd); | ||
1080 | return; | ||
1081 | } | ||
1082 | |||
1083 | // udelay(125); // usb-storage has this | ||
1084 | ub_data_start(sc, cmd); | ||
1085 | |||
1086 | } else if (cmd->state == UB_CMDST_DATA) { | ||
1087 | if (urb->status == -EPIPE) { | ||
1088 | rc = ub_submit_clear_stall(sc, cmd, sc->last_pipe); | ||
1089 | if (rc != 0) { | ||
1090 | printk(KERN_NOTICE "%s: " | ||
1091 | "unable to submit clear (%d)\n", | ||
1092 | sc->name, rc); | ||
1093 | ub_state_done(sc, cmd, rc); | ||
1094 | return; | ||
1095 | } | ||
1096 | cmd->state = UB_CMDST_CLR2STS; | ||
1097 | return; | ||
1098 | } | ||
1099 | if (urb->status == -EOVERFLOW) { | ||
1100 | /* | ||
1101 | * A babble? Failure, but we must transfer CSW now. | ||
1102 | */ | ||
1103 | cmd->error = -EOVERFLOW; /* A cheap trick... */ | ||
1104 | ub_state_stat(sc, cmd); | ||
1105 | return; | ||
1106 | } | ||
1107 | |||
1108 | if (cmd->dir == UB_DIR_WRITE) { | ||
1109 | /* | ||
1110 | * Do not continue writes in case of a failure. | ||
1111 | * Doing so would cause sectors to be mixed up, | ||
1112 | * which is worse than sectors lost. | ||
1113 | * | ||
1114 | * We must try to read the CSW, or many devices | ||
1115 | * get confused. | ||
1116 | */ | ||
1117 | len = urb->actual_length; | ||
1118 | if (urb->status != 0 || | ||
1119 | len != cmd->sgv[cmd->current_sg].length) { | ||
1120 | cmd->act_len += len; | ||
1121 | |||
1122 | cmd->error = -EIO; | ||
1123 | ub_state_stat(sc, cmd); | ||
1124 | return; | ||
1125 | } | ||
1126 | |||
1127 | } else { | ||
1128 | /* | ||
1129 | * If an error occurs on read, we record it, and | ||
1130 | * continue to fetch data in order to avoid a bubble. | ||
1131 | * | ||
1132 | * As a small shortcut, we stop if we detect that | ||
1133 | * a CSW was mixed into the data. | ||
1134 | */ | ||
1135 | if (urb->status != 0) | ||
1136 | cmd->error = -EIO; | ||
1137 | |||
1138 | len = urb->actual_length; | ||
1139 | if (urb->status != 0 || | ||
1140 | len != cmd->sgv[cmd->current_sg].length) { | ||
1141 | if ((len & 0x1FF) == US_BULK_CS_WRAP_LEN) | ||
1142 | goto Bad_End; | ||
1143 | } | ||
1144 | } | ||
1145 | |||
1146 | cmd->act_len += urb->actual_length; | ||
1147 | |||
1148 | if (++cmd->current_sg < cmd->nsg) { | ||
1149 | ub_data_start(sc, cmd); | ||
1150 | return; | ||
1151 | } | ||
1152 | ub_state_stat(sc, cmd); | ||
1153 | |||
1154 | } else if (cmd->state == UB_CMDST_STAT) { | ||
1155 | if (urb->status == -EPIPE) { | ||
1156 | rc = ub_submit_clear_stall(sc, cmd, sc->last_pipe); | ||
1157 | if (rc != 0) { | ||
1158 | printk(KERN_NOTICE "%s: " | ||
1159 | "unable to submit clear (%d)\n", | ||
1160 | sc->name, rc); | ||
1161 | ub_state_done(sc, cmd, rc); | ||
1162 | return; | ||
1163 | } | ||
1164 | |||
1165 | /* | ||
1166 | * Having a stall when getting CSW is an error, so | ||
1167 | * make sure upper levels are not oblivious to it. | ||
1168 | */ | ||
1169 | cmd->error = -EIO; /* A cheap trick... */ | ||
1170 | |||
1171 | cmd->state = UB_CMDST_CLRRS; | ||
1172 | return; | ||
1173 | } | ||
1174 | |||
1175 | /* Catch everything, including -EOVERFLOW and other nasties. */ | ||
1176 | if (urb->status != 0) | ||
1177 | goto Bad_End; | ||
1178 | |||
1179 | if (urb->actual_length == 0) { | ||
1180 | ub_state_stat_counted(sc, cmd); | ||
1181 | return; | ||
1182 | } | ||
1183 | |||
1184 | /* | ||
1185 | * Check the returned Bulk protocol status. | ||
1186 | * The status block has to be validated first. | ||
1187 | */ | ||
1188 | |||
1189 | bcs = &sc->work_bcs; | ||
1190 | |||
1191 | if (sc->signature == cpu_to_le32(0)) { | ||
1192 | /* | ||
1193 | * This is the first reply, so do not perform the check. | ||
1194 | * Instead, remember the signature the device uses | ||
1195 | * for future checks. But do not allow a nul. | ||
1196 | */ | ||
1197 | sc->signature = bcs->Signature; | ||
1198 | if (sc->signature == cpu_to_le32(0)) { | ||
1199 | ub_state_stat_counted(sc, cmd); | ||
1200 | return; | ||
1201 | } | ||
1202 | } else { | ||
1203 | if (bcs->Signature != sc->signature) { | ||
1204 | ub_state_stat_counted(sc, cmd); | ||
1205 | return; | ||
1206 | } | ||
1207 | } | ||
1208 | |||
1209 | if (bcs->Tag != cmd->tag) { | ||
1210 | /* | ||
1211 | * This usually happens when we disagree with the | ||
1212 | * device's microcode about something. For instance, | ||
1213 | * a few of them throw this after timeouts. They buffer | ||
1214 | * commands and reply to commands we timed out before. | ||
1215 | * Without flushing these replies we loop forever. | ||
1216 | */ | ||
1217 | ub_state_stat_counted(sc, cmd); | ||
1218 | return; | ||
1219 | } | ||
1220 | |||
1221 | if (!sc->bad_resid) { | ||
1222 | len = le32_to_cpu(bcs->Residue); | ||
1223 | if (len != cmd->len - cmd->act_len) { | ||
1224 | /* | ||
1225 | * Only start ignoring if this cmd ended well. | ||
1226 | */ | ||
1227 | if (cmd->len == cmd->act_len) { | ||
1228 | printk(KERN_NOTICE "%s: " | ||
1229 | "bad residual %d of %d, ignoring\n", | ||
1230 | sc->name, len, cmd->len); | ||
1231 | sc->bad_resid = 1; | ||
1232 | } | ||
1233 | } | ||
1234 | } | ||
1235 | |||
1236 | switch (bcs->Status) { | ||
1237 | case US_BULK_STAT_OK: | ||
1238 | break; | ||
1239 | case US_BULK_STAT_FAIL: | ||
1240 | ub_state_sense(sc, cmd); | ||
1241 | return; | ||
1242 | case US_BULK_STAT_PHASE: | ||
1243 | goto Bad_End; | ||
1244 | default: | ||
1245 | printk(KERN_INFO "%s: unknown CSW status 0x%x\n", | ||
1246 | sc->name, bcs->Status); | ||
1247 | ub_state_done(sc, cmd, -EINVAL); | ||
1248 | return; | ||
1249 | } | ||
1250 | |||
1251 | /* Not zeroing error to preserve a babble indicator */ | ||
1252 | if (cmd->error != 0) { | ||
1253 | ub_state_sense(sc, cmd); | ||
1254 | return; | ||
1255 | } | ||
1256 | cmd->state = UB_CMDST_DONE; | ||
1257 | ub_cmdq_pop(sc); | ||
1258 | (*cmd->done)(sc, cmd); | ||
1259 | |||
1260 | } else if (cmd->state == UB_CMDST_SENSE) { | ||
1261 | ub_state_done(sc, cmd, -EIO); | ||
1262 | |||
1263 | } else { | ||
1264 | printk(KERN_WARNING "%s: wrong command state %d\n", | ||
1265 | sc->name, cmd->state); | ||
1266 | ub_state_done(sc, cmd, -EINVAL); | ||
1267 | return; | ||
1268 | } | ||
1269 | return; | ||
1270 | |||
1271 | Bad_End: /* Little Excel is dead */ | ||
1272 | ub_state_done(sc, cmd, -EIO); | ||
1273 | } | ||
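The UB_CMDST_STAT branch above validates the CSW strictly in the order signature, tag, residue, status, which is what lets the driver flush stale replies left over from timed-out commands. A stand-alone sketch of that check, assuming the struct bulk_cs_wrap layout and US_BULK_* constants from the usb-storage headers (the helper name and its exact error codes are illustrative only):

	/*
	 * Sketch only: CSW sanity check in the same order as the STAT
	 * branch above (signature, tag, residue, status).
	 */
	static int example_check_csw(struct ub_dev *sc, struct bulk_cs_wrap *bcs,
				struct ub_scsi_cmd *cmd)
	{
		if (sc->signature != cpu_to_le32(0) &&
		    bcs->Signature != sc->signature)
			return -EPROTO;		/* not a CSW; read the pipe again */
		if (bcs->Tag != cmd->tag)
			return -EPROTO;		/* stale reply from a timed-out command */
		if (le32_to_cpu(bcs->Residue) != cmd->len - cmd->act_len)
			pr_debug("%s: unexpected residue\n", sc->name);	/* noted, not fatal */
		switch (bcs->Status) {
		case US_BULK_STAT_OK:
			return 0;
		case US_BULK_STAT_FAIL:
			return -EIO;		/* collect sense data next */
		case US_BULK_STAT_PHASE:
			return -EIO;		/* phase error: hard failure */
		default:
			return -EINVAL;		/* unknown CSW status */
		}
	}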
1274 | |||
1275 | /* | ||
1276 | * Factorization helper for the command state machine: | ||
1277 | * Initiate a data segment transfer. | ||
1278 | */ | ||
1279 | static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
1280 | { | ||
1281 | struct scatterlist *sg = &cmd->sgv[cmd->current_sg]; | ||
1282 | int pipe; | ||
1283 | int rc; | ||
1284 | |||
1285 | UB_INIT_COMPLETION(sc->work_done); | ||
1286 | |||
1287 | if (cmd->dir == UB_DIR_READ) | ||
1288 | pipe = sc->recv_bulk_pipe; | ||
1289 | else | ||
1290 | pipe = sc->send_bulk_pipe; | ||
1291 | sc->last_pipe = pipe; | ||
1292 | usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, sg_virt(sg), | ||
1293 | sg->length, ub_urb_complete, sc); | ||
1294 | |||
1295 | if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { | ||
1296 | /* XXX Clear stalls */ | ||
1297 | ub_complete(&sc->work_done); | ||
1298 | ub_state_done(sc, cmd, rc); | ||
1299 | return; | ||
1300 | } | ||
1301 | |||
1302 | if (cmd->timeo) | ||
1303 | sc->work_timer.expires = jiffies + cmd->timeo; | ||
1304 | else | ||
1305 | sc->work_timer.expires = jiffies + UB_DATA_TIMEOUT; | ||
1306 | add_timer(&sc->work_timer); | ||
1307 | |||
1308 | cmd->state = UB_CMDST_DATA; | ||
1309 | } | ||
1310 | |||
1311 | /* | ||
1312 | * Factorization helper for the command state machine: | ||
1313 | * Finish the command. | ||
1314 | */ | ||
1315 | static void ub_state_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd, int rc) | ||
1316 | { | ||
1317 | |||
1318 | cmd->error = rc; | ||
1319 | cmd->state = UB_CMDST_DONE; | ||
1320 | ub_cmdq_pop(sc); | ||
1321 | (*cmd->done)(sc, cmd); | ||
1322 | } | ||
1323 | |||
1324 | /* | ||
1325 | * Factorization helper for the command state machine: | ||
1326 | * Submit a CSW read. | ||
1327 | */ | ||
1328 | static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
1329 | { | ||
1330 | int rc; | ||
1331 | |||
1332 | UB_INIT_COMPLETION(sc->work_done); | ||
1333 | |||
1334 | sc->last_pipe = sc->recv_bulk_pipe; | ||
1335 | usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->recv_bulk_pipe, | ||
1336 | &sc->work_bcs, US_BULK_CS_WRAP_LEN, ub_urb_complete, sc); | ||
1337 | |||
1338 | if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { | ||
1339 | /* XXX Clear stalls */ | ||
1340 | ub_complete(&sc->work_done); | ||
1341 | ub_state_done(sc, cmd, rc); | ||
1342 | return -1; | ||
1343 | } | ||
1344 | |||
1345 | if (cmd->timeo) | ||
1346 | sc->work_timer.expires = jiffies + cmd->timeo; | ||
1347 | else | ||
1348 | sc->work_timer.expires = jiffies + UB_STAT_TIMEOUT; | ||
1349 | add_timer(&sc->work_timer); | ||
1350 | return 0; | ||
1351 | } | ||
1352 | |||
1353 | /* | ||
1354 | * Factorization helper for the command state machine: | ||
1355 | * Submit a CSW read and go to STAT state. | ||
1356 | */ | ||
1357 | static void ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
1358 | { | ||
1359 | |||
1360 | if (__ub_state_stat(sc, cmd) != 0) | ||
1361 | return; | ||
1362 | |||
1363 | cmd->stat_count = 0; | ||
1364 | cmd->state = UB_CMDST_STAT; | ||
1365 | } | ||
1366 | |||
1367 | /* | ||
1368 | * Factorization helper for the command state machine: | ||
1369 | * Submit a CSW read and go to STAT state with counter (along [C] path). | ||
1370 | */ | ||
1371 | static void ub_state_stat_counted(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
1372 | { | ||
1373 | |||
1374 | if (++cmd->stat_count >= 4) { | ||
1375 | ub_state_sense(sc, cmd); | ||
1376 | return; | ||
1377 | } | ||
1378 | |||
1379 | if (__ub_state_stat(sc, cmd) != 0) | ||
1380 | return; | ||
1381 | |||
1382 | cmd->state = UB_CMDST_STAT; | ||
1383 | } | ||
1384 | |||
1385 | /* | ||
1386 | * Factorization helper for the command state machine: | ||
1387 | * Submit a REQUEST SENSE and go to SENSE state. | ||
1388 | */ | ||
1389 | static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
1390 | { | ||
1391 | struct ub_scsi_cmd *scmd; | ||
1392 | struct scatterlist *sg; | ||
1393 | int rc; | ||
1394 | |||
1395 | if (cmd->cdb[0] == REQUEST_SENSE) { | ||
1396 | rc = -EPIPE; | ||
1397 | goto error; | ||
1398 | } | ||
1399 | |||
1400 | scmd = &sc->top_rqs_cmd; | ||
1401 | memset(scmd, 0, sizeof(struct ub_scsi_cmd)); | ||
1402 | scmd->cdb[0] = REQUEST_SENSE; | ||
1403 | scmd->cdb[4] = UB_SENSE_SIZE; | ||
1404 | scmd->cdb_len = 6; | ||
1405 | scmd->dir = UB_DIR_READ; | ||
1406 | scmd->state = UB_CMDST_INIT; | ||
1407 | scmd->nsg = 1; | ||
1408 | sg = &scmd->sgv[0]; | ||
1409 | sg_init_table(sg, UB_MAX_REQ_SG); | ||
1410 | sg_set_page(sg, virt_to_page(sc->top_sense), UB_SENSE_SIZE, | ||
1411 | (unsigned long)sc->top_sense & (PAGE_SIZE-1)); | ||
1412 | scmd->len = UB_SENSE_SIZE; | ||
1413 | scmd->lun = cmd->lun; | ||
1414 | scmd->done = ub_top_sense_done; | ||
1415 | scmd->back = cmd; | ||
1416 | |||
1417 | scmd->tag = sc->tagcnt++; | ||
1418 | |||
1419 | cmd->state = UB_CMDST_SENSE; | ||
1420 | |||
1421 | ub_cmdq_insert(sc, scmd); | ||
1422 | return; | ||
1423 | |||
1424 | error: | ||
1425 | ub_state_done(sc, cmd, rc); | ||
1426 | } | ||
1427 | |||
1428 | /* | ||
1429 | * A helper for the command's state machine: | ||
1430 | * Submit a stall clear. | ||
1431 | */ | ||
1432 | static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd, | ||
1433 | int stalled_pipe) | ||
1434 | { | ||
1435 | int endp; | ||
1436 | struct usb_ctrlrequest *cr; | ||
1437 | int rc; | ||
1438 | |||
1439 | endp = usb_pipeendpoint(stalled_pipe); | ||
1440 | if (usb_pipein (stalled_pipe)) | ||
1441 | endp |= USB_DIR_IN; | ||
1442 | |||
1443 | cr = &sc->work_cr; | ||
1444 | cr->bRequestType = USB_RECIP_ENDPOINT; | ||
1445 | cr->bRequest = USB_REQ_CLEAR_FEATURE; | ||
1446 | cr->wValue = cpu_to_le16(USB_ENDPOINT_HALT); | ||
1447 | cr->wIndex = cpu_to_le16(endp); | ||
1448 | cr->wLength = cpu_to_le16(0); | ||
1449 | |||
1450 | UB_INIT_COMPLETION(sc->work_done); | ||
1451 | |||
1452 | usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe, | ||
1453 | (unsigned char*) cr, NULL, 0, ub_urb_complete, sc); | ||
1454 | |||
1455 | if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { | ||
1456 | ub_complete(&sc->work_done); | ||
1457 | return rc; | ||
1458 | } | ||
1459 | |||
1460 | sc->work_timer.expires = jiffies + UB_CTRL_TIMEOUT; | ||
1461 | add_timer(&sc->work_timer); | ||
1462 | return 0; | ||
1463 | } | ||
1464 | |||
1465 | /* | ||
1466 | */ | ||
1467 | static void ub_top_sense_done(struct ub_dev *sc, struct ub_scsi_cmd *scmd) | ||
1468 | { | ||
1469 | unsigned char *sense = sc->top_sense; | ||
1470 | struct ub_scsi_cmd *cmd; | ||
1471 | |||
1472 | /* | ||
1473 | * Find the command which triggered the unit attention or a check, | ||
1474 | * save the sense into it, and advance its state machine. | ||
1475 | */ | ||
1476 | if ((cmd = ub_cmdq_peek(sc)) == NULL) { | ||
1477 | printk(KERN_WARNING "%s: sense done while idle\n", sc->name); | ||
1478 | return; | ||
1479 | } | ||
1480 | if (cmd != scmd->back) { | ||
1481 | printk(KERN_WARNING "%s: " | ||
1482 | "sense done for wrong command 0x%x\n", | ||
1483 | sc->name, cmd->tag); | ||
1484 | return; | ||
1485 | } | ||
1486 | if (cmd->state != UB_CMDST_SENSE) { | ||
1487 | printk(KERN_WARNING "%s: sense done with bad cmd state %d\n", | ||
1488 | sc->name, cmd->state); | ||
1489 | return; | ||
1490 | } | ||
1491 | |||
1492 | /* | ||
1493 | * Ignoring scmd->act_len, because the buffer was pre-zeroed. | ||
1494 | */ | ||
1495 | cmd->key = sense[2] & 0x0F; | ||
1496 | cmd->asc = sense[12]; | ||
1497 | cmd->ascq = sense[13]; | ||
1498 | |||
1499 | ub_scsi_urb_compl(sc, cmd); | ||
1500 | } | ||
1501 | |||
1502 | /* | ||
1503 | * Reset management | ||
1504 | */ | ||
1505 | |||
1506 | static void ub_reset_enter(struct ub_dev *sc, int try) | ||
1507 | { | ||
1508 | |||
1509 | if (sc->reset) { | ||
1510 | /* This happens often on multi-LUN devices. */ | ||
1511 | return; | ||
1512 | } | ||
1513 | sc->reset = try + 1; | ||
1514 | |||
1515 | #if 0 /* Not needed because the disconnect waits for us. */ | ||
1516 | unsigned long flags; | ||
1517 | spin_lock_irqsave(&ub_lock, flags); | ||
1518 | sc->openc++; | ||
1519 | spin_unlock_irqrestore(&ub_lock, flags); | ||
1520 | #endif | ||
1521 | |||
1522 | #if 0 /* We let them stop themselves. */ | ||
1523 | struct ub_lun *lun; | ||
1524 | list_for_each_entry(lun, &sc->luns, link) { | ||
1525 | blk_stop_queue(lun->disk->queue); | ||
1526 | } | ||
1527 | #endif | ||
1528 | |||
1529 | schedule_work(&sc->reset_work); | ||
1530 | } | ||
1531 | |||
1532 | static void ub_reset_task(struct work_struct *work) | ||
1533 | { | ||
1534 | struct ub_dev *sc = container_of(work, struct ub_dev, reset_work); | ||
1535 | unsigned long flags; | ||
1536 | struct ub_lun *lun; | ||
1537 | int rc; | ||
1538 | |||
1539 | if (!sc->reset) { | ||
1540 | printk(KERN_WARNING "%s: Running reset unrequested\n", | ||
1541 | sc->name); | ||
1542 | return; | ||
1543 | } | ||
1544 | |||
1545 | if (atomic_read(&sc->poison)) { | ||
1546 | ; | ||
1547 | } else if ((sc->reset & 1) == 0) { | ||
1548 | ub_sync_reset(sc); | ||
1549 | msleep(700); /* usb-storage sleeps 6s (!) */ | ||
1550 | ub_probe_clear_stall(sc, sc->recv_bulk_pipe); | ||
1551 | ub_probe_clear_stall(sc, sc->send_bulk_pipe); | ||
1552 | } else if (sc->dev->actconfig->desc.bNumInterfaces != 1) { | ||
1553 | ; | ||
1554 | } else { | ||
1555 | rc = usb_lock_device_for_reset(sc->dev, sc->intf); | ||
1556 | if (rc < 0) { | ||
1557 | printk(KERN_NOTICE | ||
1558 | "%s: usb_lock_device_for_reset failed (%d)\n", | ||
1559 | sc->name, rc); | ||
1560 | } else { | ||
1561 | rc = usb_reset_device(sc->dev); | ||
1562 | if (rc < 0) { | ||
1563 | printk(KERN_NOTICE "%s: " | ||
1564 | "usb_lock_device_for_reset failed (%d)\n", | ||
1565 | sc->name, rc); | ||
1566 | } | ||
1567 | usb_unlock_device(sc->dev); | ||
1568 | } | ||
1569 | } | ||
1570 | |||
1571 | /* | ||
1572 | * In theory, no commands can be running while reset is active, | ||
1573 | * so nobody can ask for another reset, and so we do not need any | ||
1574 | * queues of resets or anything. We do need a spinlock though, | ||
1575 | * to interact with block layer. | ||
1576 | */ | ||
1577 | spin_lock_irqsave(sc->lock, flags); | ||
1578 | sc->reset = 0; | ||
1579 | tasklet_schedule(&sc->tasklet); | ||
1580 | list_for_each_entry(lun, &sc->luns, link) { | ||
1581 | blk_start_queue(lun->disk->queue); | ||
1582 | } | ||
1583 | wake_up(&sc->reset_wait); | ||
1584 | spin_unlock_irqrestore(sc->lock, flags); | ||
1585 | } | ||
1586 | |||
1587 | /* | ||
1588 | * XXX Reset brackets are too much hassle to implement, so just stub them | ||
1589 | * in order to prevent forced unbinding (which deadlocks solid when our | ||
1590 | * ->disconnect method waits for the reset to complete and this kills keventd). | ||
1591 | * | ||
1592 | * XXX Tell Alan to move usb_unlock_device inside of usb_reset_device, | ||
1593 | * or else the post_reset is invoked, and restarts I/O on a locked device. | ||
1594 | */ | ||
1595 | static int ub_pre_reset(struct usb_interface *iface) { | ||
1596 | return 0; | ||
1597 | } | ||
1598 | |||
1599 | static int ub_post_reset(struct usb_interface *iface) { | ||
1600 | return 0; | ||
1601 | } | ||
1602 | |||
1603 | /* | ||
1604 | * This is called from a process context. | ||
1605 | */ | ||
1606 | static void ub_revalidate(struct ub_dev *sc, struct ub_lun *lun) | ||
1607 | { | ||
1608 | |||
1609 | lun->readonly = 0; /* XXX Query this from the device */ | ||
1610 | |||
1611 | lun->capacity.nsec = 0; | ||
1612 | lun->capacity.bsize = 512; | ||
1613 | lun->capacity.bshift = 0; | ||
1614 | |||
1615 | if (ub_sync_tur(sc, lun) != 0) | ||
1616 | return; /* Not ready */ | ||
1617 | lun->changed = 0; | ||
1618 | |||
1619 | if (ub_sync_read_cap(sc, lun, &lun->capacity) != 0) { | ||
1620 | /* | ||
1621 | * The retry here means something is wrong, either with the | ||
1622 | * device, with the transport, or with our code. | ||
1623 | * We keep this because sd.c has retries for capacity. | ||
1624 | */ | ||
1625 | if (ub_sync_read_cap(sc, lun, &lun->capacity) != 0) { | ||
1626 | lun->capacity.nsec = 0; | ||
1627 | lun->capacity.bsize = 512; | ||
1628 | lun->capacity.bshift = 0; | ||
1629 | } | ||
1630 | } | ||
1631 | } | ||
1632 | |||
1633 | /* | ||
1634 | * The open function. | ||
1635 | * This is mostly needed to keep refcounting, but also to support | ||
1636 | * media checks on removable media drives. | ||
1637 | */ | ||
1638 | static int ub_bd_open(struct block_device *bdev, fmode_t mode) | ||
1639 | { | ||
1640 | struct ub_lun *lun = bdev->bd_disk->private_data; | ||
1641 | struct ub_dev *sc = lun->udev; | ||
1642 | unsigned long flags; | ||
1643 | int rc; | ||
1644 | |||
1645 | spin_lock_irqsave(&ub_lock, flags); | ||
1646 | if (atomic_read(&sc->poison)) { | ||
1647 | spin_unlock_irqrestore(&ub_lock, flags); | ||
1648 | return -ENXIO; | ||
1649 | } | ||
1650 | sc->openc++; | ||
1651 | spin_unlock_irqrestore(&ub_lock, flags); | ||
1652 | |||
1653 | if (lun->removable || lun->readonly) | ||
1654 | check_disk_change(bdev); | ||
1655 | |||
1656 | /* | ||
1657 | * The sd.c considers ->media_present and ->changed not equivalent, | ||
1658 | * under some pretty murky conditions (a failure of READ CAPACITY). | ||
1659 | * We may need it one day. | ||
1660 | */ | ||
1661 | if (lun->removable && lun->changed && !(mode & FMODE_NDELAY)) { | ||
1662 | rc = -ENOMEDIUM; | ||
1663 | goto err_open; | ||
1664 | } | ||
1665 | |||
1666 | if (lun->readonly && (mode & FMODE_WRITE)) { | ||
1667 | rc = -EROFS; | ||
1668 | goto err_open; | ||
1669 | } | ||
1670 | |||
1671 | return 0; | ||
1672 | |||
1673 | err_open: | ||
1674 | ub_put(sc); | ||
1675 | return rc; | ||
1676 | } | ||
1677 | |||
1678 | static int ub_bd_unlocked_open(struct block_device *bdev, fmode_t mode) | ||
1679 | { | ||
1680 | int ret; | ||
1681 | |||
1682 | mutex_lock(&ub_mutex); | ||
1683 | ret = ub_bd_open(bdev, mode); | ||
1684 | mutex_unlock(&ub_mutex); | ||
1685 | |||
1686 | return ret; | ||
1687 | } | ||
1688 | |||
1689 | |||
1690 | /* | ||
1691 | */ | ||
1692 | static int ub_bd_release(struct gendisk *disk, fmode_t mode) | ||
1693 | { | ||
1694 | struct ub_lun *lun = disk->private_data; | ||
1695 | struct ub_dev *sc = lun->udev; | ||
1696 | |||
1697 | mutex_lock(&ub_mutex); | ||
1698 | ub_put(sc); | ||
1699 | mutex_unlock(&ub_mutex); | ||
1700 | |||
1701 | return 0; | ||
1702 | } | ||
1703 | |||
1704 | /* | ||
1705 | * The ioctl interface. | ||
1706 | */ | ||
1707 | static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode, | ||
1708 | unsigned int cmd, unsigned long arg) | ||
1709 | { | ||
1710 | void __user *usermem = (void __user *) arg; | ||
1711 | int ret; | ||
1712 | |||
1713 | mutex_lock(&ub_mutex); | ||
1714 | ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, usermem); | ||
1715 | mutex_unlock(&ub_mutex); | ||
1716 | |||
1717 | return ret; | ||
1718 | } | ||
1719 | |||
1720 | /* | ||
1721 | * This is called by check_disk_change if we reported a media change. | ||
1722 | * The main objective here is to discover the features of the media such as | ||
1723 | * the capacity, read-only status, etc. USB storage generally does not | ||
1724 | * need to be spun up, but if we needed it, this would be the place. | ||
1725 | * | ||
1726 | * This call can sleep. | ||
1727 | * | ||
1728 | * The return code is not used. | ||
1729 | */ | ||
1730 | static int ub_bd_revalidate(struct gendisk *disk) | ||
1731 | { | ||
1732 | struct ub_lun *lun = disk->private_data; | ||
1733 | |||
1734 | ub_revalidate(lun->udev, lun); | ||
1735 | |||
1736 | /* XXX Support sector size switching like in sr.c */ | ||
1737 | blk_queue_logical_block_size(disk->queue, lun->capacity.bsize); | ||
1738 | set_capacity(disk, lun->capacity.nsec); | ||
1739 | // set_disk_ro(sdkp->disk, lun->readonly); | ||
1740 | |||
1741 | return 0; | ||
1742 | } | ||
1743 | |||
1744 | /* | ||
1745 | * The check is called by the block layer to verify if the media | ||
1746 | * is still available. It is supposed to be harmless, lightweight and | ||
1747 | * non-intrusive in case the media was not changed. | ||
1748 | * | ||
1749 | * This call can sleep. | ||
1750 | * | ||
1751 | * The return code is a DISK_EVENT_* bitmask. | ||
1752 | */ | ||
1753 | static unsigned int ub_bd_check_events(struct gendisk *disk, | ||
1754 | unsigned int clearing) | ||
1755 | { | ||
1756 | struct ub_lun *lun = disk->private_data; | ||
1757 | |||
1758 | if (!lun->removable) | ||
1759 | return 0; | ||
1760 | |||
1761 | /* | ||
1762 | * We always clear checks after every command, so this is not | ||
1763 | * as dangerous as it looks. If the TEST_UNIT_READY fails here, | ||
1764 | * the device is actually not ready with operator or software | ||
1765 | * intervention required. One dangerous item might be a drive which | ||
1766 | * spins itself down, and come the time to write dirty pages, this | ||
1767 | * will fail, and then the block layer discards the data. Since we never | ||
1768 | * spin drives up, such devices simply cannot be used with ub anyway. | ||
1769 | */ | ||
1770 | if (ub_sync_tur(lun->udev, lun) != 0) { | ||
1771 | lun->changed = 1; | ||
1772 | return DISK_EVENT_MEDIA_CHANGE; | ||
1773 | } | ||
1774 | |||
1775 | return lun->changed ? DISK_EVENT_MEDIA_CHANGE : 0; | ||
1776 | } | ||
1777 | |||
1778 | static const struct block_device_operations ub_bd_fops = { | ||
1779 | .owner = THIS_MODULE, | ||
1780 | .open = ub_bd_unlocked_open, | ||
1781 | .release = ub_bd_release, | ||
1782 | .ioctl = ub_bd_ioctl, | ||
1783 | .check_events = ub_bd_check_events, | ||
1784 | .revalidate_disk = ub_bd_revalidate, | ||
1785 | }; | ||
1786 | |||
1787 | /* | ||
1788 | * Common ->done routine for commands executed synchronously. | ||
1789 | */ | ||
1790 | static void ub_probe_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd) | ||
1791 | { | ||
1792 | struct completion *cop = cmd->back; | ||
1793 | complete(cop); | ||
1794 | } | ||
1795 | |||
1796 | /* | ||
1797 | * Test if the device has a check condition on it, synchronously. | ||
1798 | */ | ||
1799 | static int ub_sync_tur(struct ub_dev *sc, struct ub_lun *lun) | ||
1800 | { | ||
1801 | struct ub_scsi_cmd *cmd; | ||
1802 | enum { ALLOC_SIZE = sizeof(struct ub_scsi_cmd) }; | ||
1803 | unsigned long flags; | ||
1804 | struct completion compl; | ||
1805 | int rc; | ||
1806 | |||
1807 | init_completion(&compl); | ||
1808 | |||
1809 | rc = -ENOMEM; | ||
1810 | if ((cmd = kzalloc(ALLOC_SIZE, GFP_KERNEL)) == NULL) | ||
1811 | goto err_alloc; | ||
1812 | |||
1813 | cmd->cdb[0] = TEST_UNIT_READY; | ||
1814 | cmd->cdb_len = 6; | ||
1815 | cmd->dir = UB_DIR_NONE; | ||
1816 | cmd->state = UB_CMDST_INIT; | ||
1817 | cmd->lun = lun; /* This may be NULL, but that's ok */ | ||
1818 | cmd->done = ub_probe_done; | ||
1819 | cmd->back = &compl; | ||
1820 | |||
1821 | spin_lock_irqsave(sc->lock, flags); | ||
1822 | cmd->tag = sc->tagcnt++; | ||
1823 | |||
1824 | rc = ub_submit_scsi(sc, cmd); | ||
1825 | spin_unlock_irqrestore(sc->lock, flags); | ||
1826 | |||
1827 | if (rc != 0) | ||
1828 | goto err_submit; | ||
1829 | |||
1830 | wait_for_completion(&compl); | ||
1831 | |||
1832 | rc = cmd->error; | ||
1833 | |||
1834 | if (rc == -EIO && cmd->key != 0) /* Retries for benh's key */ | ||
1835 | rc = cmd->key; | ||
1836 | |||
1837 | err_submit: | ||
1838 | kfree(cmd); | ||
1839 | err_alloc: | ||
1840 | return rc; | ||
1841 | } | ||
1842 | |||
1843 | /* | ||
1844 | * Read the SCSI capacity synchronously (for probing). | ||
1845 | */ | ||
1846 | static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun, | ||
1847 | struct ub_capacity *ret) | ||
1848 | { | ||
1849 | struct ub_scsi_cmd *cmd; | ||
1850 | struct scatterlist *sg; | ||
1851 | char *p; | ||
1852 | enum { ALLOC_SIZE = sizeof(struct ub_scsi_cmd) + 8 }; | ||
1853 | unsigned long flags; | ||
1854 | unsigned int bsize, shift; | ||
1855 | unsigned long nsec; | ||
1856 | struct completion compl; | ||
1857 | int rc; | ||
1858 | |||
1859 | init_completion(&compl); | ||
1860 | |||
1861 | rc = -ENOMEM; | ||
1862 | if ((cmd = kzalloc(ALLOC_SIZE, GFP_KERNEL)) == NULL) | ||
1863 | goto err_alloc; | ||
1864 | p = (char *)cmd + sizeof(struct ub_scsi_cmd); | ||
1865 | |||
1866 | cmd->cdb[0] = 0x25; | ||
1867 | cmd->cdb_len = 10; | ||
1868 | cmd->dir = UB_DIR_READ; | ||
1869 | cmd->state = UB_CMDST_INIT; | ||
1870 | cmd->nsg = 1; | ||
1871 | sg = &cmd->sgv[0]; | ||
1872 | sg_init_table(sg, UB_MAX_REQ_SG); | ||
1873 | sg_set_page(sg, virt_to_page(p), 8, (unsigned long)p & (PAGE_SIZE-1)); | ||
1874 | cmd->len = 8; | ||
1875 | cmd->lun = lun; | ||
1876 | cmd->done = ub_probe_done; | ||
1877 | cmd->back = &compl; | ||
1878 | |||
1879 | spin_lock_irqsave(sc->lock, flags); | ||
1880 | cmd->tag = sc->tagcnt++; | ||
1881 | |||
1882 | rc = ub_submit_scsi(sc, cmd); | ||
1883 | spin_unlock_irqrestore(sc->lock, flags); | ||
1884 | |||
1885 | if (rc != 0) | ||
1886 | goto err_submit; | ||
1887 | |||
1888 | wait_for_completion(&compl); | ||
1889 | |||
1890 | if (cmd->error != 0) { | ||
1891 | rc = -EIO; | ||
1892 | goto err_read; | ||
1893 | } | ||
1894 | if (cmd->act_len != 8) { | ||
1895 | rc = -EIO; | ||
1896 | goto err_read; | ||
1897 | } | ||
1898 | |||
1899 | /* sd.c special-cases sector size of 0 to mean 512. Needed? Safe? */ | ||
1900 | nsec = be32_to_cpu(*(__be32 *)p) + 1; | ||
1901 | bsize = be32_to_cpu(*(__be32 *)(p + 4)); | ||
1902 | switch (bsize) { | ||
1903 | case 512: shift = 0; break; | ||
1904 | case 1024: shift = 1; break; | ||
1905 | case 2048: shift = 2; break; | ||
1906 | case 4096: shift = 3; break; | ||
1907 | default: | ||
1908 | rc = -EDOM; | ||
1909 | goto err_inv_bsize; | ||
1910 | } | ||
1911 | |||
1912 | ret->bsize = bsize; | ||
1913 | ret->bshift = shift; | ||
1914 | ret->nsec = nsec << shift; | ||
1915 | rc = 0; | ||
1916 | |||
1917 | err_inv_bsize: | ||
1918 | err_read: | ||
1919 | err_submit: | ||
1920 | kfree(cmd); | ||
1921 | err_alloc: | ||
1922 | return rc; | ||
1923 | } | ||
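For reference, the arithmetic above implements READ CAPACITY(10): the first big-endian word of the 8-byte reply is the last LBA (so the block count is that value plus one), the second word is the block size, and the count is converted to 512-byte sectors by the shift. A worked example with made-up numbers:

	/*
	 * Worked example (hypothetical reply) of the decoding above.
	 */
	static void example_decode_capacity(void)
	{
		u32 last_lba = 0x003C08FF;		/* reply bytes 0..3, already byte-swapped */
		u32 bsize = 2048;			/* reply bytes 4..7 */
		unsigned long nsec = last_lba + 1;	/* 3934464 native blocks */
		unsigned int shift = 2;			/* because 2048 == 512 << 2 */
		unsigned long sectors = nsec << shift;	/* 15737856 512-byte sectors */

		(void)bsize;
		(void)sectors;		/* what would land in lun->capacity.nsec */
	}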
1924 | |||
1925 | /* | ||
1926 | */ | ||
1927 | static void ub_probe_urb_complete(struct urb *urb) | ||
1928 | { | ||
1929 | struct completion *cop = urb->context; | ||
1930 | complete(cop); | ||
1931 | } | ||
1932 | |||
1933 | static void ub_probe_timeout(unsigned long arg) | ||
1934 | { | ||
1935 | struct completion *cop = (struct completion *) arg; | ||
1936 | complete(cop); | ||
1937 | } | ||
1938 | |||
1939 | /* | ||
1940 | * Reset with a Bulk reset. | ||
1941 | */ | ||
1942 | static int ub_sync_reset(struct ub_dev *sc) | ||
1943 | { | ||
1944 | int ifnum = sc->intf->cur_altsetting->desc.bInterfaceNumber; | ||
1945 | struct usb_ctrlrequest *cr; | ||
1946 | struct completion compl; | ||
1947 | struct timer_list timer; | ||
1948 | int rc; | ||
1949 | |||
1950 | init_completion(&compl); | ||
1951 | |||
1952 | cr = &sc->work_cr; | ||
1953 | cr->bRequestType = USB_TYPE_CLASS | USB_RECIP_INTERFACE; | ||
1954 | cr->bRequest = US_BULK_RESET_REQUEST; | ||
1955 | cr->wValue = cpu_to_le16(0); | ||
1956 | cr->wIndex = cpu_to_le16(ifnum); | ||
1957 | cr->wLength = cpu_to_le16(0); | ||
1958 | |||
1959 | usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe, | ||
1960 | (unsigned char*) cr, NULL, 0, ub_probe_urb_complete, &compl); | ||
1961 | |||
1962 | if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) { | ||
1963 | printk(KERN_WARNING | ||
1964 | "%s: Unable to submit a bulk reset (%d)\n", sc->name, rc); | ||
1965 | return rc; | ||
1966 | } | ||
1967 | |||
1968 | init_timer(&timer); | ||
1969 | timer.function = ub_probe_timeout; | ||
1970 | timer.data = (unsigned long) &compl; | ||
1971 | timer.expires = jiffies + UB_CTRL_TIMEOUT; | ||
1972 | add_timer(&timer); | ||
1973 | |||
1974 | wait_for_completion(&compl); | ||
1975 | |||
1976 | del_timer_sync(&timer); | ||
1977 | usb_kill_urb(&sc->work_urb); | ||
1978 | |||
1979 | return sc->work_urb.status; | ||
1980 | } | ||
1981 | |||
1982 | /* | ||
1983 | * Get number of LUNs by the way of Bulk GetMaxLUN command. | ||
1984 | */ | ||
1985 | static int ub_sync_getmaxlun(struct ub_dev *sc) | ||
1986 | { | ||
1987 | int ifnum = sc->intf->cur_altsetting->desc.bInterfaceNumber; | ||
1988 | unsigned char *p; | ||
1989 | enum { ALLOC_SIZE = 1 }; | ||
1990 | struct usb_ctrlrequest *cr; | ||
1991 | struct completion compl; | ||
1992 | struct timer_list timer; | ||
1993 | int nluns; | ||
1994 | int rc; | ||
1995 | |||
1996 | init_completion(&compl); | ||
1997 | |||
1998 | rc = -ENOMEM; | ||
1999 | if ((p = kmalloc(ALLOC_SIZE, GFP_KERNEL)) == NULL) | ||
2000 | goto err_alloc; | ||
2001 | *p = 55; | ||
2002 | |||
2003 | cr = &sc->work_cr; | ||
2004 | cr->bRequestType = USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE; | ||
2005 | cr->bRequest = US_BULK_GET_MAX_LUN; | ||
2006 | cr->wValue = cpu_to_le16(0); | ||
2007 | cr->wIndex = cpu_to_le16(ifnum); | ||
2008 | cr->wLength = cpu_to_le16(1); | ||
2009 | |||
2010 | usb_fill_control_urb(&sc->work_urb, sc->dev, sc->recv_ctrl_pipe, | ||
2011 | (unsigned char*) cr, p, 1, ub_probe_urb_complete, &compl); | ||
2012 | |||
2013 | if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) | ||
2014 | goto err_submit; | ||
2015 | |||
2016 | init_timer(&timer); | ||
2017 | timer.function = ub_probe_timeout; | ||
2018 | timer.data = (unsigned long) &compl; | ||
2019 | timer.expires = jiffies + UB_CTRL_TIMEOUT; | ||
2020 | add_timer(&timer); | ||
2021 | |||
2022 | wait_for_completion(&compl); | ||
2023 | |||
2024 | del_timer_sync(&timer); | ||
2025 | usb_kill_urb(&sc->work_urb); | ||
2026 | |||
2027 | if ((rc = sc->work_urb.status) < 0) | ||
2028 | goto err_io; | ||
2029 | |||
2030 | if (sc->work_urb.actual_length != 1) { | ||
2031 | nluns = 0; | ||
2032 | } else { | ||
2033 | if ((nluns = *p) == 55) { | ||
2034 | nluns = 0; | ||
2035 | } else { | ||
2036 | /* GetMaxLUN returns the maximum LUN number */ | ||
2037 | nluns += 1; | ||
2038 | if (nluns > UB_MAX_LUNS) | ||
2039 | nluns = UB_MAX_LUNS; | ||
2040 | } | ||
2041 | } | ||
2042 | |||
2043 | kfree(p); | ||
2044 | return nluns; | ||
2045 | |||
2046 | err_io: | ||
2047 | err_submit: | ||
2048 | kfree(p); | ||
2049 | err_alloc: | ||
2050 | return rc; | ||
2051 | } | ||
2052 | |||
2053 | /* | ||
2054 | * Clear initial stalls. | ||
2055 | */ | ||
2056 | static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe) | ||
2057 | { | ||
2058 | int endp; | ||
2059 | struct usb_ctrlrequest *cr; | ||
2060 | struct completion compl; | ||
2061 | struct timer_list timer; | ||
2062 | int rc; | ||
2063 | |||
2064 | init_completion(&compl); | ||
2065 | |||
2066 | endp = usb_pipeendpoint(stalled_pipe); | ||
2067 | if (usb_pipein (stalled_pipe)) | ||
2068 | endp |= USB_DIR_IN; | ||
2069 | |||
2070 | cr = &sc->work_cr; | ||
2071 | cr->bRequestType = USB_RECIP_ENDPOINT; | ||
2072 | cr->bRequest = USB_REQ_CLEAR_FEATURE; | ||
2073 | cr->wValue = cpu_to_le16(USB_ENDPOINT_HALT); | ||
2074 | cr->wIndex = cpu_to_le16(endp); | ||
2075 | cr->wLength = cpu_to_le16(0); | ||
2076 | |||
2077 | usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe, | ||
2078 | (unsigned char*) cr, NULL, 0, ub_probe_urb_complete, &compl); | ||
2079 | |||
2080 | if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) { | ||
2081 | printk(KERN_WARNING | ||
2082 | "%s: Unable to submit a probe clear (%d)\n", sc->name, rc); | ||
2083 | return rc; | ||
2084 | } | ||
2085 | |||
2086 | init_timer(&timer); | ||
2087 | timer.function = ub_probe_timeout; | ||
2088 | timer.data = (unsigned long) &compl; | ||
2089 | timer.expires = jiffies + UB_CTRL_TIMEOUT; | ||
2090 | add_timer(&timer); | ||
2091 | |||
2092 | wait_for_completion(&compl); | ||
2093 | |||
2094 | del_timer_sync(&timer); | ||
2095 | usb_kill_urb(&sc->work_urb); | ||
2096 | |||
2097 | usb_reset_endpoint(sc->dev, endp); | ||
2098 | |||
2099 | return 0; | ||
2100 | } | ||
2101 | |||
2102 | /* | ||
2103 | * Get the pipe settings. | ||
2104 | */ | ||
2105 | static int ub_get_pipes(struct ub_dev *sc, struct usb_device *dev, | ||
2106 | struct usb_interface *intf) | ||
2107 | { | ||
2108 | struct usb_host_interface *altsetting = intf->cur_altsetting; | ||
2109 | struct usb_endpoint_descriptor *ep_in = NULL; | ||
2110 | struct usb_endpoint_descriptor *ep_out = NULL; | ||
2111 | struct usb_endpoint_descriptor *ep; | ||
2112 | int i; | ||
2113 | |||
2114 | /* | ||
2115 | * Find the endpoints we need. | ||
2116 | * We are expecting a minimum of 2 endpoints - in and out (bulk). | ||
2117 | * We will ignore any others. | ||
2118 | */ | ||
2119 | for (i = 0; i < altsetting->desc.bNumEndpoints; i++) { | ||
2120 | ep = &altsetting->endpoint[i].desc; | ||
2121 | |||
2122 | /* Is it a BULK endpoint? */ | ||
2123 | if (usb_endpoint_xfer_bulk(ep)) { | ||
2124 | /* BULK in or out? */ | ||
2125 | if (usb_endpoint_dir_in(ep)) { | ||
2126 | if (ep_in == NULL) | ||
2127 | ep_in = ep; | ||
2128 | } else { | ||
2129 | if (ep_out == NULL) | ||
2130 | ep_out = ep; | ||
2131 | } | ||
2132 | } | ||
2133 | } | ||
2134 | |||
2135 | if (ep_in == NULL || ep_out == NULL) { | ||
2136 | printk(KERN_NOTICE "%s: failed endpoint check\n", sc->name); | ||
2137 | return -ENODEV; | ||
2138 | } | ||
2139 | |||
2140 | /* Calculate and store the pipe values */ | ||
2141 | sc->send_ctrl_pipe = usb_sndctrlpipe(dev, 0); | ||
2142 | sc->recv_ctrl_pipe = usb_rcvctrlpipe(dev, 0); | ||
2143 | sc->send_bulk_pipe = usb_sndbulkpipe(dev, | ||
2144 | usb_endpoint_num(ep_out)); | ||
2145 | sc->recv_bulk_pipe = usb_rcvbulkpipe(dev, | ||
2146 | usb_endpoint_num(ep_in)); | ||
2147 | |||
2148 | return 0; | ||
2149 | } | ||
2150 | |||
2151 | /* | ||
2152 | * Probing is done in the process context, which allows us to cheat | ||
2153 | * and not to build a state machine for the discovery. | ||
2154 | */ | ||
2155 | static int ub_probe(struct usb_interface *intf, | ||
2156 | const struct usb_device_id *dev_id) | ||
2157 | { | ||
2158 | struct ub_dev *sc; | ||
2159 | int nluns; | ||
2160 | int rc; | ||
2161 | int i; | ||
2162 | |||
2163 | if (usb_usual_check_type(dev_id, USB_US_TYPE_UB)) | ||
2164 | return -ENXIO; | ||
2165 | |||
2166 | rc = -ENOMEM; | ||
2167 | if ((sc = kzalloc(sizeof(struct ub_dev), GFP_KERNEL)) == NULL) | ||
2168 | goto err_core; | ||
2169 | sc->lock = ub_next_lock(); | ||
2170 | INIT_LIST_HEAD(&sc->luns); | ||
2171 | usb_init_urb(&sc->work_urb); | ||
2172 | tasklet_init(&sc->tasklet, ub_scsi_action, (unsigned long)sc); | ||
2173 | atomic_set(&sc->poison, 0); | ||
2174 | INIT_WORK(&sc->reset_work, ub_reset_task); | ||
2175 | init_waitqueue_head(&sc->reset_wait); | ||
2176 | |||
2177 | init_timer(&sc->work_timer); | ||
2178 | sc->work_timer.data = (unsigned long) sc; | ||
2179 | sc->work_timer.function = ub_urb_timeout; | ||
2180 | |||
2181 | ub_init_completion(&sc->work_done); | ||
2182 | sc->work_done.done = 1; /* A little yuk, but oh well... */ | ||
2183 | |||
2184 | sc->dev = interface_to_usbdev(intf); | ||
2185 | sc->intf = intf; | ||
2186 | // sc->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; | ||
2187 | usb_set_intfdata(intf, sc); | ||
2188 | usb_get_dev(sc->dev); | ||
2189 | /* | ||
2190 | * Since we give the interface struct to the block level through | ||
2191 | * disk->driverfs_dev, we have to pin it. Otherwise, block_uevent | ||
2192 | * oopses on close after a disconnect (kernels 2.6.16 and up). | ||
2193 | */ | ||
2194 | usb_get_intf(sc->intf); | ||
2195 | |||
2196 | snprintf(sc->name, 12, DRV_NAME "(%d.%d)", | ||
2197 | sc->dev->bus->busnum, sc->dev->devnum); | ||
2198 | |||
2199 | /* XXX Verify that we can handle the device (from descriptors) */ | ||
2200 | |||
2201 | if (ub_get_pipes(sc, sc->dev, intf) != 0) | ||
2202 | goto err_dev_desc; | ||
2203 | |||
2204 | /* | ||
2205 | * At this point, all USB initialization is done, do upper layer. | ||
2206 | * We really hate halfway initialized structures, so from the | ||
2207 | * invariants perspective, this ub_dev is fully constructed at | ||
2208 | * this point. | ||
2209 | */ | ||
2210 | |||
2211 | /* | ||
2212 | * This is needed to clear toggles. It is a problem only if we do | ||
2213 | * `rmmod ub && modprobe ub` without disconnects, but we like that. | ||
2214 | */ | ||
2215 | #if 0 /* iPod Mini fails if we do this (big white iPod works) */ | ||
2216 | ub_probe_clear_stall(sc, sc->recv_bulk_pipe); | ||
2217 | ub_probe_clear_stall(sc, sc->send_bulk_pipe); | ||
2218 | #endif | ||
2219 | |||
2220 | /* | ||
2221 | * The way this is used by the startup code is a little specific. | ||
2222 | * A SCSI check causes a USB stall. Our common case code sees it | ||
2223 | * and clears the check, after which the device is ready for use. | ||
2224 | * But if a check was not present, any command other than | ||
2225 | * TEST_UNIT_READY ends with a lockup (including REQUEST_SENSE). | ||
2226 | * | ||
2227 | * If we neglect to clear the SCSI check, the first real command fails | ||
2228 | * (which is the capacity readout). We clear that and retry, but why | ||
2229 | * cause spurious retries for no reason? | ||
2230 | * | ||
2231 | * Revalidation may start with its own TEST_UNIT_READY, but that one | ||
2232 | * has to succeed, so we clear checks with an additional one here. | ||
2233 | * In any case it's not our business how revalidation is implemented. | ||
2234 | */ | ||
2235 | for (i = 0; i < 3; i++) { /* Retries for the schwag key from KS'04 */ | ||
2236 | if ((rc = ub_sync_tur(sc, NULL)) <= 0) break; | ||
2237 | if (rc != 0x6) break; | ||
2238 | msleep(10); | ||
2239 | } | ||
2240 | |||
2241 | nluns = 1; | ||
2242 | for (i = 0; i < 3; i++) { | ||
2243 | if ((rc = ub_sync_getmaxlun(sc)) < 0) | ||
2244 | break; | ||
2245 | if (rc != 0) { | ||
2246 | nluns = rc; | ||
2247 | break; | ||
2248 | } | ||
2249 | msleep(100); | ||
2250 | } | ||
2251 | |||
2252 | for (i = 0; i < nluns; i++) { | ||
2253 | ub_probe_lun(sc, i); | ||
2254 | } | ||
2255 | return 0; | ||
2256 | |||
2257 | err_dev_desc: | ||
2258 | usb_set_intfdata(intf, NULL); | ||
2259 | usb_put_intf(sc->intf); | ||
2260 | usb_put_dev(sc->dev); | ||
2261 | kfree(sc); | ||
2262 | err_core: | ||
2263 | return rc; | ||
2264 | } | ||
2265 | |||
2266 | static int ub_probe_lun(struct ub_dev *sc, int lnum) | ||
2267 | { | ||
2268 | struct ub_lun *lun; | ||
2269 | struct request_queue *q; | ||
2270 | struct gendisk *disk; | ||
2271 | int rc; | ||
2272 | |||
2273 | rc = -ENOMEM; | ||
2274 | if ((lun = kzalloc(sizeof(struct ub_lun), GFP_KERNEL)) == NULL) | ||
2275 | goto err_alloc; | ||
2276 | lun->num = lnum; | ||
2277 | |||
2278 | rc = -ENOSR; | ||
2279 | if ((lun->id = ub_id_get()) == -1) | ||
2280 | goto err_id; | ||
2281 | |||
2282 | lun->udev = sc; | ||
2283 | |||
2284 | snprintf(lun->name, 16, DRV_NAME "%c(%d.%d.%d)", | ||
2285 | lun->id + 'a', sc->dev->bus->busnum, sc->dev->devnum, lun->num); | ||
2286 | |||
2287 | lun->removable = 1; /* XXX Query this from the device */ | ||
2288 | lun->changed = 1; /* ub_revalidate clears only */ | ||
2289 | ub_revalidate(sc, lun); | ||
2290 | |||
2291 | rc = -ENOMEM; | ||
2292 | if ((disk = alloc_disk(UB_PARTS_PER_LUN)) == NULL) | ||
2293 | goto err_diskalloc; | ||
2294 | |||
2295 | sprintf(disk->disk_name, DRV_NAME "%c", lun->id + 'a'); | ||
2296 | disk->major = UB_MAJOR; | ||
2297 | disk->first_minor = lun->id * UB_PARTS_PER_LUN; | ||
2298 | disk->fops = &ub_bd_fops; | ||
2299 | disk->private_data = lun; | ||
2300 | disk->driverfs_dev = &sc->intf->dev; | ||
2301 | |||
2302 | rc = -ENOMEM; | ||
2303 | if ((q = blk_init_queue(ub_request_fn, sc->lock)) == NULL) | ||
2304 | goto err_blkqinit; | ||
2305 | |||
2306 | disk->queue = q; | ||
2307 | |||
2308 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | ||
2309 | blk_queue_max_segments(q, UB_MAX_REQ_SG); | ||
2310 | blk_queue_segment_boundary(q, 0xffffffff); /* Dubious. */ | ||
2311 | blk_queue_max_hw_sectors(q, UB_MAX_SECTORS); | ||
2312 | blk_queue_logical_block_size(q, lun->capacity.bsize); | ||
2313 | |||
2314 | lun->disk = disk; | ||
2315 | q->queuedata = lun; | ||
2316 | list_add(&lun->link, &sc->luns); | ||
2317 | |||
2318 | set_capacity(disk, lun->capacity.nsec); | ||
2319 | if (lun->removable) | ||
2320 | disk->flags |= GENHD_FL_REMOVABLE; | ||
2321 | |||
2322 | add_disk(disk); | ||
2323 | |||
2324 | return 0; | ||
2325 | |||
2326 | err_blkqinit: | ||
2327 | put_disk(disk); | ||
2328 | err_diskalloc: | ||
2329 | ub_id_put(lun->id); | ||
2330 | err_id: | ||
2331 | kfree(lun); | ||
2332 | err_alloc: | ||
2333 | return rc; | ||
2334 | } | ||
2335 | |||
2336 | static void ub_disconnect(struct usb_interface *intf) | ||
2337 | { | ||
2338 | struct ub_dev *sc = usb_get_intfdata(intf); | ||
2339 | struct ub_lun *lun; | ||
2340 | unsigned long flags; | ||
2341 | |||
2342 | /* | ||
2343 | * Prevent ub_bd_release from pulling the rug from under us. | ||
2344 | * XXX This is starting to look like a kref. | ||
2345 | * XXX Why not take this ref at probe time? | ||
2346 | */ | ||
2347 | spin_lock_irqsave(&ub_lock, flags); | ||
2348 | sc->openc++; | ||
2349 | spin_unlock_irqrestore(&ub_lock, flags); | ||
2350 | |||
2351 | /* | ||
2352 | * Fence stall clearings, operations triggered by unlinkings and so on. | ||
2353 | * We do not attempt to unlink any URBs, because we do not trust the | ||
2354 | * unlink paths in HC drivers. Also, we get -84 upon disconnect anyway. | ||
2355 | */ | ||
2356 | atomic_set(&sc->poison, 1); | ||
2357 | |||
2358 | /* | ||
2359 | * Wait for reset to end, if any. | ||
2360 | */ | ||
2361 | wait_event(sc->reset_wait, !sc->reset); | ||
2362 | |||
2363 | /* | ||
2364 | * Blow away queued commands. | ||
2365 | * | ||
2366 | * Actually, this never works, because before we get here | ||
2367 | * the HCD terminates outstanding URB(s). It causes our | ||
2368 | * SCSI command queue to advance, commands fail to submit, | ||
2369 | * and the whole queue drains. So, we just use this code to | ||
2370 | * print warnings. | ||
2371 | */ | ||
2372 | spin_lock_irqsave(sc->lock, flags); | ||
2373 | { | ||
2374 | struct ub_scsi_cmd *cmd; | ||
2375 | int cnt = 0; | ||
2376 | while ((cmd = ub_cmdq_peek(sc)) != NULL) { | ||
2377 | cmd->error = -ENOTCONN; | ||
2378 | cmd->state = UB_CMDST_DONE; | ||
2379 | ub_cmdq_pop(sc); | ||
2380 | (*cmd->done)(sc, cmd); | ||
2381 | cnt++; | ||
2382 | } | ||
2383 | if (cnt != 0) { | ||
2384 | printk(KERN_WARNING "%s: " | ||
2385 | "%d was queued after shutdown\n", sc->name, cnt); | ||
2386 | } | ||
2387 | } | ||
2388 | spin_unlock_irqrestore(sc->lock, flags); | ||
2389 | |||
2390 | /* | ||
2391 | * Unregister the upper layer. | ||
2392 | */ | ||
2393 | list_for_each_entry(lun, &sc->luns, link) { | ||
2394 | del_gendisk(lun->disk); | ||
2395 | /* | ||
2396 | * I wish I could do: | ||
2397 | * queue_flag_set(QUEUE_FLAG_DEAD, q); | ||
2398 | * As it is, we rely on our internal poisoning and let | ||
2399 | * the upper levels spin furiously, failing all the I/O. | ||
2400 | */ | ||
2401 | } | ||
2402 | |||
2403 | /* | ||
2404 | * Testing for -EINPROGRESS is always a bug, so we are bending | ||
2405 | * the rules a little. | ||
2406 | */ | ||
2407 | spin_lock_irqsave(sc->lock, flags); | ||
2408 | if (sc->work_urb.status == -EINPROGRESS) { /* janitors: ignore */ | ||
2409 | printk(KERN_WARNING "%s: " | ||
2410 | "URB is active after disconnect\n", sc->name); | ||
2411 | } | ||
2412 | spin_unlock_irqrestore(sc->lock, flags); | ||
2413 | |||
2414 | /* | ||
2415 | * There is virtually no chance that other CPU runs a timeout so long | ||
2416 | * after ub_urb_complete should have called del_timer, but only if HCD | ||
2417 | * didn't forget to deliver a callback on unlink. | ||
2418 | */ | ||
2419 | del_timer_sync(&sc->work_timer); | ||
2420 | |||
2421 | /* | ||
2422 | * At this point there must be no commands coming from anyone | ||
2423 | * and no URBs left in transit. | ||
2424 | */ | ||
2425 | |||
2426 | ub_put(sc); | ||
2427 | } | ||
2428 | |||
2429 | static struct usb_driver ub_driver = { | ||
2430 | .name = "ub", | ||
2431 | .probe = ub_probe, | ||
2432 | .disconnect = ub_disconnect, | ||
2433 | .id_table = ub_usb_ids, | ||
2434 | .pre_reset = ub_pre_reset, | ||
2435 | .post_reset = ub_post_reset, | ||
2436 | }; | ||
2437 | |||
2438 | static int __init ub_init(void) | ||
2439 | { | ||
2440 | int rc; | ||
2441 | int i; | ||
2442 | |||
2443 | pr_info("'Low Performance USB Block' driver is deprecated. " | ||
2444 | "Please switch to usb-storage\n"); | ||
2445 | for (i = 0; i < UB_QLOCK_NUM; i++) | ||
2446 | spin_lock_init(&ub_qlockv[i]); | ||
2447 | |||
2448 | if ((rc = register_blkdev(UB_MAJOR, DRV_NAME)) != 0) | ||
2449 | goto err_regblkdev; | ||
2450 | |||
2451 | if ((rc = usb_register(&ub_driver)) != 0) | ||
2452 | goto err_register; | ||
2453 | |||
2454 | usb_usual_set_present(USB_US_TYPE_UB); | ||
2455 | return 0; | ||
2456 | |||
2457 | err_register: | ||
2458 | unregister_blkdev(UB_MAJOR, DRV_NAME); | ||
2459 | err_regblkdev: | ||
2460 | return rc; | ||
2461 | } | ||
2462 | |||
2463 | static void __exit ub_exit(void) | ||
2464 | { | ||
2465 | usb_deregister(&ub_driver); | ||
2466 | |||
2467 | unregister_blkdev(UB_MAJOR, DRV_NAME); | ||
2468 | usb_usual_clear_present(USB_US_TYPE_UB); | ||
2469 | } | ||
2470 | |||
2471 | module_init(ub_init); | ||
2472 | module_exit(ub_exit); | ||
2473 | |||
2474 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c0bbeb470754..0bdde8fba397 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -14,6 +14,9 @@ | |||
14 | 14 | ||
15 | #define PART_BITS 4 | 15 | #define PART_BITS 4 |
16 | 16 | ||
17 | static bool use_bio; | ||
18 | module_param(use_bio, bool, S_IRUGO); | ||
19 | |||
17 | static int major; | 20 | static int major; |
18 | static DEFINE_IDA(vd_index_ida); | 21 | static DEFINE_IDA(vd_index_ida); |
19 | 22 | ||
@@ -23,6 +26,7 @@ struct virtio_blk | |||
23 | { | 26 | { |
24 | struct virtio_device *vdev; | 27 | struct virtio_device *vdev; |
25 | struct virtqueue *vq; | 28 | struct virtqueue *vq; |
29 | wait_queue_head_t queue_wait; | ||
26 | 30 | ||
27 | /* The disk structure for the kernel. */ | 31 | /* The disk structure for the kernel. */ |
28 | struct gendisk *disk; | 32 | struct gendisk *disk; |
@@ -51,53 +55,244 @@ struct virtio_blk | |||
51 | struct virtblk_req | 55 | struct virtblk_req |
52 | { | 56 | { |
53 | struct request *req; | 57 | struct request *req; |
58 | struct bio *bio; | ||
54 | struct virtio_blk_outhdr out_hdr; | 59 | struct virtio_blk_outhdr out_hdr; |
55 | struct virtio_scsi_inhdr in_hdr; | 60 | struct virtio_scsi_inhdr in_hdr; |
61 | struct work_struct work; | ||
62 | struct virtio_blk *vblk; | ||
63 | int flags; | ||
56 | u8 status; | 64 | u8 status; |
65 | struct scatterlist sg[]; | ||
66 | }; | ||
67 | |||
68 | enum { | ||
69 | VBLK_IS_FLUSH = 1, | ||
70 | VBLK_REQ_FLUSH = 2, | ||
71 | VBLK_REQ_DATA = 4, | ||
72 | VBLK_REQ_FUA = 8, | ||
57 | }; | 73 | }; |
58 | 74 | ||
59 | static void blk_done(struct virtqueue *vq) | 75 | static inline int virtblk_result(struct virtblk_req *vbr) |
76 | { | ||
77 | switch (vbr->status) { | ||
78 | case VIRTIO_BLK_S_OK: | ||
79 | return 0; | ||
80 | case VIRTIO_BLK_S_UNSUPP: | ||
81 | return -ENOTTY; | ||
82 | default: | ||
83 | return -EIO; | ||
84 | } | ||
85 | } | ||
86 | |||
87 | static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk, | ||
88 | gfp_t gfp_mask) | ||
60 | { | 89 | { |
61 | struct virtio_blk *vblk = vq->vdev->priv; | ||
62 | struct virtblk_req *vbr; | 90 | struct virtblk_req *vbr; |
63 | unsigned int len; | ||
64 | unsigned long flags; | ||
65 | 91 | ||
66 | spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); | 92 | vbr = mempool_alloc(vblk->pool, gfp_mask); |
67 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { | 93 | if (!vbr) |
68 | int error; | 94 | return NULL; |
69 | 95 | ||
70 | switch (vbr->status) { | 96 | vbr->vblk = vblk; |
71 | case VIRTIO_BLK_S_OK: | 97 | if (use_bio) |
72 | error = 0; | 98 | sg_init_table(vbr->sg, vblk->sg_elems); |
73 | break; | 99 | |
74 | case VIRTIO_BLK_S_UNSUPP: | 100 | return vbr; |
75 | error = -ENOTTY; | 101 | } |
76 | break; | 102 | |
77 | default: | 103 | static void virtblk_add_buf_wait(struct virtio_blk *vblk, |
78 | error = -EIO; | 104 | struct virtblk_req *vbr, |
105 | unsigned long out, | ||
106 | unsigned long in) | ||
107 | { | ||
108 | DEFINE_WAIT(wait); | ||
109 | |||
110 | for (;;) { | ||
111 | prepare_to_wait_exclusive(&vblk->queue_wait, &wait, | ||
112 | TASK_UNINTERRUPTIBLE); | ||
113 | |||
114 | spin_lock_irq(vblk->disk->queue->queue_lock); | ||
115 | if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr, | ||
116 | GFP_ATOMIC) < 0) { | ||
117 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
118 | io_schedule(); | ||
119 | } else { | ||
120 | virtqueue_kick(vblk->vq); | ||
121 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
79 | break; | 122 | break; |
80 | } | 123 | } |
81 | 124 | ||
82 | switch (vbr->req->cmd_type) { | 125 | } |
83 | case REQ_TYPE_BLOCK_PC: | 126 | |
84 | vbr->req->resid_len = vbr->in_hdr.residual; | 127 | finish_wait(&vblk->queue_wait, &wait); |
85 | vbr->req->sense_len = vbr->in_hdr.sense_len; | 128 | } |
86 | vbr->req->errors = vbr->in_hdr.errors; | 129 | |
87 | break; | 130 | static inline void virtblk_add_req(struct virtblk_req *vbr, |
88 | case REQ_TYPE_SPECIAL: | 131 | unsigned int out, unsigned int in) |
89 | vbr->req->errors = (error != 0); | 132 | { |
90 | break; | 133 | struct virtio_blk *vblk = vbr->vblk; |
91 | default: | 134 | |
92 | break; | 135 | spin_lock_irq(vblk->disk->queue->queue_lock); |
136 | if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr, | ||
137 | GFP_ATOMIC) < 0)) { | ||
138 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
139 | virtblk_add_buf_wait(vblk, vbr, out, in); | ||
140 | return; | ||
141 | } | ||
142 | virtqueue_kick(vblk->vq); | ||
143 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
144 | } | ||
145 | |||
146 | static int virtblk_bio_send_flush(struct virtblk_req *vbr) | ||
147 | { | ||
148 | unsigned int out = 0, in = 0; | ||
149 | |||
150 | vbr->flags |= VBLK_IS_FLUSH; | ||
151 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; | ||
152 | vbr->out_hdr.sector = 0; | ||
153 | vbr->out_hdr.ioprio = 0; | ||
154 | sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); | ||
155 | sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status)); | ||
156 | |||
157 | virtblk_add_req(vbr, out, in); | ||
158 | |||
159 | return 0; | ||
160 | } | ||
161 | |||
162 | static int virtblk_bio_send_data(struct virtblk_req *vbr) | ||
163 | { | ||
164 | struct virtio_blk *vblk = vbr->vblk; | ||
165 | unsigned int num, out = 0, in = 0; | ||
166 | struct bio *bio = vbr->bio; | ||
167 | |||
168 | vbr->flags &= ~VBLK_IS_FLUSH; | ||
169 | vbr->out_hdr.type = 0; | ||
170 | vbr->out_hdr.sector = bio->bi_sector; | ||
171 | vbr->out_hdr.ioprio = bio_prio(bio); | ||
172 | |||
173 | sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); | ||
174 | |||
175 | num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out); | ||
176 | |||
177 | sg_set_buf(&vbr->sg[num + out + in++], &vbr->status, | ||
178 | sizeof(vbr->status)); | ||
179 | |||
180 | if (num) { | ||
181 | if (bio->bi_rw & REQ_WRITE) { | ||
182 | vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; | ||
183 | out += num; | ||
184 | } else { | ||
185 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; | ||
186 | in += num; | ||
93 | } | 187 | } |
188 | } | ||
189 | |||
190 | virtblk_add_req(vbr, out, in); | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | static void virtblk_bio_send_data_work(struct work_struct *work) | ||
196 | { | ||
197 | struct virtblk_req *vbr; | ||
198 | |||
199 | vbr = container_of(work, struct virtblk_req, work); | ||
200 | |||
201 | virtblk_bio_send_data(vbr); | ||
202 | } | ||
203 | |||
204 | static void virtblk_bio_send_flush_work(struct work_struct *work) | ||
205 | { | ||
206 | struct virtblk_req *vbr; | ||
207 | |||
208 | vbr = container_of(work, struct virtblk_req, work); | ||
209 | |||
210 | virtblk_bio_send_flush(vbr); | ||
211 | } | ||
212 | |||
213 | static inline void virtblk_request_done(struct virtblk_req *vbr) | ||
214 | { | ||
215 | struct virtio_blk *vblk = vbr->vblk; | ||
216 | struct request *req = vbr->req; | ||
217 | int error = virtblk_result(vbr); | ||
218 | |||
219 | if (req->cmd_type == REQ_TYPE_BLOCK_PC) { | ||
220 | req->resid_len = vbr->in_hdr.residual; | ||
221 | req->sense_len = vbr->in_hdr.sense_len; | ||
222 | req->errors = vbr->in_hdr.errors; | ||
223 | } else if (req->cmd_type == REQ_TYPE_SPECIAL) { | ||
224 | req->errors = (error != 0); | ||
225 | } | ||
226 | |||
227 | __blk_end_request_all(req, error); | ||
228 | mempool_free(vbr, vblk->pool); | ||
229 | } | ||
230 | |||
231 | static inline void virtblk_bio_flush_done(struct virtblk_req *vbr) | ||
232 | { | ||
233 | struct virtio_blk *vblk = vbr->vblk; | ||
234 | |||
235 | if (vbr->flags & VBLK_REQ_DATA) { | ||
236 | /* Send out the actual write data */ | ||
237 | INIT_WORK(&vbr->work, virtblk_bio_send_data_work); | ||
238 | queue_work(virtblk_wq, &vbr->work); | ||
239 | } else { | ||
240 | bio_endio(vbr->bio, virtblk_result(vbr)); | ||
241 | mempool_free(vbr, vblk->pool); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | static inline void virtblk_bio_data_done(struct virtblk_req *vbr) | ||
246 | { | ||
247 | struct virtio_blk *vblk = vbr->vblk; | ||
94 | 248 | ||
95 | __blk_end_request_all(vbr->req, error); | 249 | if (unlikely(vbr->flags & VBLK_REQ_FUA)) { |
250 | /* Send out a flush before ending the bio */ | ||
251 | vbr->flags &= ~VBLK_REQ_DATA; | ||
252 | INIT_WORK(&vbr->work, virtblk_bio_send_flush_work); | ||
253 | queue_work(virtblk_wq, &vbr->work); | ||
254 | } else { | ||
255 | bio_endio(vbr->bio, virtblk_result(vbr)); | ||
96 | mempool_free(vbr, vblk->pool); | 256 | mempool_free(vbr, vblk->pool); |
97 | } | 257 | } |
258 | } | ||
259 | |||
260 | static inline void virtblk_bio_done(struct virtblk_req *vbr) | ||
261 | { | ||
262 | if (unlikely(vbr->flags & VBLK_IS_FLUSH)) | ||
263 | virtblk_bio_flush_done(vbr); | ||
264 | else | ||
265 | virtblk_bio_data_done(vbr); | ||
266 | } | ||
267 | |||
268 | static void virtblk_done(struct virtqueue *vq) | ||
269 | { | ||
270 | struct virtio_blk *vblk = vq->vdev->priv; | ||
271 | bool bio_done = false, req_done = false; | ||
272 | struct virtblk_req *vbr; | ||
273 | unsigned long flags; | ||
274 | unsigned int len; | ||
275 | |||
276 | spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); | ||
277 | do { | ||
278 | virtqueue_disable_cb(vq); | ||
279 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { | ||
280 | if (vbr->bio) { | ||
281 | virtblk_bio_done(vbr); | ||
282 | bio_done = true; | ||
283 | } else { | ||
284 | virtblk_request_done(vbr); | ||
285 | req_done = true; | ||
286 | } | ||
287 | } | ||
288 | } while (!virtqueue_enable_cb(vq)); | ||
98 | /* In case queue is stopped waiting for more buffers. */ | 289 | /* In case queue is stopped waiting for more buffers. */ |
99 | blk_start_queue(vblk->disk->queue); | 290 | if (req_done) |
291 | blk_start_queue(vblk->disk->queue); | ||
100 | spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); | 292 | spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); |
293 | |||
294 | if (bio_done) | ||
295 | wake_up(&vblk->queue_wait); | ||
101 | } | 296 | } |
102 | 297 | ||
103 | static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | 298 | static bool do_req(struct request_queue *q, struct virtio_blk *vblk, |
@@ -106,13 +301,13 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
106 | unsigned long num, out = 0, in = 0; | 301 | unsigned long num, out = 0, in = 0; |
107 | struct virtblk_req *vbr; | 302 | struct virtblk_req *vbr; |
108 | 303 | ||
109 | vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); | 304 | vbr = virtblk_alloc_req(vblk, GFP_ATOMIC); |
110 | if (!vbr) | 305 | if (!vbr) |
111 | /* When another request finishes we'll try again. */ | 306 | /* When another request finishes we'll try again. */ |
112 | return false; | 307 | return false; |
113 | 308 | ||
114 | vbr->req = req; | 309 | vbr->req = req; |
115 | 310 | vbr->bio = NULL; | |
116 | if (req->cmd_flags & REQ_FLUSH) { | 311 | if (req->cmd_flags & REQ_FLUSH) { |
117 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; | 312 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; |
118 | vbr->out_hdr.sector = 0; | 313 | vbr->out_hdr.sector = 0; |
@@ -172,7 +367,8 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
172 | } | 367 | } |
173 | } | 368 | } |
174 | 369 | ||
175 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) { | 370 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, |
371 | GFP_ATOMIC) < 0) { | ||
176 | mempool_free(vbr, vblk->pool); | 372 | mempool_free(vbr, vblk->pool); |
177 | return false; | 373 | return false; |
178 | } | 374 | } |
@@ -180,7 +376,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
180 | return true; | 376 | return true; |
181 | } | 377 | } |
182 | 378 | ||
183 | static void do_virtblk_request(struct request_queue *q) | 379 | static void virtblk_request(struct request_queue *q) |
184 | { | 380 | { |
185 | struct virtio_blk *vblk = q->queuedata; | 381 | struct virtio_blk *vblk = q->queuedata; |
186 | struct request *req; | 382 | struct request *req; |
@@ -203,6 +399,34 @@ static void do_virtblk_request(struct request_queue *q) | |||
203 | virtqueue_kick(vblk->vq); | 399 | virtqueue_kick(vblk->vq); |
204 | } | 400 | } |
205 | 401 | ||
402 | static void virtblk_make_request(struct request_queue *q, struct bio *bio) | ||
403 | { | ||
404 | struct virtio_blk *vblk = q->queuedata; | ||
405 | struct virtblk_req *vbr; | ||
406 | |||
407 | BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems); | ||
408 | |||
409 | vbr = virtblk_alloc_req(vblk, GFP_NOIO); | ||
410 | if (!vbr) { | ||
411 | bio_endio(bio, -ENOMEM); | ||
412 | return; | ||
413 | } | ||
414 | |||
415 | vbr->bio = bio; | ||
416 | vbr->flags = 0; | ||
417 | if (bio->bi_rw & REQ_FLUSH) | ||
418 | vbr->flags |= VBLK_REQ_FLUSH; | ||
419 | if (bio->bi_rw & REQ_FUA) | ||
420 | vbr->flags |= VBLK_REQ_FUA; | ||
421 | if (bio->bi_size) | ||
422 | vbr->flags |= VBLK_REQ_DATA; | ||
423 | |||
424 | if (unlikely(vbr->flags & VBLK_REQ_FLUSH)) | ||
425 | virtblk_bio_send_flush(vbr); | ||
426 | else | ||
427 | virtblk_bio_send_data(vbr); | ||
428 | } | ||
429 | |||
206 | /* return id (s/n) string for *disk to *id_str | 430 | /* return id (s/n) string for *disk to *id_str |
207 | */ | 431 | */ |
208 | static int virtblk_get_id(struct gendisk *disk, char *id_str) | 432 | static int virtblk_get_id(struct gendisk *disk, char *id_str) |
@@ -360,7 +584,7 @@ static int init_vq(struct virtio_blk *vblk) | |||
360 | int err = 0; | 584 | int err = 0; |
361 | 585 | ||
362 | /* We expect one virtqueue, for output. */ | 586 | /* We expect one virtqueue, for output. */ |
363 | vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests"); | 587 | vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests"); |
364 | if (IS_ERR(vblk->vq)) | 588 | if (IS_ERR(vblk->vq)) |
365 | err = PTR_ERR(vblk->vq); | 589 | err = PTR_ERR(vblk->vq); |
366 | 590 | ||
@@ -477,6 +701,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
477 | struct virtio_blk *vblk; | 701 | struct virtio_blk *vblk; |
478 | struct request_queue *q; | 702 | struct request_queue *q; |
479 | int err, index; | 703 | int err, index; |
704 | int pool_size; | ||
705 | |||
480 | u64 cap; | 706 | u64 cap; |
481 | u32 v, blk_size, sg_elems, opt_io_size; | 707 | u32 v, blk_size, sg_elems, opt_io_size; |
482 | u16 min_io_size; | 708 | u16 min_io_size; |
@@ -506,10 +732,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
506 | goto out_free_index; | 732 | goto out_free_index; |
507 | } | 733 | } |
508 | 734 | ||
735 | init_waitqueue_head(&vblk->queue_wait); | ||
509 | vblk->vdev = vdev; | 736 | vblk->vdev = vdev; |
510 | vblk->sg_elems = sg_elems; | 737 | vblk->sg_elems = sg_elems; |
511 | sg_init_table(vblk->sg, vblk->sg_elems); | 738 | sg_init_table(vblk->sg, vblk->sg_elems); |
512 | mutex_init(&vblk->config_lock); | 739 | mutex_init(&vblk->config_lock); |
740 | |||
513 | INIT_WORK(&vblk->config_work, virtblk_config_changed_work); | 741 | INIT_WORK(&vblk->config_work, virtblk_config_changed_work); |
514 | vblk->config_enable = true; | 742 | vblk->config_enable = true; |
515 | 743 | ||
@@ -517,7 +745,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
517 | if (err) | 745 | if (err) |
518 | goto out_free_vblk; | 746 | goto out_free_vblk; |
519 | 747 | ||
520 | vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); | 748 | pool_size = sizeof(struct virtblk_req); |
749 | if (use_bio) | ||
750 | pool_size += sizeof(struct scatterlist) * sg_elems; | ||
751 | vblk->pool = mempool_create_kmalloc_pool(1, pool_size); | ||
521 | if (!vblk->pool) { | 752 | if (!vblk->pool) { |
522 | err = -ENOMEM; | 753 | err = -ENOMEM; |
523 | goto out_free_vq; | 754 | goto out_free_vq; |
@@ -530,12 +761,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
530 | goto out_mempool; | 761 | goto out_mempool; |
531 | } | 762 | } |
532 | 763 | ||
533 | q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL); | 764 | q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL); |
534 | if (!q) { | 765 | if (!q) { |
535 | err = -ENOMEM; | 766 | err = -ENOMEM; |
536 | goto out_put_disk; | 767 | goto out_put_disk; |
537 | } | 768 | } |
538 | 769 | ||
770 | if (use_bio) | ||
771 | blk_queue_make_request(q, virtblk_make_request); | ||
539 | q->queuedata = vblk; | 772 | q->queuedata = vblk; |
540 | 773 | ||
541 | virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); | 774 | virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); |
@@ -620,7 +853,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
620 | if (!err && opt_io_size) | 853 | if (!err && opt_io_size) |
621 | blk_queue_io_opt(q, blk_size * opt_io_size); | 854 | blk_queue_io_opt(q, blk_size * opt_io_size); |
622 | 855 | ||
623 | |||
624 | add_disk(vblk->disk); | 856 | add_disk(vblk->disk); |
625 | err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); | 857 | err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); |
626 | if (err) | 858 | if (err) |
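
The virtio_blk hunks above add a bio-based submission path (selected by the use_bio toggle visible in the probe hunk) next to the existing request_fn path, and sequence FLUSH/FUA by hand with the VBLK_* flags: a preflush is sent first when REQ_FLUSH is set, the payload follows, and a trailing flush is issued from the completion handler when REQ_FUA is set. The user-space sketch below models only that flag-driven sequencing; the flag values mirror the driver's enum, but struct model_req, send_flush(), send_data(), complete_step(), submit() and the printf-based "completions" are invented for illustration and are not part of the driver.

/*
 * Standalone model (not kernel code) of the FLUSH/FUA ordering that the
 * bio path above appears to implement.
 *
 * Build: cc -Wall -o vblk_model vblk_model.c && ./vblk_model
 */
#include <stdio.h>

enum {
	VBLK_IS_FLUSH  = 1,	/* command currently on the ring is a flush */
	VBLK_REQ_FLUSH = 2,	/* bio asked for a preflush (REQ_FLUSH) */
	VBLK_REQ_DATA  = 4,	/* bio carries payload (bi_size != 0) */
	VBLK_REQ_FUA   = 8,	/* bio asked for a postflush (REQ_FUA) */
};

struct model_req {
	int flags;
};

/* Queue a flush command; mirrors virtblk_bio_send_flush(). */
static void send_flush(struct model_req *r)
{
	r->flags |= VBLK_IS_FLUSH;
	printf("  -> FLUSH\n");
}

/* Queue the payload; mirrors virtblk_bio_send_data(). */
static void send_data(struct model_req *r)
{
	r->flags &= ~VBLK_IS_FLUSH;
	printf("  -> WRITE data\n");
}

/*
 * One completion step; mirrors virtblk_bio_done() and its two helpers.
 * Returns 1 while another command still has to be issued, 0 once the
 * bio can be ended.
 */
static int complete_step(struct model_req *r)
{
	if (r->flags & VBLK_IS_FLUSH) {
		if (r->flags & VBLK_REQ_DATA) {
			send_data(r);		/* preflush done, now the payload */
			return 1;
		}
	} else if (r->flags & VBLK_REQ_FUA) {
		r->flags &= ~VBLK_REQ_DATA;	/* payload done, one trailing flush */
		send_flush(r);
		return 1;
	}
	printf("  -> bio_endio\n");
	return 0;
}

/* Mirrors virtblk_make_request() plus the completion loop. */
static void submit(const char *name, int flags)
{
	struct model_req r = { .flags = flags };

	printf("%s:\n", name);
	if (flags & VBLK_REQ_FLUSH)
		send_flush(&r);
	else
		send_data(&r);
	while (complete_step(&r))
		;
}

int main(void)
{
	submit("plain write", VBLK_REQ_DATA);
	submit("FUA write", VBLK_REQ_DATA | VBLK_REQ_FUA);
	submit("preflush + FUA write",
	       VBLK_REQ_DATA | VBLK_REQ_FLUSH | VBLK_REQ_FUA);
	submit("empty flush", VBLK_REQ_FLUSH);
	return 0;
}

Running it prints the command sequence each bio type produces under those assumptions, e.g. a preflush plus FUA write becomes FLUSH, WRITE, FLUSH, then bio_endio.
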
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 73f196ca713f..280a13846e6c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c | |||
@@ -42,6 +42,7 @@ | |||
42 | 42 | ||
43 | #include <xen/events.h> | 43 | #include <xen/events.h> |
44 | #include <xen/page.h> | 44 | #include <xen/page.h> |
45 | #include <xen/xen.h> | ||
45 | #include <asm/xen/hypervisor.h> | 46 | #include <asm/xen/hypervisor.h> |
46 | #include <asm/xen/hypercall.h> | 47 | #include <asm/xen/hypercall.h> |
47 | #include "common.h" | 48 | #include "common.h" |
@@ -337,7 +338,7 @@ static void xen_blkbk_unmap(struct pending_req *req) | |||
337 | invcount++; | 338 | invcount++; |
338 | } | 339 | } |
339 | 340 | ||
340 | ret = gnttab_unmap_refs(unmap, pages, invcount, false); | 341 | ret = gnttab_unmap_refs(unmap, NULL, pages, invcount); |
341 | BUG_ON(ret); | 342 | BUG_ON(ret); |
342 | } | 343 | } |
343 | 344 | ||
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c2d2e5c1597..007db8986e84 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -670,7 +670,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) | |||
670 | spin_unlock_irqrestore(&info->io_lock, flags); | 670 | spin_unlock_irqrestore(&info->io_lock, flags); |
671 | 671 | ||
672 | /* Flush gnttab callback work. Must be done with no locks held. */ | 672 | /* Flush gnttab callback work. Must be done with no locks held. */ |
673 | flush_work_sync(&info->work); | 673 | flush_work(&info->work); |
674 | 674 | ||
675 | del_gendisk(info->gd); | 675 | del_gendisk(info->gd); |
676 | 676 | ||
@@ -719,7 +719,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) | |||
719 | spin_unlock_irq(&info->io_lock); | 719 | spin_unlock_irq(&info->io_lock); |
720 | 720 | ||
721 | /* Flush gnttab callback work. Must be done with no locks held. */ | 721 | /* Flush gnttab callback work. Must be done with no locks held. */ |
722 | flush_work_sync(&info->work); | 722 | flush_work(&info->work); |
723 | 723 | ||
724 | /* Free resources associated with old device channel. */ | 724 | /* Free resources associated with old device channel. */ |
725 | if (info->ring_ref != GRANT_INVALID_REF) { | 725 | if (info->ring_ref != GRANT_INVALID_REF) { |
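
Stepping back to the virtio_blk hunks earlier in this diff, the bio path also adds its own backpressure: virtblk_add_req() first tries a non-blocking virtqueue_add_buf() under the queue lock, drops into virtblk_add_buf_wait() only when the ring is full, and virtblk_done() wakes the waiters once a completion frees a slot. The sketch below is a user-space model of that try-then-wait shape using a pthread mutex and condition variable; the ring size, request count and all names are invented for illustration and are not part of the driver.

/*
 * Standalone model (not kernel code) of the "virtqueue full" handling
 * added for the bio path in virtio_blk.
 *
 * Build: cc -Wall -pthread -o vq_model vq_model.c && ./vq_model
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define RING_SIZE 4
#define NREQS     16

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t queue_wait = PTHREAD_COND_INITIALIZER;	/* ~ vblk->queue_wait */
static int in_flight;						/* slots used on the "ring" */

/* Non-blocking add; ~ virtqueue_add_buf() returning < 0 when full. */
static int try_add(int req)
{
	if (in_flight == RING_SIZE)
		return -1;
	in_flight++;
	printf("submitted req %2d (in flight %d)\n", req, in_flight);
	return 0;
}

/* Fast path plus slow path; ~ virtblk_add_req() / virtblk_add_buf_wait(). */
static void add_req(int req)
{
	pthread_mutex_lock(&lock);
	while (try_add(req) < 0)
		pthread_cond_wait(&queue_wait, &lock);	/* ~ io_schedule() */
	pthread_mutex_unlock(&lock);
}

/* Completion side; ~ virtblk_done() waking vblk->queue_wait. */
static void *completer(void *arg)
{
	int done;

	for (done = 0; done < NREQS; done++) {
		usleep(1000);			/* pretend the host is working */
		pthread_mutex_lock(&lock);
		while (in_flight == 0) {	/* nothing submitted yet; poll */
			pthread_mutex_unlock(&lock);
			usleep(100);
			pthread_mutex_lock(&lock);
		}
		in_flight--;
		printf("completed one req (in flight now %d)\n", in_flight);
		pthread_cond_signal(&queue_wait);
		pthread_mutex_unlock(&lock);
	}
	return arg;
}

int main(void)
{
	pthread_t thr;
	int req;

	pthread_create(&thr, NULL, completer, NULL);
	for (req = 0; req < NREQS; req++)
		add_req(req);
	pthread_join(thr, NULL);
	return 0;
}

The exclusive wait used in the driver appears to serve the same purpose as the single pthread_cond_signal() here: roughly one blocked submitter is woken per freed slot rather than the whole queue.
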