diff options
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r-- | drivers/block/nbd.c | 731 |
1 files changed, 731 insertions, 0 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c new file mode 100644 index 000000000000..efdf04450bf7 --- /dev/null +++ b/drivers/block/nbd.c | |||
@@ -0,0 +1,731 @@ | |||
1 | /* | ||
2 | * Network block device - make block devices work over TCP | ||
3 | * | ||
4 | * Note that you can not swap over this thing, yet. Seems to work but | ||
5 | * deadlocks sometimes - you can not swap over TCP in general. | ||
6 | * | ||
7 | * Copyright 1997-2000 Pavel Machek <pavel@ucw.cz> | ||
8 | * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com> | ||
9 | * | ||
10 | * (part of code stolen from loop.c) | ||
11 | * | ||
12 | * 97-3-25 compiled 0-th version, not yet tested it | ||
13 | * (it did not work, BTW) (later that day) HEY! it works! | ||
14 | * (bit later) hmm, not that much... 2:00am next day: | ||
15 | * yes, it works, but it gives something like 50kB/sec | ||
16 | * 97-4-01 complete rewrite to make it possible for many requests at | ||
17 | * once to be processed | ||
18 | * 97-4-11 Making protocol independent of endianity etc. | ||
19 | * 97-9-13 Cosmetic changes | ||
20 | * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines | ||
21 | * 99-1-11 Attempt to make 64-bit-clean on 32-bit machines <ankry@mif.pg.gda.pl> | ||
22 | * 01-2-27 Fix to store proper blockcount for kernel (calculated using | ||
23 | * BLOCK_SIZE_BITS, not device blocksize) <aga@permonline.ru> | ||
24 | * 01-3-11 Make nbd work with new Linux block layer code. It now supports | ||
25 | * plugging like all the other block devices. Also added in MSG_MORE to | ||
26 | * reduce number of partial TCP segments sent. <steve@chygwyn.com> | ||
27 | * 01-12-6 Fix deadlock condition by making queue locks independent of | ||
28 | * the transmit lock. <steve@chygwyn.com> | ||
29 | * 02-10-11 Allow hung xmit to be aborted via SIGKILL & various fixes. | ||
30 | * <Paul.Clements@SteelEye.com> <James.Bottomley@SteelEye.com> | ||
31 | * 03-06-22 Make nbd work with new linux 2.5 block layer design. This fixes | ||
32 | * memory corruption from module removal and possible memory corruption | ||
33 | * from sending/receiving disk data. <ldl@aros.net> | ||
34 | * 03-06-23 Cosmetic changes. <ldl@aros.net> | ||
35 | * 03-06-23 Enhance diagnostics support. <ldl@aros.net> | ||
36 | * 03-06-24 Remove unneeded blksize_bits field from nbd_device struct. | ||
37 | * <ldl@aros.net> | ||
38 | * 03-06-24 Cleanup PARANOIA usage & code. <ldl@aros.net> | ||
39 | * 04-02-19 Remove PARANOIA, plus various cleanups (Paul Clements) | ||
40 | * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall | ||
41 | * why not: would need access_ok and friends, would share yet another | ||
42 | * structure with userland | ||
43 | */ | ||
44 | |||
45 | #include <linux/major.h> | ||
46 | |||
47 | #include <linux/blkdev.h> | ||
48 | #include <linux/module.h> | ||
49 | #include <linux/init.h> | ||
50 | #include <linux/sched.h> | ||
51 | #include <linux/fs.h> | ||
52 | #include <linux/bio.h> | ||
53 | #include <linux/stat.h> | ||
54 | #include <linux/errno.h> | ||
55 | #include <linux/file.h> | ||
56 | #include <linux/ioctl.h> | ||
57 | #include <net/sock.h> | ||
58 | |||
59 | #include <linux/devfs_fs_kernel.h> | ||
60 | |||
61 | #include <asm/uaccess.h> | ||
62 | #include <asm/types.h> | ||
63 | |||
64 | #include <linux/nbd.h> | ||
65 | |||
/* Sanity marker stored in each nbd_device and verified with BUG_ON(). */
#define LO_MAGIC 0x68797548

/*
 * Debug output.  Unless NDEBUG is defined, dprintk() prints at KERN_DEBUG
 * level when any of the given category bits is set in debugflags; with
 * NDEBUG defined it expands to nothing.
 */
#ifndef NDEBUG
#define DBG_IOCTL       0x0004
#define DBG_INIT        0x0010
#define DBG_EXIT        0x0020
#define DBG_BLKDEV      0x0100
#define DBG_RX          0x0200
#define DBG_TX          0x0400
static unsigned int debugflags;
#define dprintk(flags, fmt...) do { \
	if (debugflags & (flags)) printk(KERN_DEBUG fmt); \
} while (0)
#else /* NDEBUG */
#define dprintk(flags, fmt...)
#endif /* NDEBUG */
82 | |||
83 | static struct nbd_device nbd_dev[MAX_NBD]; | ||
84 | |||
85 | /* | ||
86 | * Use just one lock (or at most 1 per NIC). Two arguments for this: | ||
87 | * 1. Each NIC is essentially a synchronization point for all servers | ||
88 | * accessed through that NIC so there's no need to have more locks | ||
89 | * than NICs anyway. | ||
90 | * 2. More locks lead to more "Dirty cache line bouncing" which will slow | ||
91 | * down each lock to the point where they're actually slower than just | ||
92 | * a single lock. | ||
93 | * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this! | ||
94 | */ | ||
95 | static DEFINE_SPINLOCK(nbd_lock); | ||
96 | |||
97 | #ifndef NDEBUG | ||
98 | static const char *ioctl_cmd_to_ascii(int cmd) | ||
99 | { | ||
100 | switch (cmd) { | ||
101 | case NBD_SET_SOCK: return "set-sock"; | ||
102 | case NBD_SET_BLKSIZE: return "set-blksize"; | ||
103 | case NBD_SET_SIZE: return "set-size"; | ||
104 | case NBD_DO_IT: return "do-it"; | ||
105 | case NBD_CLEAR_SOCK: return "clear-sock"; | ||
106 | case NBD_CLEAR_QUE: return "clear-que"; | ||
107 | case NBD_PRINT_DEBUG: return "print-debug"; | ||
108 | case NBD_SET_SIZE_BLOCKS: return "set-size-blocks"; | ||
109 | case NBD_DISCONNECT: return "disconnect"; | ||
110 | case BLKROSET: return "set-read-only"; | ||
111 | case BLKFLSBUF: return "flush-buffer-cache"; | ||
112 | } | ||
113 | return "unknown"; | ||
114 | } | ||
115 | |||
116 | static const char *nbdcmd_to_ascii(int cmd) | ||
117 | { | ||
118 | switch (cmd) { | ||
119 | case NBD_CMD_READ: return "read"; | ||
120 | case NBD_CMD_WRITE: return "write"; | ||
121 | case NBD_CMD_DISC: return "disconnect"; | ||
122 | } | ||
123 | return "invalid"; | ||
124 | } | ||
125 | #endif /* NDEBUG */ | ||
126 | |||
127 | static void nbd_end_request(struct request *req) | ||
128 | { | ||
129 | int uptodate = (req->errors == 0) ? 1 : 0; | ||
130 | request_queue_t *q = req->q; | ||
131 | unsigned long flags; | ||
132 | |||
133 | dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name, | ||
134 | req, uptodate? "done": "failed"); | ||
135 | |||
136 | spin_lock_irqsave(q->queue_lock, flags); | ||
137 | if (!end_that_request_first(req, uptodate, req->nr_sectors)) { | ||
138 | end_that_request_last(req); | ||
139 | } | ||
140 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Send or receive packet. | ||
145 | */ | ||
146 | static int sock_xmit(struct socket *sock, int send, void *buf, int size, | ||
147 | int msg_flags) | ||
148 | { | ||
149 | int result; | ||
150 | struct msghdr msg; | ||
151 | struct kvec iov; | ||
152 | unsigned long flags; | ||
153 | sigset_t oldset; | ||
154 | |||
155 | /* Allow interception of SIGKILL only | ||
156 | * Don't allow other signals to interrupt the transmission */ | ||
157 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
158 | oldset = current->blocked; | ||
159 | sigfillset(¤t->blocked); | ||
160 | sigdelsetmask(¤t->blocked, sigmask(SIGKILL)); | ||
161 | recalc_sigpending(); | ||
162 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
163 | |||
164 | do { | ||
165 | sock->sk->sk_allocation = GFP_NOIO; | ||
166 | iov.iov_base = buf; | ||
167 | iov.iov_len = size; | ||
168 | msg.msg_name = NULL; | ||
169 | msg.msg_namelen = 0; | ||
170 | msg.msg_control = NULL; | ||
171 | msg.msg_controllen = 0; | ||
172 | msg.msg_namelen = 0; | ||
173 | msg.msg_flags = msg_flags | MSG_NOSIGNAL; | ||
174 | |||
175 | if (send) | ||
176 | result = kernel_sendmsg(sock, &msg, &iov, 1, size); | ||
177 | else | ||
178 | result = kernel_recvmsg(sock, &msg, &iov, 1, size, 0); | ||
179 | |||
180 | if (signal_pending(current)) { | ||
181 | siginfo_t info; | ||
182 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
183 | printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n", | ||
184 | current->pid, current->comm, | ||
185 | dequeue_signal(current, ¤t->blocked, &info)); | ||
186 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
187 | result = -EINTR; | ||
188 | break; | ||
189 | } | ||
190 | |||
191 | if (result <= 0) { | ||
192 | if (result == 0) | ||
193 | result = -EPIPE; /* short read */ | ||
194 | break; | ||
195 | } | ||
196 | size -= result; | ||
197 | buf += result; | ||
198 | } while (size > 0); | ||
199 | |||
200 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
201 | current->blocked = oldset; | ||
202 | recalc_sigpending(); | ||
203 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
204 | |||
205 | return result; | ||
206 | } | ||
207 | |||
208 | static inline int sock_send_bvec(struct socket *sock, struct bio_vec *bvec, | ||
209 | int flags) | ||
210 | { | ||
211 | int result; | ||
212 | void *kaddr = kmap(bvec->bv_page); | ||
213 | result = sock_xmit(sock, 1, kaddr + bvec->bv_offset, bvec->bv_len, | ||
214 | flags); | ||
215 | kunmap(bvec->bv_page); | ||
216 | return result; | ||
217 | } | ||
218 | |||
219 | static int nbd_send_req(struct nbd_device *lo, struct request *req) | ||
220 | { | ||
221 | int result, i, flags; | ||
222 | struct nbd_request request; | ||
223 | unsigned long size = req->nr_sectors << 9; | ||
224 | struct socket *sock = lo->sock; | ||
225 | |||
226 | request.magic = htonl(NBD_REQUEST_MAGIC); | ||
227 | request.type = htonl(nbd_cmd(req)); | ||
228 | request.from = cpu_to_be64((u64) req->sector << 9); | ||
229 | request.len = htonl(size); | ||
230 | memcpy(request.handle, &req, sizeof(req)); | ||
231 | |||
232 | down(&lo->tx_lock); | ||
233 | |||
234 | if (!sock || !lo->sock) { | ||
235 | printk(KERN_ERR "%s: Attempted send on closed socket\n", | ||
236 | lo->disk->disk_name); | ||
237 | goto error_out; | ||
238 | } | ||
239 | |||
240 | dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n", | ||
241 | lo->disk->disk_name, req, | ||
242 | nbdcmd_to_ascii(nbd_cmd(req)), | ||
243 | (unsigned long long)req->sector << 9, | ||
244 | req->nr_sectors << 9); | ||
245 | result = sock_xmit(sock, 1, &request, sizeof(request), | ||
246 | (nbd_cmd(req) == NBD_CMD_WRITE)? MSG_MORE: 0); | ||
247 | if (result <= 0) { | ||
248 | printk(KERN_ERR "%s: Send control failed (result %d)\n", | ||
249 | lo->disk->disk_name, result); | ||
250 | goto error_out; | ||
251 | } | ||
252 | |||
253 | if (nbd_cmd(req) == NBD_CMD_WRITE) { | ||
254 | struct bio *bio; | ||
255 | /* | ||
256 | * we are really probing at internals to determine | ||
257 | * whether to set MSG_MORE or not... | ||
258 | */ | ||
259 | rq_for_each_bio(bio, req) { | ||
260 | struct bio_vec *bvec; | ||
261 | bio_for_each_segment(bvec, bio, i) { | ||
262 | flags = 0; | ||
263 | if ((i < (bio->bi_vcnt - 1)) || bio->bi_next) | ||
264 | flags = MSG_MORE; | ||
265 | dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", | ||
266 | lo->disk->disk_name, req, | ||
267 | bvec->bv_len); | ||
268 | result = sock_send_bvec(sock, bvec, flags); | ||
269 | if (result <= 0) { | ||
270 | printk(KERN_ERR "%s: Send data failed (result %d)\n", | ||
271 | lo->disk->disk_name, | ||
272 | result); | ||
273 | goto error_out; | ||
274 | } | ||
275 | } | ||
276 | } | ||
277 | } | ||
278 | up(&lo->tx_lock); | ||
279 | return 0; | ||
280 | |||
281 | error_out: | ||
282 | up(&lo->tx_lock); | ||
283 | return 1; | ||
284 | } | ||
285 | |||
286 | static struct request *nbd_find_request(struct nbd_device *lo, char *handle) | ||
287 | { | ||
288 | struct request *req; | ||
289 | struct list_head *tmp; | ||
290 | struct request *xreq; | ||
291 | |||
292 | memcpy(&xreq, handle, sizeof(xreq)); | ||
293 | |||
294 | spin_lock(&lo->queue_lock); | ||
295 | list_for_each(tmp, &lo->queue_head) { | ||
296 | req = list_entry(tmp, struct request, queuelist); | ||
297 | if (req != xreq) | ||
298 | continue; | ||
299 | list_del_init(&req->queuelist); | ||
300 | spin_unlock(&lo->queue_lock); | ||
301 | return req; | ||
302 | } | ||
303 | spin_unlock(&lo->queue_lock); | ||
304 | return NULL; | ||
305 | } | ||
306 | |||
307 | static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec) | ||
308 | { | ||
309 | int result; | ||
310 | void *kaddr = kmap(bvec->bv_page); | ||
311 | result = sock_xmit(sock, 0, kaddr + bvec->bv_offset, bvec->bv_len, | ||
312 | MSG_WAITALL); | ||
313 | kunmap(bvec->bv_page); | ||
314 | return result; | ||
315 | } | ||
316 | |||
317 | /* NULL returned = something went wrong, inform userspace */ | ||
318 | static struct request *nbd_read_stat(struct nbd_device *lo) | ||
319 | { | ||
320 | int result; | ||
321 | struct nbd_reply reply; | ||
322 | struct request *req; | ||
323 | struct socket *sock = lo->sock; | ||
324 | |||
325 | reply.magic = 0; | ||
326 | result = sock_xmit(sock, 0, &reply, sizeof(reply), MSG_WAITALL); | ||
327 | if (result <= 0) { | ||
328 | printk(KERN_ERR "%s: Receive control failed (result %d)\n", | ||
329 | lo->disk->disk_name, result); | ||
330 | goto harderror; | ||
331 | } | ||
332 | req = nbd_find_request(lo, reply.handle); | ||
333 | if (req == NULL) { | ||
334 | printk(KERN_ERR "%s: Unexpected reply (%p)\n", | ||
335 | lo->disk->disk_name, reply.handle); | ||
336 | result = -EBADR; | ||
337 | goto harderror; | ||
338 | } | ||
339 | |||
340 | if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { | ||
341 | printk(KERN_ERR "%s: Wrong magic (0x%lx)\n", | ||
342 | lo->disk->disk_name, | ||
343 | (unsigned long)ntohl(reply.magic)); | ||
344 | result = -EPROTO; | ||
345 | goto harderror; | ||
346 | } | ||
347 | if (ntohl(reply.error)) { | ||
348 | printk(KERN_ERR "%s: Other side returned error (%d)\n", | ||
349 | lo->disk->disk_name, ntohl(reply.error)); | ||
350 | req->errors++; | ||
351 | return req; | ||
352 | } | ||
353 | |||
354 | dprintk(DBG_RX, "%s: request %p: got reply\n", | ||
355 | lo->disk->disk_name, req); | ||
356 | if (nbd_cmd(req) == NBD_CMD_READ) { | ||
357 | int i; | ||
358 | struct bio *bio; | ||
359 | rq_for_each_bio(bio, req) { | ||
360 | struct bio_vec *bvec; | ||
361 | bio_for_each_segment(bvec, bio, i) { | ||
362 | result = sock_recv_bvec(sock, bvec); | ||
363 | if (result <= 0) { | ||
364 | printk(KERN_ERR "%s: Receive data failed (result %d)\n", | ||
365 | lo->disk->disk_name, | ||
366 | result); | ||
367 | goto harderror; | ||
368 | } | ||
369 | dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", | ||
370 | lo->disk->disk_name, req, bvec->bv_len); | ||
371 | } | ||
372 | } | ||
373 | } | ||
374 | return req; | ||
375 | harderror: | ||
376 | lo->harderror = result; | ||
377 | return NULL; | ||
378 | } | ||
379 | |||
380 | static void nbd_do_it(struct nbd_device *lo) | ||
381 | { | ||
382 | struct request *req; | ||
383 | |||
384 | BUG_ON(lo->magic != LO_MAGIC); | ||
385 | |||
386 | while ((req = nbd_read_stat(lo)) != NULL) | ||
387 | nbd_end_request(req); | ||
388 | return; | ||
389 | } | ||
390 | |||
391 | static void nbd_clear_que(struct nbd_device *lo) | ||
392 | { | ||
393 | struct request *req; | ||
394 | |||
395 | BUG_ON(lo->magic != LO_MAGIC); | ||
396 | |||
397 | do { | ||
398 | req = NULL; | ||
399 | spin_lock(&lo->queue_lock); | ||
400 | if (!list_empty(&lo->queue_head)) { | ||
401 | req = list_entry(lo->queue_head.next, struct request, queuelist); | ||
402 | list_del_init(&req->queuelist); | ||
403 | } | ||
404 | spin_unlock(&lo->queue_lock); | ||
405 | if (req) { | ||
406 | req->errors++; | ||
407 | nbd_end_request(req); | ||
408 | } | ||
409 | } while (req); | ||
410 | } | ||
411 | |||
412 | /* | ||
413 | * We always wait for result of write, for now. It would be nice to make it optional | ||
414 | * in future | ||
415 | * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK)) | ||
416 | * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } | ||
417 | */ | ||
418 | |||
419 | static void do_nbd_request(request_queue_t * q) | ||
420 | { | ||
421 | struct request *req; | ||
422 | |||
423 | while ((req = elv_next_request(q)) != NULL) { | ||
424 | struct nbd_device *lo; | ||
425 | |||
426 | blkdev_dequeue_request(req); | ||
427 | dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n", | ||
428 | req->rq_disk->disk_name, req, req->flags); | ||
429 | |||
430 | if (!(req->flags & REQ_CMD)) | ||
431 | goto error_out; | ||
432 | |||
433 | lo = req->rq_disk->private_data; | ||
434 | |||
435 | BUG_ON(lo->magic != LO_MAGIC); | ||
436 | |||
437 | if (!lo->file) { | ||
438 | printk(KERN_ERR "%s: Request when not-ready\n", | ||
439 | lo->disk->disk_name); | ||
440 | goto error_out; | ||
441 | } | ||
442 | nbd_cmd(req) = NBD_CMD_READ; | ||
443 | if (rq_data_dir(req) == WRITE) { | ||
444 | nbd_cmd(req) = NBD_CMD_WRITE; | ||
445 | if (lo->flags & NBD_READ_ONLY) { | ||
446 | printk(KERN_ERR "%s: Write on read-only\n", | ||
447 | lo->disk->disk_name); | ||
448 | goto error_out; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | req->errors = 0; | ||
453 | spin_unlock_irq(q->queue_lock); | ||
454 | |||
455 | spin_lock(&lo->queue_lock); | ||
456 | |||
457 | if (!lo->file) { | ||
458 | spin_unlock(&lo->queue_lock); | ||
459 | printk(KERN_ERR "%s: failed between accept and semaphore, file lost\n", | ||
460 | lo->disk->disk_name); | ||
461 | req->errors++; | ||
462 | nbd_end_request(req); | ||
463 | spin_lock_irq(q->queue_lock); | ||
464 | continue; | ||
465 | } | ||
466 | |||
467 | list_add(&req->queuelist, &lo->queue_head); | ||
468 | spin_unlock(&lo->queue_lock); | ||
469 | |||
470 | if (nbd_send_req(lo, req) != 0) { | ||
471 | printk(KERN_ERR "%s: Request send failed\n", | ||
472 | lo->disk->disk_name); | ||
473 | if (nbd_find_request(lo, (char *)&req) != NULL) { | ||
474 | /* we still own req */ | ||
475 | req->errors++; | ||
476 | nbd_end_request(req); | ||
477 | } else /* we're racing with nbd_clear_que */ | ||
478 | printk(KERN_DEBUG "nbd: can't find req\n"); | ||
479 | } | ||
480 | |||
481 | spin_lock_irq(q->queue_lock); | ||
482 | continue; | ||
483 | |||
484 | error_out: | ||
485 | req->errors++; | ||
486 | spin_unlock(q->queue_lock); | ||
487 | nbd_end_request(req); | ||
488 | spin_lock(q->queue_lock); | ||
489 | } | ||
490 | return; | ||
491 | } | ||
492 | |||
493 | static int nbd_ioctl(struct inode *inode, struct file *file, | ||
494 | unsigned int cmd, unsigned long arg) | ||
495 | { | ||
496 | struct nbd_device *lo = inode->i_bdev->bd_disk->private_data; | ||
497 | int error; | ||
498 | struct request sreq ; | ||
499 | |||
500 | if (!capable(CAP_SYS_ADMIN)) | ||
501 | return -EPERM; | ||
502 | |||
503 | BUG_ON(lo->magic != LO_MAGIC); | ||
504 | |||
505 | /* Anyone capable of this syscall can do *real bad* things */ | ||
506 | dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", | ||
507 | lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); | ||
508 | |||
509 | switch (cmd) { | ||
510 | case NBD_DISCONNECT: | ||
511 | printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name); | ||
512 | sreq.flags = REQ_SPECIAL; | ||
513 | nbd_cmd(&sreq) = NBD_CMD_DISC; | ||
514 | /* | ||
515 | * Set these to sane values in case server implementation | ||
516 | * fails to check the request type first and also to keep | ||
517 | * debugging output cleaner. | ||
518 | */ | ||
519 | sreq.sector = 0; | ||
520 | sreq.nr_sectors = 0; | ||
521 | if (!lo->sock) | ||
522 | return -EINVAL; | ||
523 | nbd_send_req(lo, &sreq); | ||
524 | return 0; | ||
525 | |||
526 | case NBD_CLEAR_SOCK: | ||
527 | error = 0; | ||
528 | down(&lo->tx_lock); | ||
529 | lo->sock = NULL; | ||
530 | up(&lo->tx_lock); | ||
531 | spin_lock(&lo->queue_lock); | ||
532 | file = lo->file; | ||
533 | lo->file = NULL; | ||
534 | spin_unlock(&lo->queue_lock); | ||
535 | nbd_clear_que(lo); | ||
536 | spin_lock(&lo->queue_lock); | ||
537 | if (!list_empty(&lo->queue_head)) { | ||
538 | printk(KERN_ERR "nbd: disconnect: some requests are in progress -> please try again.\n"); | ||
539 | error = -EBUSY; | ||
540 | } | ||
541 | spin_unlock(&lo->queue_lock); | ||
542 | if (file) | ||
543 | fput(file); | ||
544 | return error; | ||
545 | case NBD_SET_SOCK: | ||
546 | if (lo->file) | ||
547 | return -EBUSY; | ||
548 | error = -EINVAL; | ||
549 | file = fget(arg); | ||
550 | if (file) { | ||
551 | inode = file->f_dentry->d_inode; | ||
552 | if (S_ISSOCK(inode->i_mode)) { | ||
553 | lo->file = file; | ||
554 | lo->sock = SOCKET_I(inode); | ||
555 | error = 0; | ||
556 | } else { | ||
557 | fput(file); | ||
558 | } | ||
559 | } | ||
560 | return error; | ||
561 | case NBD_SET_BLKSIZE: | ||
562 | lo->blksize = arg; | ||
563 | lo->bytesize &= ~(lo->blksize-1); | ||
564 | inode->i_bdev->bd_inode->i_size = lo->bytesize; | ||
565 | set_blocksize(inode->i_bdev, lo->blksize); | ||
566 | set_capacity(lo->disk, lo->bytesize >> 9); | ||
567 | return 0; | ||
568 | case NBD_SET_SIZE: | ||
569 | lo->bytesize = arg & ~(lo->blksize-1); | ||
570 | inode->i_bdev->bd_inode->i_size = lo->bytesize; | ||
571 | set_blocksize(inode->i_bdev, lo->blksize); | ||
572 | set_capacity(lo->disk, lo->bytesize >> 9); | ||
573 | return 0; | ||
574 | case NBD_SET_SIZE_BLOCKS: | ||
575 | lo->bytesize = ((u64) arg) * lo->blksize; | ||
576 | inode->i_bdev->bd_inode->i_size = lo->bytesize; | ||
577 | set_blocksize(inode->i_bdev, lo->blksize); | ||
578 | set_capacity(lo->disk, lo->bytesize >> 9); | ||
579 | return 0; | ||
580 | case NBD_DO_IT: | ||
581 | if (!lo->file) | ||
582 | return -EINVAL; | ||
583 | nbd_do_it(lo); | ||
584 | /* on return tidy up in case we have a signal */ | ||
585 | /* Forcibly shutdown the socket causing all listeners | ||
586 | * to error | ||
587 | * | ||
588 | * FIXME: This code is duplicated from sys_shutdown, but | ||
589 | * there should be a more generic interface rather than | ||
590 | * calling socket ops directly here */ | ||
591 | down(&lo->tx_lock); | ||
592 | if (lo->sock) { | ||
593 | printk(KERN_WARNING "%s: shutting down socket\n", | ||
594 | lo->disk->disk_name); | ||
595 | lo->sock->ops->shutdown(lo->sock, | ||
596 | SEND_SHUTDOWN|RCV_SHUTDOWN); | ||
597 | lo->sock = NULL; | ||
598 | } | ||
599 | up(&lo->tx_lock); | ||
600 | spin_lock(&lo->queue_lock); | ||
601 | file = lo->file; | ||
602 | lo->file = NULL; | ||
603 | spin_unlock(&lo->queue_lock); | ||
604 | nbd_clear_que(lo); | ||
605 | printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name); | ||
606 | if (file) | ||
607 | fput(file); | ||
608 | return lo->harderror; | ||
609 | case NBD_CLEAR_QUE: | ||
610 | down(&lo->tx_lock); | ||
611 | if (lo->sock) { | ||
612 | up(&lo->tx_lock); | ||
613 | return 0; /* probably should be error, but that would | ||
614 | * break "nbd-client -d", so just return 0 */ | ||
615 | } | ||
616 | up(&lo->tx_lock); | ||
617 | nbd_clear_que(lo); | ||
618 | return 0; | ||
619 | case NBD_PRINT_DEBUG: | ||
620 | printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n", | ||
621 | inode->i_bdev->bd_disk->disk_name, | ||
622 | lo->queue_head.next, lo->queue_head.prev, | ||
623 | &lo->queue_head); | ||
624 | return 0; | ||
625 | } | ||
626 | return -EINVAL; | ||
627 | } | ||
628 | |||
629 | static struct block_device_operations nbd_fops = | ||
630 | { | ||
631 | .owner = THIS_MODULE, | ||
632 | .ioctl = nbd_ioctl, | ||
633 | }; | ||
634 | |||
635 | /* | ||
636 | * And here should be modules and kernel interface | ||
637 | * (Just smiley confuses emacs :-) | ||
638 | */ | ||
639 | |||
640 | static int __init nbd_init(void) | ||
641 | { | ||
642 | int err = -ENOMEM; | ||
643 | int i; | ||
644 | |||
645 | if (sizeof(struct nbd_request) != 28) { | ||
646 | printk(KERN_CRIT "nbd: sizeof nbd_request needs to be 28 in order to work!\n" ); | ||
647 | return -EIO; | ||
648 | } | ||
649 | |||
650 | for (i = 0; i < MAX_NBD; i++) { | ||
651 | struct gendisk *disk = alloc_disk(1); | ||
652 | if (!disk) | ||
653 | goto out; | ||
654 | nbd_dev[i].disk = disk; | ||
655 | /* | ||
656 | * The new linux 2.5 block layer implementation requires | ||
657 | * every gendisk to have its very own request_queue struct. | ||
658 | * These structs are big so we dynamically allocate them. | ||
659 | */ | ||
660 | disk->queue = blk_init_queue(do_nbd_request, &nbd_lock); | ||
661 | if (!disk->queue) { | ||
662 | put_disk(disk); | ||
663 | goto out; | ||
664 | } | ||
665 | } | ||
666 | |||
667 | if (register_blkdev(NBD_MAJOR, "nbd")) { | ||
668 | err = -EIO; | ||
669 | goto out; | ||
670 | } | ||
671 | |||
672 | printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR); | ||
673 | dprintk(DBG_INIT, "nbd: debugflags=0x%x\n", debugflags); | ||
674 | |||
675 | devfs_mk_dir("nbd"); | ||
676 | for (i = 0; i < MAX_NBD; i++) { | ||
677 | struct gendisk *disk = nbd_dev[i].disk; | ||
678 | nbd_dev[i].file = NULL; | ||
679 | nbd_dev[i].magic = LO_MAGIC; | ||
680 | nbd_dev[i].flags = 0; | ||
681 | spin_lock_init(&nbd_dev[i].queue_lock); | ||
682 | INIT_LIST_HEAD(&nbd_dev[i].queue_head); | ||
683 | init_MUTEX(&nbd_dev[i].tx_lock); | ||
684 | nbd_dev[i].blksize = 1024; | ||
685 | nbd_dev[i].bytesize = 0x7ffffc00ULL << 10; /* 2TB */ | ||
686 | disk->major = NBD_MAJOR; | ||
687 | disk->first_minor = i; | ||
688 | disk->fops = &nbd_fops; | ||
689 | disk->private_data = &nbd_dev[i]; | ||
690 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | ||
691 | sprintf(disk->disk_name, "nbd%d", i); | ||
692 | sprintf(disk->devfs_name, "nbd/%d", i); | ||
693 | set_capacity(disk, 0x7ffffc00ULL << 1); /* 2 TB */ | ||
694 | add_disk(disk); | ||
695 | } | ||
696 | |||
697 | return 0; | ||
698 | out: | ||
699 | while (i--) { | ||
700 | blk_cleanup_queue(nbd_dev[i].disk->queue); | ||
701 | put_disk(nbd_dev[i].disk); | ||
702 | } | ||
703 | return err; | ||
704 | } | ||
705 | |||
706 | static void __exit nbd_cleanup(void) | ||
707 | { | ||
708 | int i; | ||
709 | for (i = 0; i < MAX_NBD; i++) { | ||
710 | struct gendisk *disk = nbd_dev[i].disk; | ||
711 | if (disk) { | ||
712 | del_gendisk(disk); | ||
713 | blk_cleanup_queue(disk->queue); | ||
714 | put_disk(disk); | ||
715 | } | ||
716 | } | ||
717 | devfs_remove("nbd"); | ||
718 | unregister_blkdev(NBD_MAJOR, "nbd"); | ||
719 | printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR); | ||
720 | } | ||
721 | |||
722 | module_init(nbd_init); | ||
723 | module_exit(nbd_cleanup); | ||
724 | |||
725 | MODULE_DESCRIPTION("Network Block Device"); | ||
726 | MODULE_LICENSE("GPL"); | ||
727 | |||
728 | #ifndef NDEBUG | ||
729 | module_param(debugflags, int, 0644); | ||
730 | MODULE_PARM_DESC(debugflags, "flags for controlling debug output"); | ||
731 | #endif | ||