diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-07-10 22:12:24 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-07-10 22:12:24 -0400 |
commit | 04eef90c2e2fb860db71bff5f60d5ff0ec4c6dea (patch) | |
tree | 0190a2610ade331b4be8e1efd094c0ad2b2ba181 | |
parent | bb8ad2815a1210bfa56b8f8ebf0e40fbdf9198ff (diff) | |
parent | bc47df0fa705887242c26c7b040e7cf0170ab1f1 (diff) |
Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
* 'for-linus' of git://git.open-osd.org/linux-open-osd:
osdblk: Adjust queue limits to lower device's limits
osdblk: a Linux block device for OSD objects
MAINTAINERS: Add osd maintained files (F:)
exofs: Avoid using file_fsync()
exofs: Remove IBM copyrights
exofs: Fix bio leak in error handling path (sync read)
-rw-r--r-- | MAINTAINERS | 5 | ||||
-rw-r--r-- | drivers/block/Kconfig | 16 | ||||
-rw-r--r-- | drivers/block/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/osdblk.c | 701 | ||||
-rw-r--r-- | fs/exofs/common.h | 4 | ||||
-rw-r--r-- | fs/exofs/dir.c | 4 | ||||
-rw-r--r-- | fs/exofs/exofs.h | 7 | ||||
-rw-r--r-- | fs/exofs/file.c | 21 | ||||
-rw-r--r-- | fs/exofs/inode.c | 7 | ||||
-rw-r--r-- | fs/exofs/namei.c | 4 | ||||
-rw-r--r-- | fs/exofs/osd.c | 4 | ||||
-rw-r--r-- | fs/exofs/super.c | 6 | ||||
-rw-r--r-- | fs/exofs/symlink.c | 4 |
13 files changed, 750 insertions, 34 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index e4b1a3d596cc..494059d60134 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -4407,7 +4407,7 @@ W: http://www.nongnu.org/orinoco/ | |||
4407 | S: Maintained | 4407 | S: Maintained |
4408 | F: drivers/net/wireless/orinoco/ | 4408 | F: drivers/net/wireless/orinoco/ |
4409 | 4409 | ||
4410 | OSD LIBRARY | 4410 | OSD LIBRARY and FILESYSTEM |
4411 | P: Boaz Harrosh | 4411 | P: Boaz Harrosh |
4412 | M: bharrosh@panasas.com | 4412 | M: bharrosh@panasas.com |
4413 | P: Benny Halevy | 4413 | P: Benny Halevy |
@@ -4416,6 +4416,9 @@ L: osd-dev@open-osd.org | |||
4416 | W: http://open-osd.org | 4416 | W: http://open-osd.org |
4417 | T: git git://git.open-osd.org/open-osd.git | 4417 | T: git git://git.open-osd.org/open-osd.git |
4418 | S: Maintained | 4418 | S: Maintained |
4419 | F: drivers/scsi/osd/ | ||
4420 | F: drivers/include/scsi/osd_* | ||
4421 | F: fs/exofs/ | ||
4419 | 4422 | ||
4420 | P54 WIRELESS DRIVER | 4423 | P54 WIRELESS DRIVER |
4421 | P: Michael Wu | 4424 | P: Michael Wu |
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index bb72ada9f074..1d886e079c58 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
@@ -298,6 +298,22 @@ config BLK_DEV_NBD | |||
298 | 298 | ||
299 | If unsure, say N. | 299 | If unsure, say N. |
300 | 300 | ||
301 | config BLK_DEV_OSD | ||
302 | tristate "OSD object-as-blkdev support" | ||
303 | depends on SCSI_OSD_ULD | ||
304 | ---help--- | ||
305 | Saying Y or M here will allow the exporting of a single SCSI | ||
306 | OSD (object-based storage) object as a Linux block device. | ||
307 | |||
308 | For example, if you create a 2G object on an OSD device, | ||
309 | you can then use this module to present that 2G object as | ||
310 | a Linux block device. | ||
311 | |||
312 | To compile this driver as a module, choose M here: the | ||
313 | module will be called osdblk. | ||
314 | |||
315 | If unsure, say N. | ||
316 | |||
301 | config BLK_DEV_SX8 | 317 | config BLK_DEV_SX8 |
302 | tristate "Promise SATA SX8 support" | 318 | tristate "Promise SATA SX8 support" |
303 | depends on PCI | 319 | depends on PCI |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 7755a5e2a85e..cdaa3f8fddf0 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile | |||
@@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o | |||
23 | obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o | 23 | obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o |
24 | obj-$(CONFIG_MG_DISK) += mg_disk.o | 24 | obj-$(CONFIG_MG_DISK) += mg_disk.o |
25 | obj-$(CONFIG_SUNVDC) += sunvdc.o | 25 | obj-$(CONFIG_SUNVDC) += sunvdc.o |
26 | obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o | ||
26 | 27 | ||
27 | obj-$(CONFIG_BLK_DEV_UMEM) += umem.o | 28 | obj-$(CONFIG_BLK_DEV_UMEM) += umem.o |
28 | obj-$(CONFIG_BLK_DEV_NBD) += nbd.o | 29 | obj-$(CONFIG_BLK_DEV_NBD) += nbd.o |
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c new file mode 100644 index 000000000000..13c1aee6aa3f --- /dev/null +++ b/drivers/block/osdblk.c | |||
@@ -0,0 +1,701 @@ | |||
1 | |||
2 | /* | ||
3 | osdblk.c -- Export a single SCSI OSD object as a Linux block device | ||
4 | |||
5 | |||
6 | Copyright 2009 Red Hat, Inc. | ||
7 | |||
8 | This program is free software; you can redistribute it and/or modify | ||
9 | it under the terms of the GNU General Public License as published by | ||
10 | the Free Software Foundation. | ||
11 | |||
12 | This program is distributed in the hope that it will be useful, | ||
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | GNU General Public License for more details. | ||
16 | |||
17 | You should have received a copy of the GNU General Public License | ||
18 | along with this program; see the file COPYING. If not, write to | ||
19 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | |||
21 | |||
22 | Instructions for use | ||
23 | -------------------- | ||
24 | |||
25 | 1) Map a Linux block device to an existing OSD object. | ||
26 | |||
27 | In this example, we will use partition id 1234, object id 5678, | ||
28 | OSD device /dev/osd1. | ||
29 | |||
30 | $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add | ||
31 | |||
32 | |||
33 | 2) List all active blkdev<->object mappings. | ||
34 | |||
35 | In this example, we have performed step #1 twice, creating two blkdevs, | ||
36 | mapped to two separate OSD objects. | ||
37 | |||
38 | $ cat /sys/class/osdblk/list | ||
39 | 0 174 1234 5678 /dev/osd1 | ||
40 | 1 179 1994 897123 /dev/osd0 | ||
41 | |||
42 | The columns, in order, are: | ||
43 | - blkdev unique id | ||
44 | - blkdev assigned major | ||
45 | - OSD object partition id | ||
46 | - OSD object id | ||
47 | - OSD device | ||
48 | |||
49 | |||
50 | 3) Remove an active blkdev<->object mapping. | ||
51 | |||
52 | In this example, we remove the mapping with blkdev unique id 1. | ||
53 | |||
54 | $ echo 1 > /sys/class/osdblk/remove | ||
55 | |||
56 | |||
57 | NOTE: The actual creation and deletion of OSD objects is outside the scope | ||
58 | of this driver. | ||
59 | |||
60 | */ | ||
61 | |||
62 | #include <linux/kernel.h> | ||
63 | #include <linux/device.h> | ||
64 | #include <linux/module.h> | ||
65 | #include <linux/fs.h> | ||
66 | #include <scsi/osd_initiator.h> | ||
67 | #include <scsi/osd_attributes.h> | ||
68 | #include <scsi/osd_sec.h> | ||
69 | #include <scsi/scsi_device.h> | ||
70 | |||
71 | #define DRV_NAME "osdblk" | ||
72 | #define PFX DRV_NAME ": " | ||
73 | |||
74 | /* #define _OSDBLK_DEBUG */ | ||
75 | #ifdef _OSDBLK_DEBUG | ||
76 | #define OSDBLK_DEBUG(fmt, a...) \ | ||
77 | printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a) | ||
78 | #else | ||
79 | #define OSDBLK_DEBUG(fmt, a...) \ | ||
80 | do { if (0) printk(fmt, ##a); } while (0) | ||
81 | #endif | ||
82 | |||
83 | MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); | ||
84 | MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko"); | ||
85 | MODULE_LICENSE("GPL"); | ||
86 | |||
87 | struct osdblk_device; | ||
88 | |||
89 | enum { | ||
90 | OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ | ||
91 | OSDBLK_MAX_REQ = 32, /* max parallel requests */ | ||
92 | OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ | ||
93 | }; | ||
94 | |||
95 | struct osdblk_request { | ||
96 | struct request *rq; /* blk layer request */ | ||
97 | struct bio *bio; /* cloned bio */ | ||
98 | struct osdblk_device *osdev; /* associated blkdev */ | ||
99 | }; | ||
100 | |||
101 | struct osdblk_device { | ||
102 | int id; /* blkdev unique id */ | ||
103 | |||
104 | int major; /* blkdev assigned major */ | ||
105 | struct gendisk *disk; /* blkdev's gendisk and rq */ | ||
106 | struct request_queue *q; | ||
107 | |||
108 | struct osd_dev *osd; /* associated OSD */ | ||
109 | |||
110 | char name[32]; /* blkdev name, e.g. osdblk34 */ | ||
111 | |||
112 | spinlock_t lock; /* queue lock */ | ||
113 | |||
114 | struct osd_obj_id obj; /* OSD partition, obj id */ | ||
115 | uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ | ||
116 | |||
117 | struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ | ||
118 | |||
119 | struct list_head node; | ||
120 | |||
121 | char osd_path[0]; /* OSD device path */ | ||
122 | }; | ||
123 | |||
124 | static struct class *class_osdblk; /* /sys/class/osdblk */ | ||
125 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ | ||
126 | static LIST_HEAD(osdblkdev_list); | ||
127 | |||
128 | static struct block_device_operations osdblk_bd_ops = { | ||
129 | .owner = THIS_MODULE, | ||
130 | }; | ||
131 | |||
132 | static const struct osd_attr g_attr_logical_length = ATTR_DEF( | ||
133 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | ||
134 | |||
135 | static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
136 | const struct osd_obj_id *obj) | ||
137 | { | ||
138 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | ||
139 | } | ||
140 | |||
141 | /* copied from exofs; move to libosd? */ | ||
142 | /* | ||
143 | * Perform a synchronous OSD operation. copied from exofs; move to libosd? | ||
144 | */ | ||
145 | static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | ||
146 | { | ||
147 | int ret; | ||
148 | |||
149 | or->timeout = timeout; | ||
150 | ret = osd_finalize_request(or, 0, credential, NULL); | ||
151 | if (ret) | ||
152 | return ret; | ||
153 | |||
154 | ret = osd_execute_request(or); | ||
155 | |||
156 | /* osd_req_decode_sense(or, ret); */ | ||
157 | return ret; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * Perform an asynchronous OSD operation. copied from exofs; move to libosd? | ||
162 | */ | ||
163 | static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, | ||
164 | void *caller_context, u8 *cred) | ||
165 | { | ||
166 | int ret; | ||
167 | |||
168 | ret = osd_finalize_request(or, 0, cred, NULL); | ||
169 | if (ret) | ||
170 | return ret; | ||
171 | |||
172 | ret = osd_execute_request_async(or, async_done, caller_context); | ||
173 | |||
174 | return ret; | ||
175 | } | ||
176 | |||
177 | /* copied from exofs; move to libosd? */ | ||
178 | static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | ||
179 | { | ||
180 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | ||
181 | void *iter = NULL; | ||
182 | int nelem; | ||
183 | |||
184 | do { | ||
185 | nelem = 1; | ||
186 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | ||
187 | if ((cur_attr.attr_page == attr->attr_page) && | ||
188 | (cur_attr.attr_id == attr->attr_id)) { | ||
189 | attr->len = cur_attr.len; | ||
190 | attr->val_ptr = cur_attr.val_ptr; | ||
191 | return 0; | ||
192 | } | ||
193 | } while (iter); | ||
194 | |||
195 | return -EIO; | ||
196 | } | ||
197 | |||
198 | static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) | ||
199 | { | ||
200 | struct osd_request *or; | ||
201 | struct osd_attr attr; | ||
202 | int ret; | ||
203 | |||
204 | /* start request */ | ||
205 | or = osd_start_request(osdev->osd, GFP_KERNEL); | ||
206 | if (!or) | ||
207 | return -ENOMEM; | ||
208 | |||
209 | /* create a get-attributes(length) request */ | ||
210 | osd_req_get_attributes(or, &osdev->obj); | ||
211 | |||
212 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | ||
213 | |||
214 | /* execute op synchronously */ | ||
215 | ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); | ||
216 | if (ret) | ||
217 | goto out; | ||
218 | |||
219 | /* extract length from returned attribute info */ | ||
220 | attr = g_attr_logical_length; | ||
221 | ret = extract_attr_from_req(or, &attr); | ||
222 | if (ret) | ||
223 | goto out; | ||
224 | |||
225 | *size_out = get_unaligned_be64(attr.val_ptr); | ||
226 | |||
227 | out: | ||
228 | osd_end_request(or); | ||
229 | return ret; | ||
230 | |||
231 | } | ||
232 | |||
233 | static void osdblk_osd_complete(struct osd_request *or, void *private) | ||
234 | { | ||
235 | struct osdblk_request *orq = private; | ||
236 | struct osd_sense_info osi; | ||
237 | int ret = osd_req_decode_sense(or, &osi); | ||
238 | |||
239 | if (ret) { | ||
240 | ret = -EIO; | ||
241 | OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret); | ||
242 | } | ||
243 | |||
244 | /* complete OSD request */ | ||
245 | osd_end_request(or); | ||
246 | |||
247 | /* complete request passed to osdblk by block layer */ | ||
248 | __blk_end_request_all(orq->rq, ret); | ||
249 | } | ||
250 | |||
251 | static void bio_chain_put(struct bio *chain) | ||
252 | { | ||
253 | struct bio *tmp; | ||
254 | |||
255 | while (chain) { | ||
256 | tmp = chain; | ||
257 | chain = chain->bi_next; | ||
258 | |||
259 | bio_put(tmp); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) | ||
264 | { | ||
265 | struct bio *tmp, *new_chain = NULL, *tail = NULL; | ||
266 | |||
267 | while (old_chain) { | ||
268 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); | ||
269 | if (!tmp) | ||
270 | goto err_out; | ||
271 | |||
272 | __bio_clone(tmp, old_chain); | ||
273 | tmp->bi_bdev = NULL; | ||
274 | gfpmask &= ~__GFP_WAIT; | ||
275 | tmp->bi_next = NULL; | ||
276 | |||
277 | if (!new_chain) | ||
278 | new_chain = tail = tmp; | ||
279 | else { | ||
280 | tail->bi_next = tmp; | ||
281 | tail = tmp; | ||
282 | } | ||
283 | |||
284 | old_chain = old_chain->bi_next; | ||
285 | } | ||
286 | |||
287 | return new_chain; | ||
288 | |||
289 | err_out: | ||
290 | OSDBLK_DEBUG("bio_chain_clone with err\n"); | ||
291 | bio_chain_put(new_chain); | ||
292 | return NULL; | ||
293 | } | ||
294 | |||
295 | static void osdblk_rq_fn(struct request_queue *q) | ||
296 | { | ||
297 | struct osdblk_device *osdev = q->queuedata; | ||
298 | |||
299 | while (1) { | ||
300 | struct request *rq; | ||
301 | struct osdblk_request *orq; | ||
302 | struct osd_request *or; | ||
303 | struct bio *bio; | ||
304 | bool do_write, do_flush; | ||
305 | |||
306 | /* peek at request from block layer */ | ||
307 | rq = blk_fetch_request(q); | ||
308 | if (!rq) | ||
309 | break; | ||
310 | |||
311 | /* filter out block requests we don't understand */ | ||
312 | if (!blk_fs_request(rq) && !blk_barrier_rq(rq)) { | ||
313 | blk_end_request_all(rq, 0); | ||
314 | continue; | ||
315 | } | ||
316 | |||
317 | /* deduce our operation (read, write, flush) */ | ||
318 | /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] | ||
319 | * into a clearly defined set of RPC commands: | ||
320 | * read, write, flush, scsi command, power mgmt req, | ||
321 | * driver-specific, etc. | ||
322 | */ | ||
323 | |||
324 | do_flush = (rq->special == (void *) 0xdeadbeefUL); | ||
325 | do_write = (rq_data_dir(rq) == WRITE); | ||
326 | |||
327 | if (!do_flush) { /* osd_flush does not use a bio */ | ||
328 | /* a bio clone to be passed down to OSD request */ | ||
329 | bio = bio_chain_clone(rq->bio, GFP_ATOMIC); | ||
330 | if (!bio) | ||
331 | break; | ||
332 | } else | ||
333 | bio = NULL; | ||
334 | |||
335 | /* alloc internal OSD request, for OSD command execution */ | ||
336 | or = osd_start_request(osdev->osd, GFP_ATOMIC); | ||
337 | if (!or) { | ||
338 | bio_chain_put(bio); | ||
339 | OSDBLK_DEBUG("osd_start_request with err\n"); | ||
340 | break; | ||
341 | } | ||
342 | |||
343 | orq = &osdev->req[rq->tag]; | ||
344 | orq->rq = rq; | ||
345 | orq->bio = bio; | ||
346 | orq->osdev = osdev; | ||
347 | |||
348 | /* init OSD command: flush, write or read */ | ||
349 | if (do_flush) | ||
350 | osd_req_flush_object(or, &osdev->obj, | ||
351 | OSD_CDB_FLUSH_ALL, 0, 0); | ||
352 | else if (do_write) | ||
353 | osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, | ||
354 | bio, blk_rq_bytes(rq)); | ||
355 | else | ||
356 | osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, | ||
357 | bio, blk_rq_bytes(rq)); | ||
358 | |||
359 | OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n", | ||
360 | do_flush ? "flush" : do_write ? | ||
361 | "write" : "read", blk_rq_bytes(rq), | ||
362 | blk_rq_pos(rq) * 512ULL); | ||
363 | |||
364 | /* begin OSD command execution */ | ||
365 | if (osd_async_op(or, osdblk_osd_complete, orq, | ||
366 | osdev->obj_cred)) { | ||
367 | osd_end_request(or); | ||
368 | blk_requeue_request(q, rq); | ||
369 | bio_chain_put(bio); | ||
370 | OSDBLK_DEBUG("osd_execute_request_async with err\n"); | ||
371 | break; | ||
372 | } | ||
373 | |||
374 | /* remove the special 'flush' marker, now that the command | ||
375 | * is executing | ||
376 | */ | ||
377 | rq->special = NULL; | ||
378 | } | ||
379 | } | ||
380 | |||
381 | static void osdblk_prepare_flush(struct request_queue *q, struct request *rq) | ||
382 | { | ||
383 | /* add driver-specific marker, to indicate that this request | ||
384 | * is a flush command | ||
385 | */ | ||
386 | rq->special = (void *) 0xdeadbeefUL; | ||
387 | } | ||
388 | |||
389 | static void osdblk_free_disk(struct osdblk_device *osdev) | ||
390 | { | ||
391 | struct gendisk *disk = osdev->disk; | ||
392 | |||
393 | if (!disk) | ||
394 | return; | ||
395 | |||
396 | if (disk->flags & GENHD_FL_UP) | ||
397 | del_gendisk(disk); | ||
398 | if (disk->queue) | ||
399 | blk_cleanup_queue(disk->queue); | ||
400 | put_disk(disk); | ||
401 | } | ||
402 | |||
403 | static int osdblk_init_disk(struct osdblk_device *osdev) | ||
404 | { | ||
405 | struct gendisk *disk; | ||
406 | struct request_queue *q; | ||
407 | int rc; | ||
408 | u64 obj_size = 0; | ||
409 | |||
410 | /* contact OSD, request size info about the object being mapped */ | ||
411 | rc = osdblk_get_obj_size(osdev, &obj_size); | ||
412 | if (rc) | ||
413 | return rc; | ||
414 | |||
415 | /* create gendisk info */ | ||
416 | disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR); | ||
417 | if (!disk) | ||
418 | return -ENOMEM; | ||
419 | |||
420 | sprintf(disk->disk_name, DRV_NAME "%d", osdev->id); | ||
421 | disk->major = osdev->major; | ||
422 | disk->first_minor = 0; | ||
423 | disk->fops = &osdblk_bd_ops; | ||
424 | disk->private_data = osdev; | ||
425 | |||
426 | /* init rq */ | ||
427 | q = blk_init_queue(osdblk_rq_fn, &osdev->lock); | ||
428 | if (!q) { | ||
429 | put_disk(disk); | ||
430 | return -ENOMEM; | ||
431 | } | ||
432 | |||
433 | /* switch queue to TCQ mode; allocate tag map */ | ||
434 | rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); | ||
435 | if (rc) { | ||
436 | blk_cleanup_queue(q); | ||
437 | put_disk(disk); | ||
438 | return rc; | ||
439 | } | ||
440 | |||
441 | /* Set our limits to the lower device limits, because osdblk cannot | ||
442 | * sleep when allocating a lower-request and therefore cannot be | ||
443 | * bouncing. | ||
444 | */ | ||
445 | blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); | ||
446 | |||
447 | blk_queue_prep_rq(q, blk_queue_start_tag); | ||
448 | blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush); | ||
449 | |||
450 | disk->queue = q; | ||
451 | |||
452 | q->queuedata = osdev; | ||
453 | |||
454 | osdev->disk = disk; | ||
455 | osdev->q = q; | ||
456 | |||
457 | /* finally, announce the disk to the world */ | ||
458 | set_capacity(disk, obj_size / 512ULL); | ||
459 | add_disk(disk); | ||
460 | |||
461 | printk(KERN_INFO "%s: Added of size 0x%llx\n", | ||
462 | disk->disk_name, (unsigned long long)obj_size); | ||
463 | |||
464 | return 0; | ||
465 | } | ||
466 | |||
467 | /******************************************************************** | ||
468 | * /sys/class/osdblk/ | ||
469 | * add map OSD object to blkdev | ||
470 | * remove unmap OSD object | ||
471 | * list show mappings | ||
472 | *******************************************************************/ | ||
473 | |||
474 | static void class_osdblk_release(struct class *cls) | ||
475 | { | ||
476 | kfree(cls); | ||
477 | } | ||
478 | |||
479 | static ssize_t class_osdblk_list(struct class *c, char *data) | ||
480 | { | ||
481 | int n = 0; | ||
482 | struct list_head *tmp; | ||
483 | |||
484 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
485 | |||
486 | list_for_each(tmp, &osdblkdev_list) { | ||
487 | struct osdblk_device *osdev; | ||
488 | |||
489 | osdev = list_entry(tmp, struct osdblk_device, node); | ||
490 | |||
491 | n += sprintf(data+n, "%d %d %llu %llu %s\n", | ||
492 | osdev->id, | ||
493 | osdev->major, | ||
494 | osdev->obj.partition, | ||
495 | osdev->obj.id, | ||
496 | osdev->osd_path); | ||
497 | } | ||
498 | |||
499 | mutex_unlock(&ctl_mutex); | ||
500 | return n; | ||
501 | } | ||
502 | |||
503 | static ssize_t class_osdblk_add(struct class *c, const char *buf, size_t count) | ||
504 | { | ||
505 | struct osdblk_device *osdev; | ||
506 | ssize_t rc; | ||
507 | int irc, new_id = 0; | ||
508 | struct list_head *tmp; | ||
509 | |||
510 | if (!try_module_get(THIS_MODULE)) | ||
511 | return -ENODEV; | ||
512 | |||
513 | /* new osdblk_device object */ | ||
514 | osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); | ||
515 | if (!osdev) { | ||
516 | rc = -ENOMEM; | ||
517 | goto err_out_mod; | ||
518 | } | ||
519 | |||
520 | /* static osdblk_device initialization */ | ||
521 | spin_lock_init(&osdev->lock); | ||
522 | INIT_LIST_HEAD(&osdev->node); | ||
523 | |||
524 | /* generate unique id: find highest unique id, add one */ | ||
525 | |||
526 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
527 | |||
528 | list_for_each(tmp, &osdblkdev_list) { | ||
529 | struct osdblk_device *osdev; | ||
530 | |||
531 | osdev = list_entry(tmp, struct osdblk_device, node); | ||
532 | if (osdev->id > new_id) | ||
533 | new_id = osdev->id + 1; | ||
534 | } | ||
535 | |||
536 | osdev->id = new_id; | ||
537 | |||
538 | /* add to global list */ | ||
539 | list_add_tail(&osdev->node, &osdblkdev_list); | ||
540 | |||
541 | mutex_unlock(&ctl_mutex); | ||
542 | |||
543 | /* parse add command */ | ||
544 | if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id, | ||
545 | osdev->osd_path) != 3) { | ||
546 | rc = -EINVAL; | ||
547 | goto err_out_slot; | ||
548 | } | ||
549 | |||
550 | /* initialize rest of new object */ | ||
551 | sprintf(osdev->name, DRV_NAME "%d", osdev->id); | ||
552 | |||
553 | /* contact requested OSD */ | ||
554 | osdev->osd = osduld_path_lookup(osdev->osd_path); | ||
555 | if (IS_ERR(osdev->osd)) { | ||
556 | rc = PTR_ERR(osdev->osd); | ||
557 | goto err_out_slot; | ||
558 | } | ||
559 | |||
560 | /* build OSD credential */ | ||
561 | osdblk_make_credential(osdev->obj_cred, &osdev->obj); | ||
562 | |||
563 | /* register our block device */ | ||
564 | irc = register_blkdev(0, osdev->name); | ||
565 | if (irc < 0) { | ||
566 | rc = irc; | ||
567 | goto err_out_osd; | ||
568 | } | ||
569 | |||
570 | osdev->major = irc; | ||
571 | |||
572 | /* set up and announce blkdev mapping */ | ||
573 | rc = osdblk_init_disk(osdev); | ||
574 | if (rc) | ||
575 | goto err_out_blkdev; | ||
576 | |||
577 | return count; | ||
578 | |||
579 | err_out_blkdev: | ||
580 | unregister_blkdev(osdev->major, osdev->name); | ||
581 | err_out_osd: | ||
582 | osduld_put_device(osdev->osd); | ||
583 | err_out_slot: | ||
584 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
585 | list_del_init(&osdev->node); | ||
586 | mutex_unlock(&ctl_mutex); | ||
587 | |||
588 | kfree(osdev); | ||
589 | err_out_mod: | ||
590 | OSDBLK_DEBUG("Error adding device %s\n", buf); | ||
591 | module_put(THIS_MODULE); | ||
592 | return rc; | ||
593 | } | ||
594 | |||
595 | static ssize_t class_osdblk_remove(struct class *c, const char *buf, | ||
596 | size_t count) | ||
597 | { | ||
598 | struct osdblk_device *osdev = NULL; | ||
599 | int target_id, rc; | ||
600 | unsigned long ul; | ||
601 | struct list_head *tmp; | ||
602 | |||
603 | rc = strict_strtoul(buf, 10, &ul); | ||
604 | if (rc) | ||
605 | return rc; | ||
606 | |||
607 | /* convert to int; abort if we lost anything in the conversion */ | ||
608 | target_id = (int) ul; | ||
609 | if (target_id != ul) | ||
610 | return -EINVAL; | ||
611 | |||
612 | /* remove object from list immediately */ | ||
613 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
614 | |||
615 | list_for_each(tmp, &osdblkdev_list) { | ||
616 | osdev = list_entry(tmp, struct osdblk_device, node); | ||
617 | if (osdev->id == target_id) { | ||
618 | list_del_init(&osdev->node); | ||
619 | break; | ||
620 | } | ||
621 | osdev = NULL; | ||
622 | } | ||
623 | |||
624 | mutex_unlock(&ctl_mutex); | ||
625 | |||
626 | if (!osdev) | ||
627 | return -ENOENT; | ||
628 | |||
629 | /* clean up and free blkdev and associated OSD connection */ | ||
630 | osdblk_free_disk(osdev); | ||
631 | unregister_blkdev(osdev->major, osdev->name); | ||
632 | osduld_put_device(osdev->osd); | ||
633 | kfree(osdev); | ||
634 | |||
635 | /* release module ref */ | ||
636 | module_put(THIS_MODULE); | ||
637 | |||
638 | return count; | ||
639 | } | ||
640 | |||
641 | static struct class_attribute class_osdblk_attrs[] = { | ||
642 | __ATTR(add, 0200, NULL, class_osdblk_add), | ||
643 | __ATTR(remove, 0200, NULL, class_osdblk_remove), | ||
644 | __ATTR(list, 0444, class_osdblk_list, NULL), | ||
645 | __ATTR_NULL | ||
646 | }; | ||
647 | |||
648 | static int osdblk_sysfs_init(void) | ||
649 | { | ||
650 | int ret = 0; | ||
651 | |||
652 | /* | ||
653 | * create control files in sysfs | ||
654 | * /sys/class/osdblk/... | ||
655 | */ | ||
656 | class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); | ||
657 | if (!class_osdblk) | ||
658 | return -ENOMEM; | ||
659 | |||
660 | class_osdblk->name = DRV_NAME; | ||
661 | class_osdblk->owner = THIS_MODULE; | ||
662 | class_osdblk->class_release = class_osdblk_release; | ||
663 | class_osdblk->class_attrs = class_osdblk_attrs; | ||
664 | |||
665 | ret = class_register(class_osdblk); | ||
666 | if (ret) { | ||
667 | kfree(class_osdblk); | ||
668 | class_osdblk = NULL; | ||
669 | printk(PFX "failed to create class osdblk\n"); | ||
670 | return ret; | ||
671 | } | ||
672 | |||
673 | return 0; | ||
674 | } | ||
675 | |||
676 | static void osdblk_sysfs_cleanup(void) | ||
677 | { | ||
678 | if (class_osdblk) | ||
679 | class_destroy(class_osdblk); | ||
680 | class_osdblk = NULL; | ||
681 | } | ||
682 | |||
683 | static int __init osdblk_init(void) | ||
684 | { | ||
685 | int rc; | ||
686 | |||
687 | rc = osdblk_sysfs_init(); | ||
688 | if (rc) | ||
689 | return rc; | ||
690 | |||
691 | return 0; | ||
692 | } | ||
693 | |||
694 | static void __exit osdblk_exit(void) | ||
695 | { | ||
696 | osdblk_sysfs_cleanup(); | ||
697 | } | ||
698 | |||
699 | module_init(osdblk_init); | ||
700 | module_exit(osdblk_exit); | ||
701 | |||
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index 24667eedc023..c6718e4817fe 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
@@ -2,9 +2,7 @@ | |||
2 | * common.h - Common definitions for both Kernel and user-mode utilities | 2 | * common.h - Common definitions for both Kernel and user-mode utilities |
3 | * | 3 | * |
4 | * Copyright (C) 2005, 2006 | 4 | * Copyright (C) 2005, 2006 |
5 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 5 | * Avishay Traeger (avishay@gmail.com) |
6 | * Copyright (C) 2005, 2006 | ||
7 | * International Business Machines | ||
8 | * Copyright (C) 2008, 2009 | 6 | * Copyright (C) 2008, 2009 |
9 | * Boaz Harrosh <bharrosh@panasas.com> | 7 | * Boaz Harrosh <bharrosh@panasas.com> |
10 | * | 8 | * |
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 65b0c8c776a1..4cfab1cc75c0 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <bharrosh@panasas.com> |
8 | * | 6 | * |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 0fd4c7859679..5ec72e020b22 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <bharrosh@panasas.com> |
8 | * | 6 | * |
@@ -156,6 +154,9 @@ ino_t exofs_parent_ino(struct dentry *child); | |||
156 | int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, | 154 | int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, |
157 | struct inode *); | 155 | struct inode *); |
158 | 156 | ||
157 | /* super.c */ | ||
158 | int exofs_sync_fs(struct super_block *sb, int wait); | ||
159 | |||
159 | /********************* | 160 | /********************* |
160 | * operation vectors * | 161 | * operation vectors * |
161 | *********************/ | 162 | *********************/ |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 6ed7fe484752..839b9dc1e70f 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <bharrosh@panasas.com> |
8 | * | 6 | * |
@@ -47,16 +45,23 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry, | |||
47 | { | 45 | { |
48 | int ret; | 46 | int ret; |
49 | struct address_space *mapping = filp->f_mapping; | 47 | struct address_space *mapping = filp->f_mapping; |
48 | struct inode *inode = dentry->d_inode; | ||
49 | struct super_block *sb; | ||
50 | 50 | ||
51 | ret = filemap_write_and_wait(mapping); | 51 | ret = filemap_write_and_wait(mapping); |
52 | if (ret) | 52 | if (ret) |
53 | return ret; | 53 | return ret; |
54 | 54 | ||
55 | /*Note: file_fsync below also calles sync_blockdev, which is a no-op | 55 | /* sync the inode attributes */ |
56 | * for exofs, but other then that it does sync_inode and | 56 | ret = write_inode_now(inode, 1); |
57 | * sync_superblock which is what we need here. | 57 | |
58 | */ | 58 | /* This is a good place to write the sb */ |
59 | return file_fsync(filp, dentry, datasync); | 59 | /* TODO: Sechedule an sb-sync on create */ |
60 | sb = inode->i_sb; | ||
61 | if (sb->s_dirt) | ||
62 | exofs_sync_fs(sb, 1); | ||
63 | |||
64 | return ret; | ||
60 | } | 65 | } |
61 | 66 | ||
62 | static int exofs_flush(struct file *file, fl_owner_t id) | 67 | static int exofs_flush(struct file *file, fl_owner_t id) |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 77d0a295eb1c..6c10f7476699 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <bharrosh@panasas.com> |
8 | * | 6 | * |
@@ -295,6 +293,9 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
295 | err: | 293 | err: |
296 | if (!is_sync) | 294 | if (!is_sync) |
297 | _unlock_pcol_pages(pcol, ret, READ); | 295 | _unlock_pcol_pages(pcol, ret, READ); |
296 | else /* Pages unlocked by caller in sync mode only free bio */ | ||
297 | pcol_free(pcol); | ||
298 | |||
298 | kfree(pcol_copy); | 299 | kfree(pcol_copy); |
299 | if (or) | 300 | if (or) |
300 | osd_end_request(or); | 301 | osd_end_request(or); |
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 77fdd765e76d..b7dd0c236863 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <bharrosh@panasas.com> |
8 | * | 6 | * |
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index b3d2ccb87aaa..4372542df284 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <bharrosh@panasas.com> |
8 | * | 6 | * |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8216c5b77b53..a343b4ea62f6 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <bharrosh@panasas.com> |
8 | * | 6 | * |
@@ -200,7 +198,7 @@ static const struct export_operations exofs_export_ops; | |||
200 | /* | 198 | /* |
201 | * Write the superblock to the OSD | 199 | * Write the superblock to the OSD |
202 | */ | 200 | */ |
203 | static int exofs_sync_fs(struct super_block *sb, int wait) | 201 | int exofs_sync_fs(struct super_block *sb, int wait) |
204 | { | 202 | { |
205 | struct exofs_sb_info *sbi; | 203 | struct exofs_sb_info *sbi; |
206 | struct exofs_fscb *fscb; | 204 | struct exofs_fscb *fscb; |
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c index 36e2d7bc7f7b..4dd687c3e747 100644 --- a/fs/exofs/symlink.c +++ b/fs/exofs/symlink.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <bharrosh@panasas.com> |
8 | * | 6 | * |