diff options
-rw-r--r-- | fs/nfs/blocklayout/Makefile | 2 | ||||
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 92 | ||||
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.h | 83 | ||||
-rw-r--r-- | fs/nfs/blocklayout/dev.c | 360 | ||||
-rw-r--r-- | fs/nfs/blocklayout/rpc_pipefs.c | 141 |
5 files changed, 530 insertions, 148 deletions
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile index e177026e0119..3ca14c36d08b 100644 --- a/fs/nfs/blocklayout/Makefile +++ b/fs/nfs/blocklayout/Makefile | |||
@@ -3,4 +3,4 @@ | |||
3 | # | 3 | # |
4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o | 4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o |
5 | 5 | ||
6 | blocklayoutdriver-y += blocklayout.o extent_tree.o rpc_pipefs.o | 6 | blocklayoutdriver-y += blocklayout.o dev.o extent_tree.o rpc_pipefs.o |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 65a6b19b17a2..c41a718854e3 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -114,13 +114,10 @@ bl_submit_bio(int rw, struct bio *bio) | |||
114 | return NULL; | 114 | return NULL; |
115 | } | 115 | } |
116 | 116 | ||
117 | static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | 117 | static struct bio * |
118 | struct pnfs_block_extent *be, | 118 | bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, |
119 | void (*end_io)(struct bio *, int err), | 119 | void (*end_io)(struct bio *, int err), struct parallel_io *par) |
120 | struct parallel_io *par) | ||
121 | { | 120 | { |
122 | struct pnfs_block_dev *dev = | ||
123 | container_of(be->be_device, struct pnfs_block_dev, d_node); | ||
124 | struct bio *bio; | 121 | struct bio *bio; |
125 | 122 | ||
126 | npg = min(npg, BIO_MAX_PAGES); | 123 | npg = min(npg, BIO_MAX_PAGES); |
@@ -131,32 +128,55 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | |||
131 | } | 128 | } |
132 | 129 | ||
133 | if (bio) { | 130 | if (bio) { |
134 | bio->bi_iter.bi_sector = isect - be->be_f_offset + | 131 | bio->bi_iter.bi_sector = disk_sector; |
135 | be->be_v_offset; | 132 | bio->bi_bdev = bdev; |
136 | bio->bi_bdev = dev->d_bdev; | ||
137 | bio->bi_end_io = end_io; | 133 | bio->bi_end_io = end_io; |
138 | bio->bi_private = par; | 134 | bio->bi_private = par; |
139 | } | 135 | } |
140 | return bio; | 136 | return bio; |
141 | } | 137 | } |
142 | 138 | ||
143 | static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, | 139 | static struct bio * |
144 | sector_t isect, struct page *page, | 140 | do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, |
145 | struct pnfs_block_extent *be, | 141 | struct page *page, struct pnfs_block_dev_map *map, |
146 | void (*end_io)(struct bio *, int err), | 142 | struct pnfs_block_extent *be, |
147 | struct parallel_io *par, | 143 | void (*end_io)(struct bio *, int err), |
148 | unsigned int offset, int len) | 144 | struct parallel_io *par, unsigned int offset, int *len) |
149 | { | 145 | { |
150 | isect = isect + (offset >> SECTOR_SHIFT); | 146 | struct pnfs_block_dev *dev = |
147 | container_of(be->be_device, struct pnfs_block_dev, node); | ||
148 | u64 disk_addr, end; | ||
149 | |||
151 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, | 150 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, |
152 | npg, rw, (unsigned long long)isect, offset, len); | 151 | npg, rw, (unsigned long long)isect, offset, *len); |
152 | |||
153 | /* translate to device offset */ | ||
154 | isect += be->be_v_offset; | ||
155 | isect -= be->be_f_offset; | ||
156 | |||
157 | /* translate to physical disk offset */ | ||
158 | disk_addr = (u64)isect << SECTOR_SHIFT; | ||
159 | if (disk_addr < map->start || disk_addr >= map->start + map->len) { | ||
160 | if (!dev->map(dev, disk_addr, map)) | ||
161 | return ERR_PTR(-EIO); | ||
162 | bio = bl_submit_bio(rw, bio); | ||
163 | } | ||
164 | disk_addr += map->disk_offset; | ||
165 | disk_addr -= map->start; | ||
166 | |||
167 | /* limit length to what the device mapping allows */ | ||
168 | end = disk_addr + *len; | ||
169 | if (end >= map->start + map->len) | ||
170 | *len = map->start + map->len - disk_addr; | ||
171 | |||
153 | retry: | 172 | retry: |
154 | if (!bio) { | 173 | if (!bio) { |
155 | bio = bl_alloc_init_bio(npg, isect, be, end_io, par); | 174 | bio = bl_alloc_init_bio(npg, map->bdev, |
175 | disk_addr >> SECTOR_SHIFT, end_io, par); | ||
156 | if (!bio) | 176 | if (!bio) |
157 | return ERR_PTR(-ENOMEM); | 177 | return ERR_PTR(-ENOMEM); |
158 | } | 178 | } |
159 | if (bio_add_page(bio, page, len, offset) < len) { | 179 | if (bio_add_page(bio, page, *len, offset) < *len) { |
160 | bio = bl_submit_bio(rw, bio); | 180 | bio = bl_submit_bio(rw, bio); |
161 | goto retry; | 181 | goto retry; |
162 | } | 182 | } |
@@ -203,6 +223,7 @@ static enum pnfs_try_status | |||
203 | bl_read_pagelist(struct nfs_pgio_header *header) | 223 | bl_read_pagelist(struct nfs_pgio_header *header) |
204 | { | 224 | { |
205 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); | 225 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); |
226 | struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; | ||
206 | struct bio *bio = NULL; | 227 | struct bio *bio = NULL; |
207 | struct pnfs_block_extent be; | 228 | struct pnfs_block_extent be; |
208 | sector_t isect, extent_length = 0; | 229 | sector_t isect, extent_length = 0; |
@@ -248,28 +269,29 @@ bl_read_pagelist(struct nfs_pgio_header *header) | |||
248 | pg_len = PAGE_CACHE_SIZE - pg_offset; | 269 | pg_len = PAGE_CACHE_SIZE - pg_offset; |
249 | else | 270 | else |
250 | pg_len = bytes_left; | 271 | pg_len = bytes_left; |
251 | |||
252 | f_offset += pg_len; | ||
253 | bytes_left -= pg_len; | ||
254 | isect += (pg_offset >> SECTOR_SHIFT); | ||
255 | extent_length -= (pg_offset >> SECTOR_SHIFT); | ||
256 | } else { | 272 | } else { |
257 | BUG_ON(pg_offset != 0); | 273 | BUG_ON(pg_offset != 0); |
258 | pg_len = PAGE_CACHE_SIZE; | 274 | pg_len = PAGE_CACHE_SIZE; |
259 | } | 275 | } |
260 | 276 | ||
277 | isect += (pg_offset >> SECTOR_SHIFT); | ||
278 | extent_length -= (pg_offset >> SECTOR_SHIFT); | ||
279 | |||
261 | if (is_hole(&be)) { | 280 | if (is_hole(&be)) { |
262 | bio = bl_submit_bio(READ, bio); | 281 | bio = bl_submit_bio(READ, bio); |
263 | /* Fill hole w/ zeroes w/o accessing device */ | 282 | /* Fill hole w/ zeroes w/o accessing device */ |
264 | dprintk("%s Zeroing page for hole\n", __func__); | 283 | dprintk("%s Zeroing page for hole\n", __func__); |
265 | zero_user_segment(pages[i], pg_offset, pg_len); | 284 | zero_user_segment(pages[i], pg_offset, pg_len); |
285 | |||
286 | /* invalidate map */ | ||
287 | map.start = NFS4_MAX_UINT64; | ||
266 | } else { | 288 | } else { |
267 | bio = do_add_page_to_bio(bio, | 289 | bio = do_add_page_to_bio(bio, |
268 | header->page_array.npages - i, | 290 | header->page_array.npages - i, |
269 | READ, | 291 | READ, |
270 | isect, pages[i], &be, | 292 | isect, pages[i], &map, &be, |
271 | bl_end_io_read, par, | 293 | bl_end_io_read, par, |
272 | pg_offset, pg_len); | 294 | pg_offset, &pg_len); |
273 | if (IS_ERR(bio)) { | 295 | if (IS_ERR(bio)) { |
274 | header->pnfs_error = PTR_ERR(bio); | 296 | header->pnfs_error = PTR_ERR(bio); |
275 | bio = NULL; | 297 | bio = NULL; |
@@ -278,6 +300,8 @@ bl_read_pagelist(struct nfs_pgio_header *header) | |||
278 | } | 300 | } |
279 | isect += (pg_len >> SECTOR_SHIFT); | 301 | isect += (pg_len >> SECTOR_SHIFT); |
280 | extent_length -= (pg_len >> SECTOR_SHIFT); | 302 | extent_length -= (pg_len >> SECTOR_SHIFT); |
303 | f_offset += pg_len; | ||
304 | bytes_left -= pg_len; | ||
281 | } | 305 | } |
282 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { | 306 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { |
283 | header->res.eof = 1; | 307 | header->res.eof = 1; |
@@ -346,6 +370,7 @@ static enum pnfs_try_status | |||
346 | bl_write_pagelist(struct nfs_pgio_header *header, int sync) | 370 | bl_write_pagelist(struct nfs_pgio_header *header, int sync) |
347 | { | 371 | { |
348 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); | 372 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); |
373 | struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; | ||
349 | struct bio *bio = NULL; | 374 | struct bio *bio = NULL; |
350 | struct pnfs_block_extent be; | 375 | struct pnfs_block_extent be; |
351 | sector_t isect, extent_length = 0; | 376 | sector_t isect, extent_length = 0; |
@@ -354,6 +379,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) | |||
354 | size_t count = header->args.count; | 379 | size_t count = header->args.count; |
355 | struct page **pages = header->args.pages; | 380 | struct page **pages = header->args.pages; |
356 | int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; | 381 | int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; |
382 | unsigned int pg_len; | ||
357 | struct blk_plug plug; | 383 | struct blk_plug plug; |
358 | int i; | 384 | int i; |
359 | 385 | ||
@@ -387,19 +413,21 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) | |||
387 | extent_length = be.be_length - (isect - be.be_f_offset); | 413 | extent_length = be.be_length - (isect - be.be_f_offset); |
388 | } | 414 | } |
389 | 415 | ||
416 | pg_len = PAGE_CACHE_SIZE; | ||
390 | bio = do_add_page_to_bio(bio, header->page_array.npages - i, | 417 | bio = do_add_page_to_bio(bio, header->page_array.npages - i, |
391 | WRITE, isect, pages[i], &be, | 418 | WRITE, isect, pages[i], &map, &be, |
392 | bl_end_io_write, par, | 419 | bl_end_io_write, par, |
393 | 0, PAGE_CACHE_SIZE); | 420 | 0, &pg_len); |
394 | if (IS_ERR(bio)) { | 421 | if (IS_ERR(bio)) { |
395 | header->pnfs_error = PTR_ERR(bio); | 422 | header->pnfs_error = PTR_ERR(bio); |
396 | bio = NULL; | 423 | bio = NULL; |
397 | goto out; | 424 | goto out; |
398 | } | 425 | } |
399 | offset += PAGE_CACHE_SIZE; | 426 | |
400 | count -= PAGE_CACHE_SIZE; | 427 | offset += pg_len; |
401 | isect += PAGE_CACHE_SECTORS; | 428 | count -= pg_len; |
402 | extent_length -= PAGE_CACHE_SECTORS; | 429 | isect += (pg_len >> SECTOR_SHIFT); |
430 | extent_length -= (pg_len >> SECTOR_SHIFT); | ||
403 | } | 431 | } |
404 | 432 | ||
405 | header->res.count = header->args.count; | 433 | header->res.count = header->args.count; |
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index c98d98a62664..92dca9e90d8d 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -44,9 +44,77 @@ | |||
44 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | 44 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) |
45 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) | 45 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) |
46 | 46 | ||
47 | struct pnfs_block_dev; | ||
48 | |||
/*
 * Volume kinds that can appear in a block layout device description.
 * The numeric values are assigned explicitly because the type is read
 * directly from the XDR stream when a volume is decoded.
 */
enum pnfs_block_volume_type {
	PNFS_BLOCK_VOLUME_SIMPLE	= 0,	/* identified by signatures */
	PNFS_BLOCK_VOLUME_SLICE		= 1,	/* start/len window of a volume */
	PNFS_BLOCK_VOLUME_CONCAT	= 2,	/* volumes appended end to end */
	PNFS_BLOCK_VOLUME_STRIPE	= 3,	/* volumes striped by chunk_size */
};
55 | |||
56 | #define PNFS_BLOCK_MAX_UUIDS 4 | ||
57 | #define PNFS_BLOCK_MAX_DEVICES 64 | ||
58 | |||
59 | /* | ||
60 | * Random upper cap for the uuid length to avoid unbounded allocation. | ||
61 | * Not actually limited by the protocol. | ||
62 | */ | ||
63 | #define PNFS_BLOCK_UUID_LEN 128 | ||
64 | |||
65 | |||
66 | struct pnfs_block_volume { | ||
67 | enum pnfs_block_volume_type type; | ||
68 | union { | ||
69 | struct { | ||
70 | int len; | ||
71 | int nr_sigs; | ||
72 | struct { | ||
73 | u64 offset; | ||
74 | u32 sig_len; | ||
75 | u8 sig[PNFS_BLOCK_UUID_LEN]; | ||
76 | } sigs[PNFS_BLOCK_MAX_UUIDS]; | ||
77 | } simple; | ||
78 | struct { | ||
79 | u64 start; | ||
80 | u64 len; | ||
81 | u32 volume; | ||
82 | } slice; | ||
83 | struct { | ||
84 | u32 volumes_count; | ||
85 | u32 volumes[PNFS_BLOCK_MAX_DEVICES]; | ||
86 | } concat; | ||
87 | struct { | ||
88 | u64 chunk_size; | ||
89 | u32 volumes_count; | ||
90 | u32 volumes[PNFS_BLOCK_MAX_DEVICES]; | ||
91 | } stripe; | ||
92 | }; | ||
93 | }; | ||
94 | |||
95 | struct pnfs_block_dev_map { | ||
96 | sector_t start; | ||
97 | sector_t len; | ||
98 | |||
99 | sector_t disk_offset; | ||
100 | struct block_device *bdev; | ||
101 | }; | ||
102 | |||
47 | struct pnfs_block_dev { | 103 | struct pnfs_block_dev { |
48 | struct nfs4_deviceid_node d_node; | 104 | struct nfs4_deviceid_node node; |
49 | struct block_device *d_bdev; | 105 | |
106 | u64 start; | ||
107 | u64 len; | ||
108 | |||
109 | u32 nr_children; | ||
110 | struct pnfs_block_dev *children; | ||
111 | u64 chunk_size; | ||
112 | |||
113 | struct block_device *bdev; | ||
114 | u64 disk_offset; | ||
115 | |||
116 | bool (*map)(struct pnfs_block_dev *dev, u64 offset, | ||
117 | struct pnfs_block_dev_map *map); | ||
50 | }; | 118 | }; |
51 | 119 | ||
52 | enum exstate4 { | 120 | enum exstate4 { |
@@ -110,6 +178,11 @@ struct bl_msg_hdr { | |||
110 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ | 178 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ |
111 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ | 179 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ |
112 | 180 | ||
181 | /* dev.c */ | ||
182 | struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, | ||
183 | struct pnfs_device *pdev, gfp_t gfp_mask); | ||
184 | void bl_free_deviceid_node(struct nfs4_deviceid_node *d); | ||
185 | |||
113 | /* extent_tree.c */ | 186 | /* extent_tree.c */ |
114 | int ext_tree_insert(struct pnfs_block_layout *bl, | 187 | int ext_tree_insert(struct pnfs_block_layout *bl, |
115 | struct pnfs_block_extent *new); | 188 | struct pnfs_block_extent *new); |
@@ -123,10 +196,8 @@ int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); | |||
123 | void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status); | 196 | void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status); |
124 | 197 | ||
125 | /* rpc_pipefs.c */ | 198 | /* rpc_pipefs.c */ |
126 | struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, | 199 | dev_t bl_resolve_deviceid(struct nfs_server *server, |
127 | struct pnfs_device *pdev, gfp_t gfp_mask); | 200 | struct pnfs_block_volume *b, gfp_t gfp_mask); |
128 | void bl_free_deviceid_node(struct nfs4_deviceid_node *d); | ||
129 | |||
130 | int __init bl_init_pipefs(void); | 201 | int __init bl_init_pipefs(void); |
131 | void __exit bl_cleanup_pipefs(void); | 202 | void __exit bl_cleanup_pipefs(void); |
132 | 203 | ||
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c new file mode 100644 index 000000000000..00f159da06ee --- /dev/null +++ b/fs/nfs/blocklayout/dev.c | |||
@@ -0,0 +1,360 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | #include <linux/sunrpc/svc.h> | ||
5 | #include <linux/blkdev.h> | ||
6 | #include <linux/nfs4.h> | ||
7 | #include <linux/nfs_fs.h> | ||
8 | #include <linux/nfs_xdr.h> | ||
9 | |||
10 | #include "blocklayout.h" | ||
11 | |||
12 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
13 | |||
14 | static void | ||
15 | bl_free_device(struct pnfs_block_dev *dev) | ||
16 | { | ||
17 | if (dev->nr_children) { | ||
18 | int i; | ||
19 | |||
20 | for (i = 0; i < dev->nr_children; i++) | ||
21 | bl_free_device(&dev->children[i]); | ||
22 | kfree(dev->children); | ||
23 | } else { | ||
24 | if (dev->bdev) | ||
25 | blkdev_put(dev->bdev, FMODE_READ); | ||
26 | } | ||
27 | } | ||
28 | |||
29 | void | ||
30 | bl_free_deviceid_node(struct nfs4_deviceid_node *d) | ||
31 | { | ||
32 | struct pnfs_block_dev *dev = | ||
33 | container_of(d, struct pnfs_block_dev, node); | ||
34 | |||
35 | bl_free_device(dev); | ||
36 | kfree(dev); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) | ||
41 | { | ||
42 | __be32 *p; | ||
43 | int i; | ||
44 | |||
45 | p = xdr_inline_decode(xdr, 4); | ||
46 | if (!p) | ||
47 | return -EIO; | ||
48 | b->type = be32_to_cpup(p++); | ||
49 | |||
50 | switch (b->type) { | ||
51 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
52 | p = xdr_inline_decode(xdr, 4); | ||
53 | if (!p) | ||
54 | return -EIO; | ||
55 | b->simple.nr_sigs = be32_to_cpup(p++); | ||
56 | if (!b->simple.nr_sigs) { | ||
57 | dprintk("no signature\n"); | ||
58 | return -EIO; | ||
59 | } | ||
60 | |||
61 | b->simple.len = 4 + 4; | ||
62 | for (i = 0; i < b->simple.nr_sigs; i++) { | ||
63 | p = xdr_inline_decode(xdr, 8 + 4); | ||
64 | if (!p) | ||
65 | return -EIO; | ||
66 | p = xdr_decode_hyper(p, &b->simple.sigs[i].offset); | ||
67 | b->simple.sigs[i].sig_len = be32_to_cpup(p++); | ||
68 | |||
69 | p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len); | ||
70 | if (!p) | ||
71 | return -EIO; | ||
72 | memcpy(&b->simple.sigs[i].sig, p, | ||
73 | b->simple.sigs[i].sig_len); | ||
74 | |||
75 | b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; | ||
76 | } | ||
77 | break; | ||
78 | case PNFS_BLOCK_VOLUME_SLICE: | ||
79 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
80 | if (!p) | ||
81 | return -EIO; | ||
82 | p = xdr_decode_hyper(p, &b->slice.start); | ||
83 | p = xdr_decode_hyper(p, &b->slice.len); | ||
84 | b->slice.volume = be32_to_cpup(p++); | ||
85 | break; | ||
86 | case PNFS_BLOCK_VOLUME_CONCAT: | ||
87 | p = xdr_inline_decode(xdr, 4); | ||
88 | if (!p) | ||
89 | return -EIO; | ||
90 | b->concat.volumes_count = be32_to_cpup(p++); | ||
91 | |||
92 | p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); | ||
93 | if (!p) | ||
94 | return -EIO; | ||
95 | for (i = 0; i < b->concat.volumes_count; i++) | ||
96 | b->concat.volumes[i] = be32_to_cpup(p++); | ||
97 | break; | ||
98 | case PNFS_BLOCK_VOLUME_STRIPE: | ||
99 | p = xdr_inline_decode(xdr, 8 + 4); | ||
100 | if (!p) | ||
101 | return -EIO; | ||
102 | p = xdr_decode_hyper(p, &b->stripe.chunk_size); | ||
103 | b->stripe.volumes_count = be32_to_cpup(p++); | ||
104 | |||
105 | p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); | ||
106 | if (!p) | ||
107 | return -EIO; | ||
108 | for (i = 0; i < b->stripe.volumes_count; i++) | ||
109 | b->stripe.volumes[i] = be32_to_cpup(p++); | ||
110 | break; | ||
111 | default: | ||
112 | dprintk("unknown volume type!\n"); | ||
113 | return -EIO; | ||
114 | } | ||
115 | |||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, | ||
120 | struct pnfs_block_dev_map *map) | ||
121 | { | ||
122 | map->start = dev->start; | ||
123 | map->len = dev->len; | ||
124 | map->disk_offset = dev->disk_offset; | ||
125 | map->bdev = dev->bdev; | ||
126 | return true; | ||
127 | } | ||
128 | |||
129 | static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset, | ||
130 | struct pnfs_block_dev_map *map) | ||
131 | { | ||
132 | int i; | ||
133 | |||
134 | for (i = 0; i < dev->nr_children; i++) { | ||
135 | struct pnfs_block_dev *child = &dev->children[i]; | ||
136 | |||
137 | if (child->start > offset || | ||
138 | child->start + child->len <= offset) | ||
139 | continue; | ||
140 | |||
141 | child->map(child, offset - child->start, map); | ||
142 | return true; | ||
143 | } | ||
144 | |||
145 | dprintk("%s: ran off loop!\n", __func__); | ||
146 | return false; | ||
147 | } | ||
148 | |||
149 | static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, | ||
150 | struct pnfs_block_dev_map *map) | ||
151 | { | ||
152 | struct pnfs_block_dev *child; | ||
153 | u64 chunk = (offset / dev->chunk_size); | ||
154 | int chunk_idx = chunk % dev->nr_children; | ||
155 | u64 disk_offset; | ||
156 | |||
157 | if (chunk_idx > dev->nr_children) { | ||
158 | dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", | ||
159 | __func__, chunk_idx, offset, dev->chunk_size); | ||
160 | /* error, should not happen */ | ||
161 | return false; | ||
162 | } | ||
163 | |||
164 | /* truncate offset to the beginning of the stripe */ | ||
165 | offset = chunk * dev->chunk_size; | ||
166 | |||
167 | /* disk offset of the stripe */ | ||
168 | disk_offset = offset / dev->nr_children; | ||
169 | |||
170 | child = &dev->children[chunk_idx]; | ||
171 | child->map(child, disk_offset, map); | ||
172 | |||
173 | map->start += offset; | ||
174 | map->disk_offset += disk_offset; | ||
175 | map->len = dev->chunk_size; | ||
176 | return true; | ||
177 | } | ||
178 | |||
179 | static int | ||
180 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | ||
181 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); | ||
182 | |||
183 | |||
184 | static int | ||
185 | bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, | ||
186 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
187 | { | ||
188 | struct pnfs_block_volume *v = &volumes[idx]; | ||
189 | dev_t dev; | ||
190 | |||
191 | dev = bl_resolve_deviceid(server, v, gfp_mask); | ||
192 | if (!dev) | ||
193 | return -EIO; | ||
194 | |||
195 | d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); | ||
196 | if (IS_ERR(d->bdev)) { | ||
197 | printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", | ||
198 | MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); | ||
199 | return PTR_ERR(d->bdev); | ||
200 | } | ||
201 | |||
202 | |||
203 | d->len = i_size_read(d->bdev->bd_inode); | ||
204 | d->map = bl_map_simple; | ||
205 | |||
206 | printk(KERN_INFO "pNFS: using block device %s\n", | ||
207 | d->bdev->bd_disk->disk_name); | ||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | static int | ||
212 | bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, | ||
213 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
214 | { | ||
215 | struct pnfs_block_volume *v = &volumes[idx]; | ||
216 | int ret; | ||
217 | |||
218 | ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask); | ||
219 | if (ret) | ||
220 | return ret; | ||
221 | |||
222 | d->disk_offset = v->slice.start; | ||
223 | d->len = v->slice.len; | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | static int | ||
228 | bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, | ||
229 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
230 | { | ||
231 | struct pnfs_block_volume *v = &volumes[idx]; | ||
232 | u64 len = 0; | ||
233 | int ret, i; | ||
234 | |||
235 | d->children = kcalloc(v->concat.volumes_count, | ||
236 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | ||
237 | if (!d->children) | ||
238 | return -ENOMEM; | ||
239 | |||
240 | for (i = 0; i < v->concat.volumes_count; i++) { | ||
241 | ret = bl_parse_deviceid(server, &d->children[i], | ||
242 | volumes, v->concat.volumes[i], gfp_mask); | ||
243 | if (ret) | ||
244 | return ret; | ||
245 | |||
246 | d->nr_children++; | ||
247 | d->children[i].start += len; | ||
248 | len += d->children[i].len; | ||
249 | } | ||
250 | |||
251 | d->len = len; | ||
252 | d->map = bl_map_concat; | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | static int | ||
257 | bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, | ||
258 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
259 | { | ||
260 | struct pnfs_block_volume *v = &volumes[idx]; | ||
261 | u64 len = 0; | ||
262 | int ret, i; | ||
263 | |||
264 | d->children = kcalloc(v->stripe.volumes_count, | ||
265 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | ||
266 | if (!d->children) | ||
267 | return -ENOMEM; | ||
268 | |||
269 | for (i = 0; i < v->stripe.volumes_count; i++) { | ||
270 | ret = bl_parse_deviceid(server, &d->children[i], | ||
271 | volumes, v->stripe.volumes[i], gfp_mask); | ||
272 | if (ret) | ||
273 | return ret; | ||
274 | |||
275 | d->nr_children++; | ||
276 | len += d->children[i].len; | ||
277 | } | ||
278 | |||
279 | d->len = len; | ||
280 | d->chunk_size = v->stripe.chunk_size; | ||
281 | d->map = bl_map_stripe; | ||
282 | return 0; | ||
283 | } | ||
284 | |||
285 | static int | ||
286 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | ||
287 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
288 | { | ||
289 | switch (volumes[idx].type) { | ||
290 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
291 | return bl_parse_simple(server, d, volumes, idx, gfp_mask); | ||
292 | case PNFS_BLOCK_VOLUME_SLICE: | ||
293 | return bl_parse_slice(server, d, volumes, idx, gfp_mask); | ||
294 | case PNFS_BLOCK_VOLUME_CONCAT: | ||
295 | return bl_parse_concat(server, d, volumes, idx, gfp_mask); | ||
296 | case PNFS_BLOCK_VOLUME_STRIPE: | ||
297 | return bl_parse_stripe(server, d, volumes, idx, gfp_mask); | ||
298 | default: | ||
299 | dprintk("unsupported volume type: %d\n", volumes[idx].type); | ||
300 | return -EIO; | ||
301 | } | ||
302 | } | ||
303 | |||
304 | struct nfs4_deviceid_node * | ||
305 | bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||
306 | gfp_t gfp_mask) | ||
307 | { | ||
308 | struct nfs4_deviceid_node *node = NULL; | ||
309 | struct pnfs_block_volume *volumes; | ||
310 | struct pnfs_block_dev *top; | ||
311 | struct xdr_stream xdr; | ||
312 | struct xdr_buf buf; | ||
313 | struct page *scratch; | ||
314 | int nr_volumes, ret, i; | ||
315 | __be32 *p; | ||
316 | |||
317 | scratch = alloc_page(gfp_mask); | ||
318 | if (!scratch) | ||
319 | goto out; | ||
320 | |||
321 | xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); | ||
322 | xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); | ||
323 | |||
324 | p = xdr_inline_decode(&xdr, sizeof(__be32)); | ||
325 | if (!p) | ||
326 | goto out_free_scratch; | ||
327 | nr_volumes = be32_to_cpup(p++); | ||
328 | |||
329 | volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume), | ||
330 | gfp_mask); | ||
331 | if (!volumes) | ||
332 | goto out_free_scratch; | ||
333 | |||
334 | for (i = 0; i < nr_volumes; i++) { | ||
335 | ret = nfs4_block_decode_volume(&xdr, &volumes[i]); | ||
336 | if (ret < 0) | ||
337 | goto out_free_volumes; | ||
338 | } | ||
339 | |||
340 | top = kzalloc(sizeof(*top), gfp_mask); | ||
341 | if (!top) | ||
342 | goto out_free_volumes; | ||
343 | |||
344 | ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask); | ||
345 | if (ret) { | ||
346 | bl_free_device(top); | ||
347 | kfree(top); | ||
348 | goto out_free_volumes; | ||
349 | } | ||
350 | |||
351 | node = &top->node; | ||
352 | nfs4_init_deviceid_node(node, server, &pdev->dev_id); | ||
353 | |||
354 | out_free_volumes: | ||
355 | kfree(volumes); | ||
356 | out_free_scratch: | ||
357 | __free_page(scratch); | ||
358 | out: | ||
359 | return node; | ||
360 | } | ||
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index bfb04861eb61..8d04bda2bd2e 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c | |||
@@ -34,94 +34,53 @@ | |||
34 | 34 | ||
35 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 35 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
36 | 36 | ||
37 | static void bl_dm_remove(struct net *net, dev_t dev) | 37 | static void |
38 | nfs4_encode_simple(__be32 *p, struct pnfs_block_volume *b) | ||
38 | { | 39 | { |
39 | struct bl_pipe_msg bl_pipe_msg; | 40 | int i; |
40 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | 41 | |
41 | struct bl_dev_msg bl_umount_request; | 42 | *p++ = cpu_to_be32(1); |
42 | struct bl_msg_hdr bl_msg = { | 43 | *p++ = cpu_to_be32(b->type); |
43 | .type = BL_DEVICE_UMOUNT, | 44 | *p++ = cpu_to_be32(b->simple.nr_sigs); |
44 | .totallen = sizeof(bl_umount_request), | 45 | for (i = 0; i < b->simple.nr_sigs; i++) { |
45 | }; | 46 | p = xdr_encode_hyper(p, b->simple.sigs[i].offset); |
46 | uint8_t *dataptr; | 47 | p = xdr_encode_opaque(p, b->simple.sigs[i].sig, |
47 | DECLARE_WAITQUEUE(wq, current); | 48 | b->simple.sigs[i].sig_len); |
48 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
49 | |||
50 | dprintk("Entering %s\n", __func__); | ||
51 | |||
52 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
53 | memset(msg, 0, sizeof(*msg)); | ||
54 | msg->len = sizeof(bl_msg) + bl_msg.totallen; | ||
55 | msg->data = kzalloc(msg->len, GFP_NOFS); | ||
56 | if (!msg->data) | ||
57 | goto out; | ||
58 | |||
59 | memset(&bl_umount_request, 0, sizeof(bl_umount_request)); | ||
60 | bl_umount_request.major = MAJOR(dev); | ||
61 | bl_umount_request.minor = MINOR(dev); | ||
62 | |||
63 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); | ||
64 | dataptr = (uint8_t *) msg->data; | ||
65 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); | ||
66 | |||
67 | add_wait_queue(&nn->bl_wq, &wq); | ||
68 | if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { | ||
69 | remove_wait_queue(&nn->bl_wq, &wq); | ||
70 | goto out; | ||
71 | } | 49 | } |
72 | |||
73 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
74 | schedule(); | ||
75 | __set_current_state(TASK_RUNNING); | ||
76 | remove_wait_queue(&nn->bl_wq, &wq); | ||
77 | |||
78 | out: | ||
79 | kfree(msg->data); | ||
80 | } | 50 | } |
81 | 51 | ||
82 | /* | 52 | dev_t |
83 | * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. | 53 | bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, |
84 | */ | ||
85 | struct nfs4_deviceid_node * | ||
86 | bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev, | ||
87 | gfp_t gfp_mask) | 54 | gfp_t gfp_mask) |
88 | { | 55 | { |
89 | struct pnfs_block_dev *rv; | ||
90 | struct block_device *bd; | ||
91 | struct bl_pipe_msg bl_pipe_msg; | ||
92 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
93 | struct bl_msg_hdr bl_msg = { | ||
94 | .type = BL_DEVICE_MOUNT, | ||
95 | .totallen = dev->mincount, | ||
96 | }; | ||
97 | uint8_t *dataptr; | ||
98 | DECLARE_WAITQUEUE(wq, current); | ||
99 | int offset, len, i, rc; | ||
100 | struct net *net = server->nfs_client->cl_net; | 56 | struct net *net = server->nfs_client->cl_net; |
101 | struct nfs_net *nn = net_generic(net, nfs_net_id); | 57 | struct nfs_net *nn = net_generic(net, nfs_net_id); |
102 | struct bl_dev_msg *reply = &nn->bl_mount_reply; | 58 | struct bl_dev_msg *reply = &nn->bl_mount_reply; |
59 | struct bl_pipe_msg bl_pipe_msg; | ||
60 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
61 | struct bl_msg_hdr *bl_msg; | ||
62 | DECLARE_WAITQUEUE(wq, current); | ||
63 | dev_t dev = 0; | ||
64 | int rc; | ||
103 | 65 | ||
104 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | 66 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); |
105 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, | ||
106 | dev->mincount); | ||
107 | 67 | ||
108 | bl_pipe_msg.bl_wq = &nn->bl_wq; | 68 | bl_pipe_msg.bl_wq = &nn->bl_wq; |
69 | |||
70 | b->simple.len += 4; /* single volume */ | ||
71 | if (b->simple.len > PAGE_SIZE) | ||
72 | return -EIO; | ||
73 | |||
109 | memset(msg, 0, sizeof(*msg)); | 74 | memset(msg, 0, sizeof(*msg)); |
110 | msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, gfp_mask); | 75 | msg->len = sizeof(*bl_msg) + b->simple.len; |
76 | msg->data = kzalloc(msg->len, gfp_mask); | ||
111 | if (!msg->data) | 77 | if (!msg->data) |
112 | goto out; | 78 | goto out; |
113 | 79 | ||
114 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); | 80 | bl_msg = msg->data; |
115 | dataptr = (uint8_t *) msg->data; | 81 | bl_msg->type = BL_DEVICE_MOUNT, |
116 | len = dev->mincount; | 82 | bl_msg->totallen = b->simple.len; |
117 | offset = sizeof(bl_msg); | 83 | nfs4_encode_simple(msg->data + sizeof(*bl_msg), b); |
118 | for (i = 0; len > 0; i++) { | ||
119 | memcpy(&dataptr[offset], page_address(dev->pages[i]), | ||
120 | len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE); | ||
121 | len -= PAGE_CACHE_SIZE; | ||
122 | offset += PAGE_CACHE_SIZE; | ||
123 | } | ||
124 | msg->len = sizeof(bl_msg) + dev->mincount; | ||
125 | 84 | ||
126 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | 85 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); |
127 | add_wait_queue(&nn->bl_wq, &wq); | 86 | add_wait_queue(&nn->bl_wq, &wq); |
@@ -142,46 +101,10 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev, | |||
142 | goto out; | 101 | goto out; |
143 | } | 102 | } |
144 | 103 | ||
145 | bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), | 104 | dev = MKDEV(reply->major, reply->minor); |
146 | FMODE_READ, NULL); | ||
147 | if (IS_ERR(bd)) { | ||
148 | printk(KERN_WARNING "%s failed to open device %d:%d (%ld)\n", | ||
149 | __func__, reply->major, reply->minor, | ||
150 | PTR_ERR(bd)); | ||
151 | goto out; | ||
152 | } | ||
153 | |||
154 | rv = kzalloc(sizeof(*rv), gfp_mask); | ||
155 | if (!rv) | ||
156 | goto out; | ||
157 | |||
158 | nfs4_init_deviceid_node(&rv->d_node, server, &dev->dev_id); | ||
159 | rv->d_bdev = bd; | ||
160 | |||
161 | dprintk("%s Created device %s with bd_block_size %u\n", | ||
162 | __func__, | ||
163 | bd->bd_disk->disk_name, | ||
164 | bd->bd_block_size); | ||
165 | |||
166 | kfree(msg->data); | ||
167 | return &rv->d_node; | ||
168 | |||
169 | out: | 105 | out: |
170 | kfree(msg->data); | 106 | kfree(msg->data); |
171 | return NULL; | 107 | return dev; |
172 | } | ||
173 | |||
174 | void | ||
175 | bl_free_deviceid_node(struct nfs4_deviceid_node *d) | ||
176 | { | ||
177 | struct pnfs_block_dev *dev = | ||
178 | container_of(d, struct pnfs_block_dev, d_node); | ||
179 | struct net *net = d->nfs_client->cl_net; | ||
180 | |||
181 | blkdev_put(dev->d_bdev, FMODE_READ); | ||
182 | bl_dm_remove(net, dev->d_bdev->bd_dev); | ||
183 | |||
184 | kfree(dev); | ||
185 | } | 108 | } |
186 | 109 | ||
187 | static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | 110 | static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, |