aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2014-09-10 20:37:27 -0400
committerTrond Myklebust <trond.myklebust@primarydata.com>2014-09-12 13:33:50 -0400
commit5c83746a0cf2831d4b59f5cf99ef5fbf138564e4 (patch)
treefebe14fb7fea5b7716fc07a2996be4253f09a663
parent871760ce97a9a544cfb1ae4589598b25b8570a25 (diff)
pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing
This patch moves parsing of the GETDEVICEINFO XDR to kernel space, as well as the management of complex devices. The reason for that is we might have multiple outstanding complex devices after a NOTIFY_DEVICEID4_CHANGE, which device mapper or md can't handle as they claim devices exclusively. But as it turns out simple striping / concatenation is fairly trivial to implement anyway, so we make our life simpler by reducing the reliance on blkmapd. For now we still use blkmapd by feeding it synthetic SIMPLE device XDR to translate device signatures to device numbers, but in the long run I have plans to eliminate it entirely. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r--fs/nfs/blocklayout/Makefile2
-rw-r--r--fs/nfs/blocklayout/blocklayout.c92
-rw-r--r--fs/nfs/blocklayout/blocklayout.h83
-rw-r--r--fs/nfs/blocklayout/dev.c360
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c141
5 files changed, 530 insertions, 148 deletions
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile
index e177026e0119..3ca14c36d08b 100644
--- a/fs/nfs/blocklayout/Makefile
+++ b/fs/nfs/blocklayout/Makefile
@@ -3,4 +3,4 @@
3# 3#
4obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o 4obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
5 5
6blocklayoutdriver-y += blocklayout.o extent_tree.o rpc_pipefs.o 6blocklayoutdriver-y += blocklayout.o dev.o extent_tree.o rpc_pipefs.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65a6b19b17a2..c41a718854e3 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -114,13 +114,10 @@ bl_submit_bio(int rw, struct bio *bio)
114 return NULL; 114 return NULL;
115} 115}
116 116
117static struct bio *bl_alloc_init_bio(int npg, sector_t isect, 117static struct bio *
118 struct pnfs_block_extent *be, 118bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
119 void (*end_io)(struct bio *, int err), 119 void (*end_io)(struct bio *, int err), struct parallel_io *par)
120 struct parallel_io *par)
121{ 120{
122 struct pnfs_block_dev *dev =
123 container_of(be->be_device, struct pnfs_block_dev, d_node);
124 struct bio *bio; 121 struct bio *bio;
125 122
126 npg = min(npg, BIO_MAX_PAGES); 123 npg = min(npg, BIO_MAX_PAGES);
@@ -131,32 +128,55 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
131 } 128 }
132 129
133 if (bio) { 130 if (bio) {
134 bio->bi_iter.bi_sector = isect - be->be_f_offset + 131 bio->bi_iter.bi_sector = disk_sector;
135 be->be_v_offset; 132 bio->bi_bdev = bdev;
136 bio->bi_bdev = dev->d_bdev;
137 bio->bi_end_io = end_io; 133 bio->bi_end_io = end_io;
138 bio->bi_private = par; 134 bio->bi_private = par;
139 } 135 }
140 return bio; 136 return bio;
141} 137}
142 138
143static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, 139static struct bio *
144 sector_t isect, struct page *page, 140do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
145 struct pnfs_block_extent *be, 141 struct page *page, struct pnfs_block_dev_map *map,
146 void (*end_io)(struct bio *, int err), 142 struct pnfs_block_extent *be,
147 struct parallel_io *par, 143 void (*end_io)(struct bio *, int err),
148 unsigned int offset, int len) 144 struct parallel_io *par, unsigned int offset, int *len)
149{ 145{
150 isect = isect + (offset >> SECTOR_SHIFT); 146 struct pnfs_block_dev *dev =
147 container_of(be->be_device, struct pnfs_block_dev, node);
148 u64 disk_addr, end;
149
151 dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, 150 dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
152 npg, rw, (unsigned long long)isect, offset, len); 151 npg, rw, (unsigned long long)isect, offset, *len);
152
153 /* translate to device offset */
154 isect += be->be_v_offset;
155 isect -= be->be_f_offset;
156
157 /* translate to physical disk offset */
158 disk_addr = (u64)isect << SECTOR_SHIFT;
159 if (disk_addr < map->start || disk_addr >= map->start + map->len) {
160 if (!dev->map(dev, disk_addr, map))
161 return ERR_PTR(-EIO);
162 bio = bl_submit_bio(rw, bio);
163 }
164 disk_addr += map->disk_offset;
165 disk_addr -= map->start;
166
167 /* limit length to what the device mapping allows */
168 end = disk_addr + *len;
169 if (end >= map->start + map->len)
170 *len = map->start + map->len - disk_addr;
171
153retry: 172retry:
154 if (!bio) { 173 if (!bio) {
155 bio = bl_alloc_init_bio(npg, isect, be, end_io, par); 174 bio = bl_alloc_init_bio(npg, map->bdev,
175 disk_addr >> SECTOR_SHIFT, end_io, par);
156 if (!bio) 176 if (!bio)
157 return ERR_PTR(-ENOMEM); 177 return ERR_PTR(-ENOMEM);
158 } 178 }
159 if (bio_add_page(bio, page, len, offset) < len) { 179 if (bio_add_page(bio, page, *len, offset) < *len) {
160 bio = bl_submit_bio(rw, bio); 180 bio = bl_submit_bio(rw, bio);
161 goto retry; 181 goto retry;
162 } 182 }
@@ -203,6 +223,7 @@ static enum pnfs_try_status
203bl_read_pagelist(struct nfs_pgio_header *header) 223bl_read_pagelist(struct nfs_pgio_header *header)
204{ 224{
205 struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); 225 struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
226 struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
206 struct bio *bio = NULL; 227 struct bio *bio = NULL;
207 struct pnfs_block_extent be; 228 struct pnfs_block_extent be;
208 sector_t isect, extent_length = 0; 229 sector_t isect, extent_length = 0;
@@ -248,28 +269,29 @@ bl_read_pagelist(struct nfs_pgio_header *header)
248 pg_len = PAGE_CACHE_SIZE - pg_offset; 269 pg_len = PAGE_CACHE_SIZE - pg_offset;
249 else 270 else
250 pg_len = bytes_left; 271 pg_len = bytes_left;
251
252 f_offset += pg_len;
253 bytes_left -= pg_len;
254 isect += (pg_offset >> SECTOR_SHIFT);
255 extent_length -= (pg_offset >> SECTOR_SHIFT);
256 } else { 272 } else {
257 BUG_ON(pg_offset != 0); 273 BUG_ON(pg_offset != 0);
258 pg_len = PAGE_CACHE_SIZE; 274 pg_len = PAGE_CACHE_SIZE;
259 } 275 }
260 276
277 isect += (pg_offset >> SECTOR_SHIFT);
278 extent_length -= (pg_offset >> SECTOR_SHIFT);
279
261 if (is_hole(&be)) { 280 if (is_hole(&be)) {
262 bio = bl_submit_bio(READ, bio); 281 bio = bl_submit_bio(READ, bio);
263 /* Fill hole w/ zeroes w/o accessing device */ 282 /* Fill hole w/ zeroes w/o accessing device */
264 dprintk("%s Zeroing page for hole\n", __func__); 283 dprintk("%s Zeroing page for hole\n", __func__);
265 zero_user_segment(pages[i], pg_offset, pg_len); 284 zero_user_segment(pages[i], pg_offset, pg_len);
285
286 /* invalidate map */
287 map.start = NFS4_MAX_UINT64;
266 } else { 288 } else {
267 bio = do_add_page_to_bio(bio, 289 bio = do_add_page_to_bio(bio,
268 header->page_array.npages - i, 290 header->page_array.npages - i,
269 READ, 291 READ,
270 isect, pages[i], &be, 292 isect, pages[i], &map, &be,
271 bl_end_io_read, par, 293 bl_end_io_read, par,
272 pg_offset, pg_len); 294 pg_offset, &pg_len);
273 if (IS_ERR(bio)) { 295 if (IS_ERR(bio)) {
274 header->pnfs_error = PTR_ERR(bio); 296 header->pnfs_error = PTR_ERR(bio);
275 bio = NULL; 297 bio = NULL;
@@ -278,6 +300,8 @@ bl_read_pagelist(struct nfs_pgio_header *header)
278 } 300 }
279 isect += (pg_len >> SECTOR_SHIFT); 301 isect += (pg_len >> SECTOR_SHIFT);
280 extent_length -= (pg_len >> SECTOR_SHIFT); 302 extent_length -= (pg_len >> SECTOR_SHIFT);
303 f_offset += pg_len;
304 bytes_left -= pg_len;
281 } 305 }
282 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 306 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
283 header->res.eof = 1; 307 header->res.eof = 1;
@@ -346,6 +370,7 @@ static enum pnfs_try_status
346bl_write_pagelist(struct nfs_pgio_header *header, int sync) 370bl_write_pagelist(struct nfs_pgio_header *header, int sync)
347{ 371{
348 struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); 372 struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
373 struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
349 struct bio *bio = NULL; 374 struct bio *bio = NULL;
350 struct pnfs_block_extent be; 375 struct pnfs_block_extent be;
351 sector_t isect, extent_length = 0; 376 sector_t isect, extent_length = 0;
@@ -354,6 +379,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
354 size_t count = header->args.count; 379 size_t count = header->args.count;
355 struct page **pages = header->args.pages; 380 struct page **pages = header->args.pages;
356 int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; 381 int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
382 unsigned int pg_len;
357 struct blk_plug plug; 383 struct blk_plug plug;
358 int i; 384 int i;
359 385
@@ -387,19 +413,21 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
387 extent_length = be.be_length - (isect - be.be_f_offset); 413 extent_length = be.be_length - (isect - be.be_f_offset);
388 } 414 }
389 415
416 pg_len = PAGE_CACHE_SIZE;
390 bio = do_add_page_to_bio(bio, header->page_array.npages - i, 417 bio = do_add_page_to_bio(bio, header->page_array.npages - i,
391 WRITE, isect, pages[i], &be, 418 WRITE, isect, pages[i], &map, &be,
392 bl_end_io_write, par, 419 bl_end_io_write, par,
393 0, PAGE_CACHE_SIZE); 420 0, &pg_len);
394 if (IS_ERR(bio)) { 421 if (IS_ERR(bio)) {
395 header->pnfs_error = PTR_ERR(bio); 422 header->pnfs_error = PTR_ERR(bio);
396 bio = NULL; 423 bio = NULL;
397 goto out; 424 goto out;
398 } 425 }
399 offset += PAGE_CACHE_SIZE; 426
400 count -= PAGE_CACHE_SIZE; 427 offset += pg_len;
401 isect += PAGE_CACHE_SECTORS; 428 count -= pg_len;
402 extent_length -= PAGE_CACHE_SECTORS; 429 isect += (pg_len >> SECTOR_SHIFT);
430 extent_length -= (pg_len >> SECTOR_SHIFT);
403 } 431 }
404 432
405 header->res.count = header->args.count; 433 header->res.count = header->args.count;
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index c98d98a62664..92dca9e90d8d 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -44,9 +44,77 @@
44#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) 44#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
45#define SECTOR_SIZE (1 << SECTOR_SHIFT) 45#define SECTOR_SIZE (1 << SECTOR_SHIFT)
46 46
47struct pnfs_block_dev;
48
49enum pnfs_block_volume_type {
50 PNFS_BLOCK_VOLUME_SIMPLE = 0,
51 PNFS_BLOCK_VOLUME_SLICE = 1,
52 PNFS_BLOCK_VOLUME_CONCAT = 2,
53 PNFS_BLOCK_VOLUME_STRIPE = 3,
54};
55
56#define PNFS_BLOCK_MAX_UUIDS 4
57#define PNFS_BLOCK_MAX_DEVICES 64
58
59/*
60 * Random upper cap for the uuid length to avoid unbounded allocation.
61 * Not actually limited by the protocol.
62 */
63#define PNFS_BLOCK_UUID_LEN 128
64
65
66struct pnfs_block_volume {
67 enum pnfs_block_volume_type type;
68 union {
69 struct {
70 int len;
71 int nr_sigs;
72 struct {
73 u64 offset;
74 u32 sig_len;
75 u8 sig[PNFS_BLOCK_UUID_LEN];
76 } sigs[PNFS_BLOCK_MAX_UUIDS];
77 } simple;
78 struct {
79 u64 start;
80 u64 len;
81 u32 volume;
82 } slice;
83 struct {
84 u32 volumes_count;
85 u32 volumes[PNFS_BLOCK_MAX_DEVICES];
86 } concat;
87 struct {
88 u64 chunk_size;
89 u32 volumes_count;
90 u32 volumes[PNFS_BLOCK_MAX_DEVICES];
91 } stripe;
92 };
93};
94
95struct pnfs_block_dev_map {
96 sector_t start;
97 sector_t len;
98
99 sector_t disk_offset;
100 struct block_device *bdev;
101};
102
47struct pnfs_block_dev { 103struct pnfs_block_dev {
48 struct nfs4_deviceid_node d_node; 104 struct nfs4_deviceid_node node;
49 struct block_device *d_bdev; 105
106 u64 start;
107 u64 len;
108
109 u32 nr_children;
110 struct pnfs_block_dev *children;
111 u64 chunk_size;
112
113 struct block_device *bdev;
114 u64 disk_offset;
115
116 bool (*map)(struct pnfs_block_dev *dev, u64 offset,
117 struct pnfs_block_dev_map *map);
50}; 118};
51 119
52enum exstate4 { 120enum exstate4 {
@@ -110,6 +178,11 @@ struct bl_msg_hdr {
110#define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ 178#define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */
111#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ 179#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
112 180
181/* dev.c */
182struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
183 struct pnfs_device *pdev, gfp_t gfp_mask);
184void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
185
113/* extent_tree.c */ 186/* extent_tree.c */
114int ext_tree_insert(struct pnfs_block_layout *bl, 187int ext_tree_insert(struct pnfs_block_layout *bl,
115 struct pnfs_block_extent *new); 188 struct pnfs_block_extent *new);
@@ -123,10 +196,8 @@ int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
123void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status); 196void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
124 197
125/* rpc_pipefs.c */ 198/* rpc_pipefs.c */
126struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, 199dev_t bl_resolve_deviceid(struct nfs_server *server,
127 struct pnfs_device *pdev, gfp_t gfp_mask); 200 struct pnfs_block_volume *b, gfp_t gfp_mask);
128void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
129
130int __init bl_init_pipefs(void); 201int __init bl_init_pipefs(void);
131void __exit bl_cleanup_pipefs(void); 202void __exit bl_cleanup_pipefs(void);
132 203
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
new file mode 100644
index 000000000000..00f159da06ee
--- /dev/null
+++ b/fs/nfs/blocklayout/dev.c
@@ -0,0 +1,360 @@
1/*
2 * Copyright (c) 2014 Christoph Hellwig.
3 */
4#include <linux/sunrpc/svc.h>
5#include <linux/blkdev.h>
6#include <linux/nfs4.h>
7#include <linux/nfs_fs.h>
8#include <linux/nfs_xdr.h>
9
10#include "blocklayout.h"
11
12#define NFSDBG_FACILITY NFSDBG_PNFS_LD
13
14static void
15bl_free_device(struct pnfs_block_dev *dev)
16{
17 if (dev->nr_children) {
18 int i;
19
20 for (i = 0; i < dev->nr_children; i++)
21 bl_free_device(&dev->children[i]);
22 kfree(dev->children);
23 } else {
24 if (dev->bdev)
25 blkdev_put(dev->bdev, FMODE_READ);
26 }
27}
28
29void
30bl_free_deviceid_node(struct nfs4_deviceid_node *d)
31{
32 struct pnfs_block_dev *dev =
33 container_of(d, struct pnfs_block_dev, node);
34
35 bl_free_device(dev);
36 kfree(dev);
37}
38
39static int
40nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
41{
42 __be32 *p;
43 int i;
44
45 p = xdr_inline_decode(xdr, 4);
46 if (!p)
47 return -EIO;
48 b->type = be32_to_cpup(p++);
49
50 switch (b->type) {
51 case PNFS_BLOCK_VOLUME_SIMPLE:
52 p = xdr_inline_decode(xdr, 4);
53 if (!p)
54 return -EIO;
55 b->simple.nr_sigs = be32_to_cpup(p++);
56 if (!b->simple.nr_sigs) {
57 dprintk("no signature\n");
58 return -EIO;
59 }
60
61 b->simple.len = 4 + 4;
62 for (i = 0; i < b->simple.nr_sigs; i++) {
63 p = xdr_inline_decode(xdr, 8 + 4);
64 if (!p)
65 return -EIO;
66 p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
67 b->simple.sigs[i].sig_len = be32_to_cpup(p++);
68
69 p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
70 if (!p)
71 return -EIO;
72 memcpy(&b->simple.sigs[i].sig, p,
73 b->simple.sigs[i].sig_len);
74
75 b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
76 }
77 break;
78 case PNFS_BLOCK_VOLUME_SLICE:
79 p = xdr_inline_decode(xdr, 8 + 8 + 4);
80 if (!p)
81 return -EIO;
82 p = xdr_decode_hyper(p, &b->slice.start);
83 p = xdr_decode_hyper(p, &b->slice.len);
84 b->slice.volume = be32_to_cpup(p++);
85 break;
86 case PNFS_BLOCK_VOLUME_CONCAT:
87 p = xdr_inline_decode(xdr, 4);
88 if (!p)
89 return -EIO;
90 b->concat.volumes_count = be32_to_cpup(p++);
91
92 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
93 if (!p)
94 return -EIO;
95 for (i = 0; i < b->concat.volumes_count; i++)
96 b->concat.volumes[i] = be32_to_cpup(p++);
97 break;
98 case PNFS_BLOCK_VOLUME_STRIPE:
99 p = xdr_inline_decode(xdr, 8 + 4);
100 if (!p)
101 return -EIO;
102 p = xdr_decode_hyper(p, &b->stripe.chunk_size);
103 b->stripe.volumes_count = be32_to_cpup(p++);
104
105 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
106 if (!p)
107 return -EIO;
108 for (i = 0; i < b->stripe.volumes_count; i++)
109 b->stripe.volumes[i] = be32_to_cpup(p++);
110 break;
111 default:
112 dprintk("unknown volume type!\n");
113 return -EIO;
114 }
115
116 return 0;
117}
118
119static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
120 struct pnfs_block_dev_map *map)
121{
122 map->start = dev->start;
123 map->len = dev->len;
124 map->disk_offset = dev->disk_offset;
125 map->bdev = dev->bdev;
126 return true;
127}
128
129static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
130 struct pnfs_block_dev_map *map)
131{
132 int i;
133
134 for (i = 0; i < dev->nr_children; i++) {
135 struct pnfs_block_dev *child = &dev->children[i];
136
137 if (child->start > offset ||
138 child->start + child->len <= offset)
139 continue;
140
141 child->map(child, offset - child->start, map);
142 return true;
143 }
144
145 dprintk("%s: ran off loop!\n", __func__);
146 return false;
147}
148
149static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
150 struct pnfs_block_dev_map *map)
151{
152 struct pnfs_block_dev *child;
153 u64 chunk = (offset / dev->chunk_size);
154 int chunk_idx = chunk % dev->nr_children;
155 u64 disk_offset;
156
157 if (chunk_idx > dev->nr_children) {
158 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
159 __func__, chunk_idx, offset, dev->chunk_size);
160 /* error, should not happen */
161 return false;
162 }
163
164 /* truncate offset to the beginning of the stripe */
165 offset = chunk * dev->chunk_size;
166
167 /* disk offset of the stripe */
168 disk_offset = offset / dev->nr_children;
169
170 child = &dev->children[chunk_idx];
171 child->map(child, disk_offset, map);
172
173 map->start += offset;
174 map->disk_offset += disk_offset;
175 map->len = dev->chunk_size;
176 return true;
177}
178
179static int
180bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
181 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
182
183
184static int
185bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
186 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
187{
188 struct pnfs_block_volume *v = &volumes[idx];
189 dev_t dev;
190
191 dev = bl_resolve_deviceid(server, v, gfp_mask);
192 if (!dev)
193 return -EIO;
194
195 d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
196 if (IS_ERR(d->bdev)) {
197 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
198 MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
199 return PTR_ERR(d->bdev);
200 }
201
202
203 d->len = i_size_read(d->bdev->bd_inode);
204 d->map = bl_map_simple;
205
206 printk(KERN_INFO "pNFS: using block device %s\n",
207 d->bdev->bd_disk->disk_name);
208 return 0;
209}
210
211static int
212bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
213 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
214{
215 struct pnfs_block_volume *v = &volumes[idx];
216 int ret;
217
218 ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
219 if (ret)
220 return ret;
221
222 d->disk_offset = v->slice.start;
223 d->len = v->slice.len;
224 return 0;
225}
226
227static int
228bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
229 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
230{
231 struct pnfs_block_volume *v = &volumes[idx];
232 u64 len = 0;
233 int ret, i;
234
235 d->children = kcalloc(v->concat.volumes_count,
236 sizeof(struct pnfs_block_dev), GFP_KERNEL);
237 if (!d->children)
238 return -ENOMEM;
239
240 for (i = 0; i < v->concat.volumes_count; i++) {
241 ret = bl_parse_deviceid(server, &d->children[i],
242 volumes, v->concat.volumes[i], gfp_mask);
243 if (ret)
244 return ret;
245
246 d->nr_children++;
247 d->children[i].start += len;
248 len += d->children[i].len;
249 }
250
251 d->len = len;
252 d->map = bl_map_concat;
253 return 0;
254}
255
256static int
257bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
258 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
259{
260 struct pnfs_block_volume *v = &volumes[idx];
261 u64 len = 0;
262 int ret, i;
263
264 d->children = kcalloc(v->stripe.volumes_count,
265 sizeof(struct pnfs_block_dev), GFP_KERNEL);
266 if (!d->children)
267 return -ENOMEM;
268
269 for (i = 0; i < v->stripe.volumes_count; i++) {
270 ret = bl_parse_deviceid(server, &d->children[i],
271 volumes, v->stripe.volumes[i], gfp_mask);
272 if (ret)
273 return ret;
274
275 d->nr_children++;
276 len += d->children[i].len;
277 }
278
279 d->len = len;
280 d->chunk_size = v->stripe.chunk_size;
281 d->map = bl_map_stripe;
282 return 0;
283}
284
285static int
286bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
287 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
288{
289 switch (volumes[idx].type) {
290 case PNFS_BLOCK_VOLUME_SIMPLE:
291 return bl_parse_simple(server, d, volumes, idx, gfp_mask);
292 case PNFS_BLOCK_VOLUME_SLICE:
293 return bl_parse_slice(server, d, volumes, idx, gfp_mask);
294 case PNFS_BLOCK_VOLUME_CONCAT:
295 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
296 case PNFS_BLOCK_VOLUME_STRIPE:
297 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
298 default:
299 dprintk("unsupported volume type: %d\n", volumes[idx].type);
300 return -EIO;
301 }
302}
303
304struct nfs4_deviceid_node *
305bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
306 gfp_t gfp_mask)
307{
308 struct nfs4_deviceid_node *node = NULL;
309 struct pnfs_block_volume *volumes;
310 struct pnfs_block_dev *top;
311 struct xdr_stream xdr;
312 struct xdr_buf buf;
313 struct page *scratch;
314 int nr_volumes, ret, i;
315 __be32 *p;
316
317 scratch = alloc_page(gfp_mask);
318 if (!scratch)
319 goto out;
320
321 xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
322 xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
323
324 p = xdr_inline_decode(&xdr, sizeof(__be32));
325 if (!p)
326 goto out_free_scratch;
327 nr_volumes = be32_to_cpup(p++);
328
329 volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
330 gfp_mask);
331 if (!volumes)
332 goto out_free_scratch;
333
334 for (i = 0; i < nr_volumes; i++) {
335 ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
336 if (ret < 0)
337 goto out_free_volumes;
338 }
339
340 top = kzalloc(sizeof(*top), gfp_mask);
341 if (!top)
342 goto out_free_volumes;
343
344 ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
345 if (ret) {
346 bl_free_device(top);
347 kfree(top);
348 goto out_free_volumes;
349 }
350
351 node = &top->node;
352 nfs4_init_deviceid_node(node, server, &pdev->dev_id);
353
354out_free_volumes:
355 kfree(volumes);
356out_free_scratch:
357 __free_page(scratch);
358out:
359 return node;
360}
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index bfb04861eb61..8d04bda2bd2e 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -34,94 +34,53 @@
34 34
35#define NFSDBG_FACILITY NFSDBG_PNFS_LD 35#define NFSDBG_FACILITY NFSDBG_PNFS_LD
36 36
37static void bl_dm_remove(struct net *net, dev_t dev) 37static void
38nfs4_encode_simple(__be32 *p, struct pnfs_block_volume *b)
38{ 39{
39 struct bl_pipe_msg bl_pipe_msg; 40 int i;
40 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; 41
41 struct bl_dev_msg bl_umount_request; 42 *p++ = cpu_to_be32(1);
42 struct bl_msg_hdr bl_msg = { 43 *p++ = cpu_to_be32(b->type);
43 .type = BL_DEVICE_UMOUNT, 44 *p++ = cpu_to_be32(b->simple.nr_sigs);
44 .totallen = sizeof(bl_umount_request), 45 for (i = 0; i < b->simple.nr_sigs; i++) {
45 }; 46 p = xdr_encode_hyper(p, b->simple.sigs[i].offset);
46 uint8_t *dataptr; 47 p = xdr_encode_opaque(p, b->simple.sigs[i].sig,
47 DECLARE_WAITQUEUE(wq, current); 48 b->simple.sigs[i].sig_len);
48 struct nfs_net *nn = net_generic(net, nfs_net_id);
49
50 dprintk("Entering %s\n", __func__);
51
52 bl_pipe_msg.bl_wq = &nn->bl_wq;
53 memset(msg, 0, sizeof(*msg));
54 msg->len = sizeof(bl_msg) + bl_msg.totallen;
55 msg->data = kzalloc(msg->len, GFP_NOFS);
56 if (!msg->data)
57 goto out;
58
59 memset(&bl_umount_request, 0, sizeof(bl_umount_request));
60 bl_umount_request.major = MAJOR(dev);
61 bl_umount_request.minor = MINOR(dev);
62
63 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
64 dataptr = (uint8_t *) msg->data;
65 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
66
67 add_wait_queue(&nn->bl_wq, &wq);
68 if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
69 remove_wait_queue(&nn->bl_wq, &wq);
70 goto out;
71 } 49 }
72
73 set_current_state(TASK_UNINTERRUPTIBLE);
74 schedule();
75 __set_current_state(TASK_RUNNING);
76 remove_wait_queue(&nn->bl_wq, &wq);
77
78out:
79 kfree(msg->data);
80} 50}
81 51
82/* 52dev_t
83 * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. 53bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
84 */
85struct nfs4_deviceid_node *
86bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev,
87 gfp_t gfp_mask) 54 gfp_t gfp_mask)
88{ 55{
89 struct pnfs_block_dev *rv;
90 struct block_device *bd;
91 struct bl_pipe_msg bl_pipe_msg;
92 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
93 struct bl_msg_hdr bl_msg = {
94 .type = BL_DEVICE_MOUNT,
95 .totallen = dev->mincount,
96 };
97 uint8_t *dataptr;
98 DECLARE_WAITQUEUE(wq, current);
99 int offset, len, i, rc;
100 struct net *net = server->nfs_client->cl_net; 56 struct net *net = server->nfs_client->cl_net;
101 struct nfs_net *nn = net_generic(net, nfs_net_id); 57 struct nfs_net *nn = net_generic(net, nfs_net_id);
102 struct bl_dev_msg *reply = &nn->bl_mount_reply; 58 struct bl_dev_msg *reply = &nn->bl_mount_reply;
59 struct bl_pipe_msg bl_pipe_msg;
60 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
61 struct bl_msg_hdr *bl_msg;
62 DECLARE_WAITQUEUE(wq, current);
63 dev_t dev = 0;
64 int rc;
103 65
104 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); 66 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
105 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
106 dev->mincount);
107 67
108 bl_pipe_msg.bl_wq = &nn->bl_wq; 68 bl_pipe_msg.bl_wq = &nn->bl_wq;
69
70 b->simple.len += 4; /* single volume */
71 if (b->simple.len > PAGE_SIZE)
72 return -EIO;
73
109 memset(msg, 0, sizeof(*msg)); 74 memset(msg, 0, sizeof(*msg));
110 msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, gfp_mask); 75 msg->len = sizeof(*bl_msg) + b->simple.len;
76 msg->data = kzalloc(msg->len, gfp_mask);
111 if (!msg->data) 77 if (!msg->data)
112 goto out; 78 goto out;
113 79
114 memcpy(msg->data, &bl_msg, sizeof(bl_msg)); 80 bl_msg = msg->data;
115 dataptr = (uint8_t *) msg->data; 81 bl_msg->type = BL_DEVICE_MOUNT,
116 len = dev->mincount; 82 bl_msg->totallen = b->simple.len;
117 offset = sizeof(bl_msg); 83 nfs4_encode_simple(msg->data + sizeof(*bl_msg), b);
118 for (i = 0; len > 0; i++) {
119 memcpy(&dataptr[offset], page_address(dev->pages[i]),
120 len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE);
121 len -= PAGE_CACHE_SIZE;
122 offset += PAGE_CACHE_SIZE;
123 }
124 msg->len = sizeof(bl_msg) + dev->mincount;
125 84
126 dprintk("%s CALLING USERSPACE DAEMON\n", __func__); 85 dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
127 add_wait_queue(&nn->bl_wq, &wq); 86 add_wait_queue(&nn->bl_wq, &wq);
@@ -142,46 +101,10 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev,
142 goto out; 101 goto out;
143 } 102 }
144 103
145 bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), 104 dev = MKDEV(reply->major, reply->minor);
146 FMODE_READ, NULL);
147 if (IS_ERR(bd)) {
148 printk(KERN_WARNING "%s failed to open device %d:%d (%ld)\n",
149 __func__, reply->major, reply->minor,
150 PTR_ERR(bd));
151 goto out;
152 }
153
154 rv = kzalloc(sizeof(*rv), gfp_mask);
155 if (!rv)
156 goto out;
157
158 nfs4_init_deviceid_node(&rv->d_node, server, &dev->dev_id);
159 rv->d_bdev = bd;
160
161 dprintk("%s Created device %s with bd_block_size %u\n",
162 __func__,
163 bd->bd_disk->disk_name,
164 bd->bd_block_size);
165
166 kfree(msg->data);
167 return &rv->d_node;
168
169out: 105out:
170 kfree(msg->data); 106 kfree(msg->data);
171 return NULL; 107 return dev;
172}
173
174void
175bl_free_deviceid_node(struct nfs4_deviceid_node *d)
176{
177 struct pnfs_block_dev *dev =
178 container_of(d, struct pnfs_block_dev, d_node);
179 struct net *net = d->nfs_client->cl_net;
180
181 blkdev_put(dev->d_bdev, FMODE_READ);
182 bl_dm_remove(net, dev->d_bdev->bd_dev);
183
184 kfree(dev);
185} 108}
186 109
187static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, 110static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,