 fs/nfs/blocklayout/Makefile      |   2
 fs/nfs/blocklayout/blocklayout.c |  92
 fs/nfs/blocklayout/blocklayout.h |  83
 fs/nfs/blocklayout/dev.c         | 360
 fs/nfs/blocklayout/rpc_pipefs.c  | 141
 5 files changed, 530 insertions(+), 148 deletions(-)
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile
index e177026e0119..3ca14c36d08b 100644
--- a/fs/nfs/blocklayout/Makefile
+++ b/fs/nfs/blocklayout/Makefile
@@ -3,4 +3,4 @@
 #
 obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
 
-blocklayoutdriver-y += blocklayout.o extent_tree.o rpc_pipefs.o
+blocklayoutdriver-y += blocklayout.o dev.o extent_tree.o rpc_pipefs.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65a6b19b17a2..c41a718854e3 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -114,13 +114,10 @@ bl_submit_bio(int rw, struct bio *bio)
 	return NULL;
 }
 
-static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
-		struct pnfs_block_extent *be,
-		void (*end_io)(struct bio *, int err),
-		struct parallel_io *par)
+static struct bio *
+bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
+		void (*end_io)(struct bio *, int err), struct parallel_io *par)
 {
-	struct pnfs_block_dev *dev =
-		container_of(be->be_device, struct pnfs_block_dev, d_node);
 	struct bio *bio;
 
 	npg = min(npg, BIO_MAX_PAGES);
@@ -131,32 +128,55 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
 	}
 
 	if (bio) {
-		bio->bi_iter.bi_sector = isect - be->be_f_offset +
-			be->be_v_offset;
-		bio->bi_bdev = dev->d_bdev;
+		bio->bi_iter.bi_sector = disk_sector;
+		bio->bi_bdev = bdev;
 		bio->bi_end_io = end_io;
 		bio->bi_private = par;
 	}
 	return bio;
 }
 
-static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
-		sector_t isect, struct page *page,
-		struct pnfs_block_extent *be,
-		void (*end_io)(struct bio *, int err),
-		struct parallel_io *par,
-		unsigned int offset, int len)
+static struct bio *
+do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
+		struct page *page, struct pnfs_block_dev_map *map,
+		struct pnfs_block_extent *be,
+		void (*end_io)(struct bio *, int err),
+		struct parallel_io *par, unsigned int offset, int *len)
 {
-	isect = isect + (offset >> SECTOR_SHIFT);
+	struct pnfs_block_dev *dev =
+		container_of(be->be_device, struct pnfs_block_dev, node);
+	u64 disk_addr, end;
+
 	dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
-		npg, rw, (unsigned long long)isect, offset, len);
+		npg, rw, (unsigned long long)isect, offset, *len);
+
+	/* translate to device offset */
+	isect += be->be_v_offset;
+	isect -= be->be_f_offset;
+
+	/* translate to physical disk offset */
+	disk_addr = (u64)isect << SECTOR_SHIFT;
+	if (disk_addr < map->start || disk_addr >= map->start + map->len) {
+		if (!dev->map(dev, disk_addr, map))
+			return ERR_PTR(-EIO);
+		bio = bl_submit_bio(rw, bio);
+	}
+	disk_addr += map->disk_offset;
+	disk_addr -= map->start;
+
+	/* limit length to what the device mapping allows */
+	end = disk_addr + *len;
+	if (end >= map->start + map->len)
+		*len = map->start + map->len - disk_addr;
+
 retry:
 	if (!bio) {
-		bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
+		bio = bl_alloc_init_bio(npg, map->bdev,
+				disk_addr >> SECTOR_SHIFT, end_io, par);
 		if (!bio)
 			return ERR_PTR(-ENOMEM);
 	}
-	if (bio_add_page(bio, page, len, offset) < len) {
+	if (bio_add_page(bio, page, *len, offset) < *len) {
 		bio = bl_submit_bio(rw, bio);
 		goto retry;
 	}
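
[Note: do_add_page_to_bio() now translates addresses in two steps: the file sector is first shifted into the logical volume via the extent (be_v_offset/be_f_offset), then the cached pnfs_block_dev_map turns the volume byte address into a physical range on one block device, refreshing the map through dev->map() on a miss. A minimal userspace sketch of that arithmetic with made-up numbers; SECTOR_SHIFT is 9, per blocklayout.h:

    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SHIFT 9

    int main(void)
    {
        uint64_t isect = 2048;          /* file offset, in 512-byte sectors */
        uint64_t be_f_offset = 1024;    /* extent start within the file */
        uint64_t be_v_offset = 8192;    /* extent start on the logical volume */
        /* hypothetical mapping as filled in by dev->map() */
        uint64_t map_start = 0, map_len = 1ULL << 30, map_disk_offset = 4096;

        /* translate to device (volume) offset, as in the patch */
        isect += be_v_offset;
        isect -= be_f_offset;

        /* translate to physical disk offset */
        uint64_t disk_addr = (uint64_t)isect << SECTOR_SHIFT;
        if (disk_addr < map_start || disk_addr >= map_start + map_len)
            ;   /* the kernel code would refresh the map via dev->map() here */
        disk_addr += map_disk_offset;
        disk_addr -= map_start;

        printf("disk_addr = %llu\n", (unsigned long long)disk_addr);
        return 0;
    }

The bio is then aimed at disk_addr >> SECTOR_SHIFT on map->bdev rather than at the extent's single device, which is what makes sliced, concatenated and striped volumes possible.]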
@@ -203,6 +223,7 @@ static enum pnfs_try_status
 bl_read_pagelist(struct nfs_pgio_header *header)
 {
 	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
+	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
 	struct bio *bio = NULL;
 	struct pnfs_block_extent be;
 	sector_t isect, extent_length = 0;
@@ -248,28 +269,29 @@ bl_read_pagelist(struct nfs_pgio_header *header)
 				pg_len = PAGE_CACHE_SIZE - pg_offset;
 			else
 				pg_len = bytes_left;
-
-			f_offset += pg_len;
-			bytes_left -= pg_len;
-			isect += (pg_offset >> SECTOR_SHIFT);
-			extent_length -= (pg_offset >> SECTOR_SHIFT);
 		} else {
 			BUG_ON(pg_offset != 0);
 			pg_len = PAGE_CACHE_SIZE;
 		}
 
+		isect += (pg_offset >> SECTOR_SHIFT);
+		extent_length -= (pg_offset >> SECTOR_SHIFT);
+
 		if (is_hole(&be)) {
 			bio = bl_submit_bio(READ, bio);
 			/* Fill hole w/ zeroes w/o accessing device */
 			dprintk("%s Zeroing page for hole\n", __func__);
 			zero_user_segment(pages[i], pg_offset, pg_len);
+
+			/* invalidate map */
+			map.start = NFS4_MAX_UINT64;
 		} else {
 			bio = do_add_page_to_bio(bio,
 						 header->page_array.npages - i,
 						 READ,
-						 isect, pages[i], &be,
+						 isect, pages[i], &map, &be,
 						 bl_end_io_read, par,
-						 pg_offset, pg_len);
+						 pg_offset, &pg_len);
 			if (IS_ERR(bio)) {
 				header->pnfs_error = PTR_ERR(bio);
 				bio = NULL;
@@ -278,6 +300,8 @@ bl_read_pagelist(struct nfs_pgio_header *header)
 		}
 		isect += (pg_len >> SECTOR_SHIFT);
 		extent_length -= (pg_len >> SECTOR_SHIFT);
+		f_offset += pg_len;
+		bytes_left -= pg_len;
 	}
 	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
 		header->res.eof = 1;
@@ -346,6 +370,7 @@ static enum pnfs_try_status
 bl_write_pagelist(struct nfs_pgio_header *header, int sync)
 {
 	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
+	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
 	struct bio *bio = NULL;
 	struct pnfs_block_extent be;
 	sector_t isect, extent_length = 0;
@@ -354,6 +379,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
 	size_t count = header->args.count;
 	struct page **pages = header->args.pages;
 	int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+	unsigned int pg_len;
 	struct blk_plug plug;
 	int i;
 
@@ -387,19 +413,21 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
 			extent_length = be.be_length - (isect - be.be_f_offset);
 		}
 
+		pg_len = PAGE_CACHE_SIZE;
 		bio = do_add_page_to_bio(bio, header->page_array.npages - i,
-					 WRITE, isect, pages[i], &be,
+					 WRITE, isect, pages[i], &map, &be,
 					 bl_end_io_write, par,
-					 0, PAGE_CACHE_SIZE);
+					 0, &pg_len);
 		if (IS_ERR(bio)) {
 			header->pnfs_error = PTR_ERR(bio);
 			bio = NULL;
 			goto out;
 		}
-		offset += PAGE_CACHE_SIZE;
-		count -= PAGE_CACHE_SIZE;
-		isect += PAGE_CACHE_SECTORS;
-		extent_length -= PAGE_CACHE_SECTORS;
+
+		offset += pg_len;
+		count -= pg_len;
+		isect += (pg_len >> SECTOR_SHIFT);
+		extent_length -= (pg_len >> SECTOR_SHIFT);
 	}
 
 	header->res.count = header->args.count;
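
[Note: pg_len is now passed by pointer so that do_add_page_to_bio() can shrink an I/O to the end of the current device mapping; the write loop accordingly advances offset/count/isect by the possibly clamped pg_len instead of a fixed PAGE_CACHE_SIZE. A small userspace model of the clamping, with hypothetical sizes:

    #include <stdint.h>
    #include <stdio.h>

    /* models the *len clamping in do_add_page_to_bio() */
    static void clamp_to_map(uint64_t disk_addr, uint64_t map_start,
                             uint64_t map_len, uint32_t *len)
    {
        if (disk_addr + *len >= map_start + map_len)
            *len = map_start + map_len - disk_addr;
    }

    int main(void)
    {
        uint32_t pg_len = 4096;               /* PAGE_CACHE_SIZE */
        /* mapping ends 1024 bytes past disk_addr, e.g. a stripe boundary */
        clamp_to_map(8192, 0, 8192 + 1024, &pg_len);
        printf("%u\n", pg_len);               /* prints 1024 */
        return 0;
    }
]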
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index c98d98a62664..92dca9e90d8d 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -44,9 +44,77 @@
 #define PAGE_CACHE_SECTOR_SHIFT	(PAGE_CACHE_SHIFT - SECTOR_SHIFT)
 #define SECTOR_SIZE			(1 << SECTOR_SHIFT)
 
+struct pnfs_block_dev;
+
+enum pnfs_block_volume_type {
+	PNFS_BLOCK_VOLUME_SIMPLE	= 0,
+	PNFS_BLOCK_VOLUME_SLICE		= 1,
+	PNFS_BLOCK_VOLUME_CONCAT	= 2,
+	PNFS_BLOCK_VOLUME_STRIPE	= 3,
+};
+
+#define PNFS_BLOCK_MAX_UUIDS	4
+#define PNFS_BLOCK_MAX_DEVICES	64
+
+/*
+ * Random upper cap for the uuid length to avoid unbounded allocation.
+ * Not actually limited by the protocol.
+ */
+#define PNFS_BLOCK_UUID_LEN	128
+
+
+struct pnfs_block_volume {
+	enum pnfs_block_volume_type	type;
+	union {
+		struct {
+			int		len;
+			int		nr_sigs;
+			struct {
+				u64		offset;
+				u32		sig_len;
+				u8		sig[PNFS_BLOCK_UUID_LEN];
+			} sigs[PNFS_BLOCK_MAX_UUIDS];
+		} simple;
+		struct {
+			u64		start;
+			u64		len;
+			u32		volume;
+		} slice;
+		struct {
+			u32		volumes_count;
+			u32		volumes[PNFS_BLOCK_MAX_DEVICES];
+		} concat;
+		struct {
+			u64		chunk_size;
+			u32		volumes_count;
+			u32		volumes[PNFS_BLOCK_MAX_DEVICES];
+		} stripe;
+	};
+};
+
+struct pnfs_block_dev_map {
+	sector_t			start;
+	sector_t			len;
+
+	sector_t			disk_offset;
+	struct block_device		*bdev;
+};
+
 struct pnfs_block_dev {
-	struct nfs4_deviceid_node	d_node;
-	struct block_device		*d_bdev;
+	struct nfs4_deviceid_node	node;
+
+	u64				start;
+	u64				len;
+
+	u32				nr_children;
+	struct pnfs_block_dev		*children;
+	u64				chunk_size;
+
+	struct block_device		*bdev;
+	u64				disk_offset;
+
+	bool (*map)(struct pnfs_block_dev *dev, u64 offset,
+			struct pnfs_block_dev_map *map);
 };
 
 enum exstate4 {
@@ -110,6 +178,11 @@ struct bl_msg_hdr {
 #define BL_DEVICE_REQUEST_PROC		0x1 /* User level process succeeds */
 #define BL_DEVICE_REQUEST_ERR		0x2 /* User level process fails */
 
+/* dev.c */
+struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
+		struct pnfs_device *pdev, gfp_t gfp_mask);
+void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
+
 /* extent_tree.c */
 int ext_tree_insert(struct pnfs_block_layout *bl,
 		struct pnfs_block_extent *new);
@@ -123,10 +196,8 @@ int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
 void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
 
 /* rpc_pipefs.c */
-struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
-		struct pnfs_device *pdev, gfp_t gfp_mask);
-void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
-
+dev_t bl_resolve_deviceid(struct nfs_server *server,
+		struct pnfs_block_volume *b, gfp_t gfp_mask);
 int __init bl_init_pipefs(void);
 void __exit bl_cleanup_pipefs(void);
 
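
[Note: struct pnfs_block_dev is now a recursive volume tree: leaves (simple volumes) carry a bdev, while interior nodes (slice/concat/stripe) carry children, and every node supplies a map callback that resolves a logical offset to a pnfs_block_dev_map. As a sketch, this userspace model mirrors the child lookup that bl_map_concat() in dev.c performs; the two-child layout is hypothetical:

    #include <stdint.h>
    #include <stdio.h>

    struct child { uint64_t start, len; };

    /* mirrors bl_map_concat(): find the child covering 'offset' */
    static int concat_pick(const struct child *c, int n, uint64_t offset)
    {
        for (int i = 0; i < n; i++)
            if (offset >= c[i].start && offset < c[i].start + c[i].len)
                return i;
        return -1;      /* the "ran off loop" failure case */
    }

    int main(void)
    {
        const uint64_t GiB = 1ULL << 30;
        struct child c[2] = { { 0, GiB }, { GiB, GiB } };

        printf("%d\n", concat_pick(c, 2, GiB + GiB / 2));   /* prints 1 */
        return 0;
    }
]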
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
new file mode 100644
index 000000000000..00f159da06ee
--- /dev/null
+++ b/fs/nfs/blocklayout/dev.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2014 Christoph Hellwig.
+ */
+#include <linux/sunrpc/svc.h>
+#include <linux/blkdev.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "blocklayout.h"
+
+#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
+
+static void
+bl_free_device(struct pnfs_block_dev *dev)
+{
+	if (dev->nr_children) {
+		int i;
+
+		for (i = 0; i < dev->nr_children; i++)
+			bl_free_device(&dev->children[i]);
+		kfree(dev->children);
+	} else {
+		if (dev->bdev)
+			blkdev_put(dev->bdev, FMODE_READ);
+	}
+}
+
+void
+bl_free_deviceid_node(struct nfs4_deviceid_node *d)
+{
+	struct pnfs_block_dev *dev =
+		container_of(d, struct pnfs_block_dev, node);
+
+	bl_free_device(dev);
+	kfree(dev);
+}
+
+static int
+nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
+{
+	__be32 *p;
+	int i;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (!p)
+		return -EIO;
+	b->type = be32_to_cpup(p++);
+
+	switch (b->type) {
+	case PNFS_BLOCK_VOLUME_SIMPLE:
+		p = xdr_inline_decode(xdr, 4);
+		if (!p)
+			return -EIO;
+		b->simple.nr_sigs = be32_to_cpup(p++);
+		if (!b->simple.nr_sigs) {
+			dprintk("no signature\n");
+			return -EIO;
+		}
+
+		b->simple.len = 4 + 4;
+		for (i = 0; i < b->simple.nr_sigs; i++) {
+			p = xdr_inline_decode(xdr, 8 + 4);
+			if (!p)
+				return -EIO;
+			p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
+			b->simple.sigs[i].sig_len = be32_to_cpup(p++);
+
+			p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
+			if (!p)
+				return -EIO;
+			memcpy(&b->simple.sigs[i].sig, p,
+				b->simple.sigs[i].sig_len);
+
+			b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
+		}
+		break;
+	case PNFS_BLOCK_VOLUME_SLICE:
+		p = xdr_inline_decode(xdr, 8 + 8 + 4);
+		if (!p)
+			return -EIO;
+		p = xdr_decode_hyper(p, &b->slice.start);
+		p = xdr_decode_hyper(p, &b->slice.len);
+		b->slice.volume = be32_to_cpup(p++);
+		break;
+	case PNFS_BLOCK_VOLUME_CONCAT:
+		p = xdr_inline_decode(xdr, 4);
+		if (!p)
+			return -EIO;
+		b->concat.volumes_count = be32_to_cpup(p++);
+
+		p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
+		if (!p)
+			return -EIO;
+		for (i = 0; i < b->concat.volumes_count; i++)
+			b->concat.volumes[i] = be32_to_cpup(p++);
+		break;
+	case PNFS_BLOCK_VOLUME_STRIPE:
+		p = xdr_inline_decode(xdr, 8 + 4);
+		if (!p)
+			return -EIO;
+		p = xdr_decode_hyper(p, &b->stripe.chunk_size);
+		b->stripe.volumes_count = be32_to_cpup(p++);
+
+		p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
+		if (!p)
+			return -EIO;
+		for (i = 0; i < b->stripe.volumes_count; i++)
+			b->stripe.volumes[i] = be32_to_cpup(p++);
+		break;
+	default:
+		dprintk("unknown volume type!\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
+		struct pnfs_block_dev_map *map)
+{
+	map->start = dev->start;
+	map->len = dev->len;
+	map->disk_offset = dev->disk_offset;
+	map->bdev = dev->bdev;
+	return true;
+}
+
+static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
+		struct pnfs_block_dev_map *map)
+{
+	int i;
+
+	for (i = 0; i < dev->nr_children; i++) {
+		struct pnfs_block_dev *child = &dev->children[i];
+
+		if (child->start > offset ||
+		    child->start + child->len <= offset)
+			continue;
+
+		child->map(child, offset - child->start, map);
+		return true;
+	}
+
+	dprintk("%s: ran off loop!\n", __func__);
+	return false;
+}
+
+static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
+		struct pnfs_block_dev_map *map)
+{
+	struct pnfs_block_dev *child;
+	u64 chunk = (offset / dev->chunk_size);
+	int chunk_idx = chunk % dev->nr_children;
+	u64 disk_offset;
+
+	if (chunk_idx > dev->nr_children) {
+		dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
+			__func__, chunk_idx, offset, dev->chunk_size);
+		/* error, should not happen */
+		return false;
+	}
+
+	/* truncate offset to the beginning of the stripe */
+	offset = chunk * dev->chunk_size;
+
+	/* disk offset of the stripe */
+	disk_offset = offset / dev->nr_children;
+
+	child = &dev->children[chunk_idx];
+	child->map(child, disk_offset, map);
+
+	map->start += offset;
+	map->disk_offset += disk_offset;
+	map->len = dev->chunk_size;
+	return true;
+}
+
+static int
+bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
+		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
+
+
+static int
+bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
+		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+	struct pnfs_block_volume *v = &volumes[idx];
+	dev_t dev;
+
+	dev = bl_resolve_deviceid(server, v, gfp_mask);
+	if (!dev)
+		return -EIO;
+
+	d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
+	if (IS_ERR(d->bdev)) {
+		printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
+			MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
+		return PTR_ERR(d->bdev);
+	}
+
+
+	d->len = i_size_read(d->bdev->bd_inode);
+	d->map = bl_map_simple;
+
+	printk(KERN_INFO "pNFS: using block device %s\n",
+		d->bdev->bd_disk->disk_name);
+	return 0;
+}
+
+static int
+bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
+		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+	struct pnfs_block_volume *v = &volumes[idx];
+	int ret;
+
+	ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
+	if (ret)
+		return ret;
+
+	d->disk_offset = v->slice.start;
+	d->len = v->slice.len;
+	return 0;
+}
+
+static int
+bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
+		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+	struct pnfs_block_volume *v = &volumes[idx];
+	u64 len = 0;
+	int ret, i;
+
+	d->children = kcalloc(v->concat.volumes_count,
+			sizeof(struct pnfs_block_dev), GFP_KERNEL);
+	if (!d->children)
+		return -ENOMEM;
+
+	for (i = 0; i < v->concat.volumes_count; i++) {
+		ret = bl_parse_deviceid(server, &d->children[i],
+				volumes, v->concat.volumes[i], gfp_mask);
+		if (ret)
+			return ret;
+
+		d->nr_children++;
+		d->children[i].start += len;
+		len += d->children[i].len;
+	}
+
+	d->len = len;
+	d->map = bl_map_concat;
+	return 0;
+}
+
+static int
+bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
+		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+	struct pnfs_block_volume *v = &volumes[idx];
+	u64 len = 0;
+	int ret, i;
+
+	d->children = kcalloc(v->stripe.volumes_count,
+			sizeof(struct pnfs_block_dev), GFP_KERNEL);
+	if (!d->children)
+		return -ENOMEM;
+
+	for (i = 0; i < v->stripe.volumes_count; i++) {
+		ret = bl_parse_deviceid(server, &d->children[i],
+				volumes, v->stripe.volumes[i], gfp_mask);
+		if (ret)
+			return ret;
+
+		d->nr_children++;
+		len += d->children[i].len;
+	}
+
+	d->len = len;
+	d->chunk_size = v->stripe.chunk_size;
+	d->map = bl_map_stripe;
+	return 0;
+}
+
+static int
+bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
+		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+	switch (volumes[idx].type) {
+	case PNFS_BLOCK_VOLUME_SIMPLE:
+		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
+	case PNFS_BLOCK_VOLUME_SLICE:
+		return bl_parse_slice(server, d, volumes, idx, gfp_mask);
+	case PNFS_BLOCK_VOLUME_CONCAT:
+		return bl_parse_concat(server, d, volumes, idx, gfp_mask);
+	case PNFS_BLOCK_VOLUME_STRIPE:
+		return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
+	default:
+		dprintk("unsupported volume type: %d\n", volumes[idx].type);
+		return -EIO;
+	}
+}
+
+struct nfs4_deviceid_node *
+bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
+		gfp_t gfp_mask)
+{
+	struct nfs4_deviceid_node *node = NULL;
+	struct pnfs_block_volume *volumes;
+	struct pnfs_block_dev *top;
+	struct xdr_stream xdr;
+	struct xdr_buf buf;
+	struct page *scratch;
+	int nr_volumes, ret, i;
+	__be32 *p;
+
+	scratch = alloc_page(gfp_mask);
+	if (!scratch)
+		goto out;
+
+	xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
+	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
+
+	p = xdr_inline_decode(&xdr, sizeof(__be32));
+	if (!p)
+		goto out_free_scratch;
+	nr_volumes = be32_to_cpup(p++);
+
+	volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
+			gfp_mask);
+	if (!volumes)
+		goto out_free_scratch;
+
+	for (i = 0; i < nr_volumes; i++) {
+		ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
+		if (ret < 0)
+			goto out_free_volumes;
+	}
+
+	top = kzalloc(sizeof(*top), gfp_mask);
+	if (!top)
+		goto out_free_volumes;
+
+	ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
+	if (ret) {
+		bl_free_device(top);
+		kfree(top);
+		goto out_free_volumes;
+	}
+
+	node = &top->node;
+	nfs4_init_deviceid_node(node, server, &pdev->dev_id);
+
+out_free_volumes:
+	kfree(volumes);
+out_free_scratch:
+	__free_page(scratch);
+out:
+	return node;
+}
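
[Note: the stripe mapping in bl_map_stripe() above is plain RAID-0 arithmetic: the chunk index selects a child round-robin, and the chunk's byte offset on that child is the volume offset of the chunk divided by the number of children. A worked userspace example with hypothetical geometry:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t chunk_size = 64 * 1024;    /* hypothetical 64k chunks */
        uint32_t nr_children = 3;           /* hypothetical 3-disk stripe */
        uint64_t offset = 200 * 1024;       /* logical volume offset */

        uint64_t chunk = offset / chunk_size;             /* chunk 3 */
        uint32_t chunk_idx = chunk % nr_children;         /* child 0 */
        uint64_t chunk_start = chunk * chunk_size;        /* 192k on the volume */
        uint64_t disk_offset = chunk_start / nr_children; /* 64k on the child */

        printf("chunk %llu -> child %u at %llu\n",
               (unsigned long long)chunk, chunk_idx,
               (unsigned long long)disk_offset);
        return 0;
    }
]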
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index bfb04861eb61..8d04bda2bd2e 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -34,94 +34,53 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
 
-static void bl_dm_remove(struct net *net, dev_t dev)
+static void
+nfs4_encode_simple(__be32 *p, struct pnfs_block_volume *b)
 {
-	struct bl_pipe_msg bl_pipe_msg;
-	struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
-	struct bl_dev_msg bl_umount_request;
-	struct bl_msg_hdr bl_msg = {
-		.type = BL_DEVICE_UMOUNT,
-		.totallen = sizeof(bl_umount_request),
-	};
-	uint8_t *dataptr;
-	DECLARE_WAITQUEUE(wq, current);
-	struct nfs_net *nn = net_generic(net, nfs_net_id);
-
-	dprintk("Entering %s\n", __func__);
-
-	bl_pipe_msg.bl_wq = &nn->bl_wq;
-	memset(msg, 0, sizeof(*msg));
-	msg->len = sizeof(bl_msg) + bl_msg.totallen;
-	msg->data = kzalloc(msg->len, GFP_NOFS);
-	if (!msg->data)
-		goto out;
-
-	memset(&bl_umount_request, 0, sizeof(bl_umount_request));
-	bl_umount_request.major = MAJOR(dev);
-	bl_umount_request.minor = MINOR(dev);
-
-	memcpy(msg->data, &bl_msg, sizeof(bl_msg));
-	dataptr = (uint8_t *) msg->data;
-	memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
-
-	add_wait_queue(&nn->bl_wq, &wq);
-	if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
-		remove_wait_queue(&nn->bl_wq, &wq);
-		goto out;
+	int i;
+
+	*p++ = cpu_to_be32(1);
+	*p++ = cpu_to_be32(b->type);
+	*p++ = cpu_to_be32(b->simple.nr_sigs);
+	for (i = 0; i < b->simple.nr_sigs; i++) {
+		p = xdr_encode_hyper(p, b->simple.sigs[i].offset);
+		p = xdr_encode_opaque(p, b->simple.sigs[i].sig,
+				b->simple.sigs[i].sig_len);
 	}
-
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	schedule();
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&nn->bl_wq, &wq);
-
-out:
-	kfree(msg->data);
 }
 
-/*
- * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf.
- */
-struct nfs4_deviceid_node *
-bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev,
+dev_t
+bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
 		gfp_t gfp_mask)
 {
-	struct pnfs_block_dev *rv;
-	struct block_device *bd;
-	struct bl_pipe_msg bl_pipe_msg;
-	struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
-	struct bl_msg_hdr bl_msg = {
-		.type = BL_DEVICE_MOUNT,
-		.totallen = dev->mincount,
-	};
-	uint8_t *dataptr;
-	DECLARE_WAITQUEUE(wq, current);
-	int offset, len, i, rc;
 	struct net *net = server->nfs_client->cl_net;
 	struct nfs_net *nn = net_generic(net, nfs_net_id);
 	struct bl_dev_msg *reply = &nn->bl_mount_reply;
+	struct bl_pipe_msg bl_pipe_msg;
+	struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
+	struct bl_msg_hdr *bl_msg;
+	DECLARE_WAITQUEUE(wq, current);
+	dev_t dev = 0;
+	int rc;
 
 	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
-	dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
-		dev->mincount);
 
 	bl_pipe_msg.bl_wq = &nn->bl_wq;
+
+	b->simple.len += 4;	/* single volume */
+	if (b->simple.len > PAGE_SIZE)
+		return -EIO;
+
 	memset(msg, 0, sizeof(*msg));
-	msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, gfp_mask);
+	msg->len = sizeof(*bl_msg) + b->simple.len;
+	msg->data = kzalloc(msg->len, gfp_mask);
 	if (!msg->data)
 		goto out;
 
-	memcpy(msg->data, &bl_msg, sizeof(bl_msg));
-	dataptr = (uint8_t *) msg->data;
-	len = dev->mincount;
-	offset = sizeof(bl_msg);
-	for (i = 0; len > 0; i++) {
-		memcpy(&dataptr[offset], page_address(dev->pages[i]),
-				len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE);
-		len -= PAGE_CACHE_SIZE;
-		offset += PAGE_CACHE_SIZE;
-	}
-	msg->len = sizeof(bl_msg) + dev->mincount;
+	bl_msg = msg->data;
+	bl_msg->type = BL_DEVICE_MOUNT,
+	bl_msg->totallen = b->simple.len;
+	nfs4_encode_simple(msg->data + sizeof(*bl_msg), b);
 
 	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
 	add_wait_queue(&nn->bl_wq, &wq);
@@ -142,46 +101,10 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev,
 		goto out;
 	}
 
-	bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor),
-			       FMODE_READ, NULL);
-	if (IS_ERR(bd)) {
-		printk(KERN_WARNING "%s failed to open device %d:%d (%ld)\n",
-			__func__, reply->major, reply->minor,
-			PTR_ERR(bd));
-		goto out;
-	}
-
-	rv = kzalloc(sizeof(*rv), gfp_mask);
-	if (!rv)
-		goto out;
-
-	nfs4_init_deviceid_node(&rv->d_node, server, &dev->dev_id);
-	rv->d_bdev = bd;
-
-	dprintk("%s Created device %s with bd_block_size %u\n",
-		__func__,
-		bd->bd_disk->disk_name,
-		bd->bd_block_size);
-
-	kfree(msg->data);
-	return &rv->d_node;
-
+	dev = MKDEV(reply->major, reply->minor);
 out:
 	kfree(msg->data);
-	return NULL;
-}
-
-void
-bl_free_deviceid_node(struct nfs4_deviceid_node *d)
-{
-	struct pnfs_block_dev *dev =
-		container_of(d, struct pnfs_block_dev, d_node);
-	struct net *net = d->nfs_client->cl_net;
-
-	blkdev_put(dev->d_bdev, FMODE_READ);
-	bl_dm_remove(net, dev->d_bdev->bd_dev);
-
-	kfree(dev);
-}
+	return dev;
+}
 
 static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
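
[Note: bl_resolve_deviceid() now sends the userspace daemon only a re-encoded SIMPLE volume instead of the raw GETDEVICEINFO pages, and gets back a dev_t; opening the block device happens in the kernel, in bl_parse_simple() in dev.c. The payload size follows the b->simple.len accounting kept by nfs4_block_decode_volume(): a volume count, the type, the signature count, then offset/length/bytes per signature. A sketch of that length computation, with a hypothetical 16-byte signature:

    #include <stdint.h>
    #include <stdio.h>

    /* mirrors b->simple.len: 4 (count) + 4 (type) + 4 (nr_sigs), plus
     * 8 (offset) + 4 (sig_len) + sig_len bytes per signature */
    static uint32_t simple_payload_len(uint32_t nr_sigs, const uint32_t *sig_len)
    {
        uint32_t len = 4 + 4 + 4;
        for (uint32_t i = 0; i < nr_sigs; i++)
            len += 8 + 4 + sig_len[i];
        return len;
    }

    int main(void)
    {
        uint32_t sig_len[1] = { 16 };
        printf("%u\n", simple_payload_len(1, sig_len));   /* prints 40 */
        return 0;
    }
]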