diff options
author | Fred Isaman <iisaman@netapp.com> | 2011-03-03 10:13:47 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2011-03-11 15:38:44 -0500 |
commit | a69aef1496726ed88386dad65abfcc8cd3195304 (patch) | |
tree | 66766c9eed54e6dc7ddf9ecaf8765a7bc833685e /fs/nfs | |
parent | 7ffd10640dc008f6d5a375bd6450755745c63c7d (diff) |
NFSv4.1: pnfs filelayout driver write
Allows the pnfs filelayout driver to write to the data servers.
Note that COMMIT to data servers will be implemented in a future
patch. To avoid improper behavior, for the moment any WRITE to a data
server that would also require a COMMIT to the data server is sent
with its stable argument set to NFS_FILE_SYNC.
Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Mingyang Guo <guomingyang@nrchpc.ac.cn>
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/internal.h | 5 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayout.c | 101 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 17 | ||||
-rw-r--r-- | fs/nfs/write.c | 5 |
4 files changed, 126 insertions, 2 deletions
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1a3228e9ea22..d1ddc23c404d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -276,6 +276,10 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, | |||
276 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); | 276 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); |
277 | 277 | ||
278 | /* write.c */ | 278 | /* write.c */ |
279 | extern int nfs_initiate_write(struct nfs_write_data *data, | ||
280 | struct rpc_clnt *clnt, | ||
281 | const struct rpc_call_ops *call_ops, | ||
282 | int how); | ||
279 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); | 283 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); |
280 | #ifdef CONFIG_MIGRATION | 284 | #ifdef CONFIG_MIGRATION |
281 | extern int nfs_migrate_page(struct address_space *, | 285 | extern int nfs_migrate_page(struct address_space *, |
@@ -291,6 +295,7 @@ extern int nfs4_init_client(struct nfs_client *clp, | |||
291 | const char *ip_addr, | 295 | const char *ip_addr, |
292 | rpc_authflavor_t authflavour, | 296 | rpc_authflavor_t authflavour, |
293 | int noresvport); | 297 | int noresvport); |
298 | extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); | ||
294 | extern int _nfs4_call_sync(struct nfs_server *server, | 299 | extern int _nfs4_call_sync(struct nfs_server *server, |
295 | struct rpc_message *msg, | 300 | struct rpc_message *msg, |
296 | struct nfs4_sequence_args *args, | 301 | struct nfs4_sequence_args *args, |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 9d21bfeec88f..7e1d4571b7b2 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -189,12 +189,69 @@ static void filelayout_read_release(void *data) | |||
189 | rdata->mds_ops->rpc_release(data); | 189 | rdata->mds_ops->rpc_release(data); |
190 | } | 190 | } |
191 | 191 | ||
192 | static int filelayout_write_done_cb(struct rpc_task *task, | ||
193 | struct nfs_write_data *data) | ||
194 | { | ||
195 | int reset = 0; | ||
196 | |||
197 | if (filelayout_async_handle_error(task, data->args.context->state, | ||
198 | data->ds_clp, &reset) == -EAGAIN) { | ||
199 | struct nfs_client *clp; | ||
200 | |||
201 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", | ||
202 | __func__, data->ds_clp, data->ds_clp->cl_session); | ||
203 | if (reset) { | ||
204 | filelayout_set_lo_fail(data->lseg); | ||
205 | nfs4_reset_write(task, data); | ||
206 | clp = NFS_SERVER(data->inode)->nfs_client; | ||
207 | } else | ||
208 | clp = data->ds_clp; | ||
209 | nfs_restart_rpc(task, clp); | ||
210 | return -EAGAIN; | ||
211 | } | ||
212 | |||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static void filelayout_write_prepare(struct rpc_task *task, void *data) | ||
217 | { | ||
218 | struct nfs_write_data *wdata = (struct nfs_write_data *)data; | ||
219 | |||
220 | if (nfs41_setup_sequence(wdata->ds_clp->cl_session, | ||
221 | &wdata->args.seq_args, &wdata->res.seq_res, | ||
222 | 0, task)) | ||
223 | return; | ||
224 | |||
225 | rpc_call_start(task); | ||
226 | } | ||
227 | |||
228 | static void filelayout_write_call_done(struct rpc_task *task, void *data) | ||
229 | { | ||
230 | struct nfs_write_data *wdata = (struct nfs_write_data *)data; | ||
231 | |||
232 | /* Note this may cause RPC to be resent */ | ||
233 | wdata->mds_ops->rpc_call_done(task, data); | ||
234 | } | ||
235 | |||
236 | static void filelayout_write_release(void *data) | ||
237 | { | ||
238 | struct nfs_write_data *wdata = (struct nfs_write_data *)data; | ||
239 | |||
240 | wdata->mds_ops->rpc_release(data); | ||
241 | } | ||
242 | |||
192 | struct rpc_call_ops filelayout_read_call_ops = { | 243 | struct rpc_call_ops filelayout_read_call_ops = { |
193 | .rpc_call_prepare = filelayout_read_prepare, | 244 | .rpc_call_prepare = filelayout_read_prepare, |
194 | .rpc_call_done = filelayout_read_call_done, | 245 | .rpc_call_done = filelayout_read_call_done, |
195 | .rpc_release = filelayout_read_release, | 246 | .rpc_release = filelayout_read_release, |
196 | }; | 247 | }; |
197 | 248 | ||
249 | struct rpc_call_ops filelayout_write_call_ops = { | ||
250 | .rpc_call_prepare = filelayout_write_prepare, | ||
251 | .rpc_call_done = filelayout_write_call_done, | ||
252 | .rpc_release = filelayout_write_release, | ||
253 | }; | ||
254 | |||
198 | static enum pnfs_try_status | 255 | static enum pnfs_try_status |
199 | filelayout_read_pagelist(struct nfs_read_data *data) | 256 | filelayout_read_pagelist(struct nfs_read_data *data) |
200 | { | 257 | { |
@@ -238,10 +295,52 @@ filelayout_read_pagelist(struct nfs_read_data *data) | |||
238 | return PNFS_ATTEMPTED; | 295 | return PNFS_ATTEMPTED; |
239 | } | 296 | } |
240 | 297 | ||
298 | /* Perform async writes. */ | ||
241 | static enum pnfs_try_status | 299 | static enum pnfs_try_status |
242 | filelayout_write_pagelist(struct nfs_write_data *data, int sync) | 300 | filelayout_write_pagelist(struct nfs_write_data *data, int sync) |
243 | { | 301 | { |
244 | return PNFS_NOT_ATTEMPTED; | 302 | struct pnfs_layout_segment *lseg = data->lseg; |
303 | struct nfs4_pnfs_ds *ds; | ||
304 | loff_t offset = data->args.offset; | ||
305 | u32 j, idx; | ||
306 | struct nfs_fh *fh; | ||
307 | int status; | ||
308 | |||
309 | /* Retrieve the correct rpc_client for the byte range */ | ||
310 | j = nfs4_fl_calc_j_index(lseg, offset); | ||
311 | idx = nfs4_fl_calc_ds_index(lseg, j); | ||
312 | ds = nfs4_fl_prepare_ds(lseg, idx); | ||
313 | if (!ds) { | ||
314 | printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); | ||
315 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
316 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
317 | return PNFS_NOT_ATTEMPTED; | ||
318 | } | ||
319 | dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, | ||
320 | data->inode->i_ino, sync, (size_t) data->args.count, offset, | ||
321 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | ||
322 | |||
323 | /* We can't handle commit to ds yet */ | ||
324 | if (!FILELAYOUT_LSEG(lseg)->commit_through_mds) | ||
325 | data->args.stable = NFS_FILE_SYNC; | ||
326 | |||
327 | data->write_done_cb = filelayout_write_done_cb; | ||
328 | data->ds_clp = ds->ds_clp; | ||
329 | fh = nfs4_fl_select_ds_fh(lseg, j); | ||
330 | if (fh) | ||
331 | data->args.fh = fh; | ||
332 | /* | ||
333 | * Get the file offset on the dserver. Set the write offset to | ||
334 | * this offset and save the original offset. | ||
335 | */ | ||
336 | data->args.offset = filelayout_get_dserver_offset(lseg, offset); | ||
337 | data->mds_offset = offset; | ||
338 | |||
339 | /* Perform an asynchronous write */ | ||
340 | status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, | ||
341 | &filelayout_write_call_ops, sync); | ||
342 | BUG_ON(status != 0); | ||
343 | return PNFS_ATTEMPTED; | ||
245 | } | 344 | } |
246 | 345 | ||
247 | /* | 346 | /* |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index da902123ec53..7b4b9f3e9842 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -3145,6 +3145,23 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
3145 | return data->write_done_cb(task, data); | 3145 | return data->write_done_cb(task, data); |
3146 | } | 3146 | } |
3147 | 3147 | ||
3148 | /* Reset the nfs_write_data to send the write to the MDS. */ | ||
3149 | void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) | ||
3150 | { | ||
3151 | dprintk("%s Reset task for i/o through\n", __func__); | ||
3152 | put_lseg(data->lseg); | ||
3153 | data->lseg = NULL; | ||
3154 | data->ds_clp = NULL; | ||
3155 | data->write_done_cb = nfs4_write_done_cb; | ||
3156 | data->args.fh = NFS_FH(data->inode); | ||
3157 | data->args.bitmask = data->res.server->cache_consistency_bitmask; | ||
3158 | data->args.offset = data->mds_offset; | ||
3159 | data->res.fattr = &data->fattr; | ||
3160 | task->tk_ops = data->mds_ops; | ||
3161 | rpc_task_reset_client(task, NFS_CLIENT(data->inode)); | ||
3162 | } | ||
3163 | EXPORT_SYMBOL_GPL(nfs4_reset_write); | ||
3164 | |||
3148 | static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) | 3165 | static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) |
3149 | { | 3166 | { |
3150 | struct nfs_server *server = NFS_SERVER(data->inode); | 3167 | struct nfs_server *server = NFS_SERVER(data->inode); |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index df99c5b0ee65..ee62ddf60e7a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -783,7 +783,7 @@ static int flush_task_priority(int how) | |||
783 | return RPC_PRIORITY_NORMAL; | 783 | return RPC_PRIORITY_NORMAL; |
784 | } | 784 | } |
785 | 785 | ||
786 | static int nfs_initiate_write(struct nfs_write_data *data, | 786 | int nfs_initiate_write(struct nfs_write_data *data, |
787 | struct rpc_clnt *clnt, | 787 | struct rpc_clnt *clnt, |
788 | const struct rpc_call_ops *call_ops, | 788 | const struct rpc_call_ops *call_ops, |
789 | int how) | 789 | int how) |
@@ -833,6 +833,7 @@ static int nfs_initiate_write(struct nfs_write_data *data, | |||
833 | out: | 833 | out: |
834 | return ret; | 834 | return ret; |
835 | } | 835 | } |
836 | EXPORT_SYMBOL_GPL(nfs_initiate_write); | ||
836 | 837 | ||
837 | /* | 838 | /* |
838 | * Set up the argument/result storage required for the RPC call. | 839 | * Set up the argument/result storage required for the RPC call. |
@@ -1194,6 +1195,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1194 | */ | 1195 | */ |
1195 | static unsigned long complain; | 1196 | static unsigned long complain; |
1196 | 1197 | ||
1198 | /* Note this will print the MDS for a DS write */ | ||
1197 | if (time_before(complain, jiffies)) { | 1199 | if (time_before(complain, jiffies)) { |
1198 | dprintk("NFS: faulty NFS server %s:" | 1200 | dprintk("NFS: faulty NFS server %s:" |
1199 | " (committed = %d) != (stable = %d)\n", | 1201 | " (committed = %d) != (stable = %d)\n", |
@@ -1214,6 +1216,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1214 | /* Was this an NFSv2 write or an NFSv3 stable write? */ | 1216 | /* Was this an NFSv2 write or an NFSv3 stable write? */ |
1215 | if (resp->verf->committed != NFS_UNSTABLE) { | 1217 | if (resp->verf->committed != NFS_UNSTABLE) { |
1216 | /* Resend from where the server left off */ | 1218 | /* Resend from where the server left off */ |
1219 | data->mds_offset += resp->count; | ||
1217 | argp->offset += resp->count; | 1220 | argp->offset += resp->count; |
1218 | argp->pgbase += resp->count; | 1221 | argp->pgbase += resp->count; |
1219 | argp->count -= resp->count; | 1222 | argp->count -= resp->count; |