aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2009-03-11 14:10:30 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2009-03-11 14:10:30 -0400
commit72cb77f4a5ace37b12dcb47a0e8637a2c28ad881 (patch)
tree62fbdd7a34884f3ea9e1b0ba6bd5a4d78263f847
parentfb8a1f11b64e213d94dfa1cebb2a42a7b8c115c4 (diff)
NFS: Throttle page dirtying while we're flushing to disk
The following patch is a combination of a patch by myself and Peter Staubach. Trond: If we allow other processes to dirty pages while a process is doing a consistency sync to disk, we can end up never making progress. Peter: Attached is a patch which addresses a continuing problem with the NFS client generating out of order WRITE requests. While this is compliant with all of the current protocol specifications, there are servers in the market which can not handle out of order WRITE requests very well. Also, this may lead to sub-optimal block allocations in the underlying file system on the server. This may cause the read throughputs to be reduced when reading the file from the server. Peter: There has been a lot of work recently done to address out of order issues on a systemic level. However, the NFS client is still susceptible to the problem. Out of order WRITE requests can occur when pdflush is in the middle of writing out pages while the process dirtying the pages calls generic_file_buffered_write which calls generic_perform_write which calls balance_dirty_pages_rate_limited which ends up calling writeback_inodes which ends up calling back into the NFS client to writes out dirty pages for the same file that pdflush happens to be working with. Signed-off-by: Peter Staubach <staubach@redhat.com> [modification by Trond to merge the two similar patches] Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--fs/nfs/file.c9
-rw-r--r--fs/nfs/inode.c12
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs4proc.c10
-rw-r--r--fs/nfs/pagelist.c11
-rw-r--r--fs/nfs/write.c28
-rw-r--r--include/linux/nfs_fs.h1
7 files changed, 43 insertions, 29 deletions
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 90f292b520d2..404c19c866a7 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -354,6 +354,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
354 file->f_path.dentry->d_name.name, 354 file->f_path.dentry->d_name.name,
355 mapping->host->i_ino, len, (long long) pos); 355 mapping->host->i_ino, len, (long long) pos);
356 356
357 /*
358 * Prevent starvation issues if someone is doing a consistency
359 * sync-to-disk
360 */
361 ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
362 nfs_wait_bit_killable, TASK_KILLABLE);
363 if (ret)
364 return ret;
365
357 page = grab_cache_page_write_begin(mapping, index, flags); 366 page = grab_cache_page_write_begin(mapping, index, flags);
358 if (!page) 367 if (!page)
359 return -ENOMEM; 368 return -ENOMEM;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 00f116cdadc6..c40adc5dd609 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -66,6 +66,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
66} 66}
67 67
68/** 68/**
69 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
70 * @word: long word containing the bit lock
71 */
72int nfs_wait_bit_killable(void *word)
73{
74 if (fatal_signal_pending(current))
75 return -ERESTARTSYS;
76 schedule();
77 return 0;
78}
79
80/**
69 * nfs_compat_user_ino64 - returns the user-visible inode number 81 * nfs_compat_user_ino64 - returns the user-visible inode number
70 * @fileid: 64-bit fileid 82 * @fileid: 64-bit fileid
71 * 83 *
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 340ede8f608f..a55e69aa52e5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -165,6 +165,7 @@ extern void nfs_clear_inode(struct inode *);
165extern void nfs4_clear_inode(struct inode *); 165extern void nfs4_clear_inode(struct inode *);
166#endif 166#endif
167void nfs_zap_acl_cache(struct inode *inode); 167void nfs_zap_acl_cache(struct inode *inode);
168extern int nfs_wait_bit_killable(void *word);
168 169
169/* super.c */ 170/* super.c */
170void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); 171void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 101f5f4c304f..95f171e7e05a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -193,14 +193,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
193 kunmap_atomic(start, KM_USER0); 193 kunmap_atomic(start, KM_USER0);
194} 194}
195 195
196static int nfs4_wait_bit_killable(void *word)
197{
198 if (fatal_signal_pending(current))
199 return -ERESTARTSYS;
200 schedule();
201 return 0;
202}
203
204static int nfs4_wait_clnt_recover(struct nfs_client *clp) 196static int nfs4_wait_clnt_recover(struct nfs_client *clp)
205{ 197{
206 int res; 198 int res;
@@ -208,7 +200,7 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp)
208 might_sleep(); 200 might_sleep();
209 201
210 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, 202 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
211 nfs4_wait_bit_killable, TASK_KILLABLE); 203 nfs_wait_bit_killable, TASK_KILLABLE);
212 return res; 204 return res;
213} 205}
214 206
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7f079209d70a..e2975939126a 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -176,17 +176,6 @@ void nfs_release_request(struct nfs_page *req)
176 kref_put(&req->wb_kref, nfs_free_request); 176 kref_put(&req->wb_kref, nfs_free_request);
177} 177}
178 178
179static int nfs_wait_bit_killable(void *word)
180{
181 int ret = 0;
182
183 if (fatal_signal_pending(current))
184 ret = -ERESTARTSYS;
185 else
186 schedule();
187 return ret;
188}
189
190/** 179/**
191 * nfs_wait_on_request - Wait for a request to complete. 180 * nfs_wait_on_request - Wait for a request to complete.
192 * @req: request to wait upon. 181 * @req: request to wait upon.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1a999939fedf..36fd35e0de83 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -313,19 +313,34 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
313int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) 313int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
314{ 314{
315 struct inode *inode = mapping->host; 315 struct inode *inode = mapping->host;
316 unsigned long *bitlock = &NFS_I(inode)->flags;
316 struct nfs_pageio_descriptor pgio; 317 struct nfs_pageio_descriptor pgio;
317 int err; 318 int err;
318 319
320 /* Stop dirtying of new pages while we sync */
321 err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
322 nfs_wait_bit_killable, TASK_KILLABLE);
323 if (err)
324 goto out_err;
325
319 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 326 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
320 327
321 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); 328 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
322 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 329 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
323 nfs_pageio_complete(&pgio); 330 nfs_pageio_complete(&pgio);
331
332 clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
333 smp_mb__after_clear_bit();
334 wake_up_bit(bitlock, NFS_INO_FLUSHING);
335
324 if (err < 0) 336 if (err < 0)
325 return err; 337 goto out_err;
326 if (pgio.pg_error < 0) 338 err = pgio.pg_error;
327 return pgio.pg_error; 339 if (err < 0)
340 goto out_err;
328 return 0; 341 return 0;
342out_err:
343 return err;
329} 344}
330 345
331/* 346/*
@@ -1432,18 +1447,13 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
1432{ 1447{
1433 struct writeback_control wbc = { 1448 struct writeback_control wbc = {
1434 .bdi = mapping->backing_dev_info, 1449 .bdi = mapping->backing_dev_info,
1435 .sync_mode = WB_SYNC_NONE, 1450 .sync_mode = WB_SYNC_ALL,
1436 .nr_to_write = LONG_MAX, 1451 .nr_to_write = LONG_MAX,
1437 .range_start = 0, 1452 .range_start = 0,
1438 .range_end = LLONG_MAX, 1453 .range_end = LLONG_MAX,
1439 .for_writepages = 1, 1454 .for_writepages = 1,
1440 }; 1455 };
1441 int ret;
1442 1456
1443 ret = __nfs_write_mapping(mapping, &wbc, how);
1444 if (ret < 0)
1445 return ret;
1446 wbc.sync_mode = WB_SYNC_ALL;
1447 return __nfs_write_mapping(mapping, &wbc, how); 1457 return __nfs_write_mapping(mapping, &wbc, how);
1448} 1458}
1449 1459
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c9fecd3e8f0f..933bc261c0df 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -206,6 +206,7 @@ struct nfs_inode {
206#define NFS_INO_STALE (1) /* possible stale inode */ 206#define NFS_INO_STALE (1) /* possible stale inode */
207#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ 207#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */
208#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ 208#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */
209#define NFS_INO_FLUSHING (4) /* inode is flushing out data */
209 210
210static inline struct nfs_inode *NFS_I(const struct inode *inode) 211static inline struct nfs_inode *NFS_I(const struct inode *inode)
211{ 212{