summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKazuo Ito <ito_kazuo_g3@lab.ntt.co.jp>2019-02-14 04:39:03 -0500
committerTrond Myklebust <trond.myklebust@hammerspace.com>2019-02-20 17:33:55 -0500
commit2cde04e90d5be46b4b6655b965b496e6b6f18e49 (patch)
tree5fc7ea30fb06a4fe419b71ec9e29bc1173a97a6b
parent97ae91bbf3a70fc8cee3c9030564cfc892cc8cee (diff)
pNFS: Avoid read/modify/write when it is not necessary
As the block and SCSI layouts can only read/write fixed-length
blocks, we must perform read-modify-write when data to be written is
not aligned to a block boundary or smaller than the block size.
(612aa983a0410 pnfs: add flag to force read-modify-write in
->write_begin)

The current code tries to see if we have to do read-modify-write
on block-oriented pNFS layouts by just checking !PageUptodate(page),
but the same condition also applies for overwriting of any uncached
portions of existing files, making such operations excessively slow
even if it is block-aligned.

The change does not affect the optimization for modify-write-read
cases (38c73044f5f4d NFS: read-modify-write page updating),
because partial update of !PageUptodate() pages can only happen
in layouts that can do arbitrary length read/write and never
in block-based ones.

Testing results:

We ran fio on one of the pNFS clients running 4.20 kernel
(vanilla and patched) in this configuration to read/write/overwrite
files on the storage array, exported as pnfs share by the server.

pNFS clients ---1G Ethernet--- pNFS server
(HP DL360 G8)                  (HP DL360 G8)
      |                             |
      |                             |
      +------8G Fiber Channel--------+
                     |
               Storage Array
                (HP P6350)

Throughput of overwrite (both buffered and O_SYNC) is noticeably
improved.

Ops.     |block size|   Throughput   |
         |  (KiB)   |    (MiB/s)     |
         |          |  4.20 | patched|
---------+----------+----------------+
buffered |         4|  21.3 |  232   |
overwrite|        32|  22.2 |  256   |
         |       512|  22.4 |  260   |
---------+----------+----------------+
O_SYNC   |         4|   3.84|    4.77|
overwrite|        32|  12.2 |   32.0 |
         |       512|  18.5 |  152   |
---------+----------+----------------+

Read and write (buffered and O_SYNC) by the same client remain
unchanged by the patch either negatively or positively, as they should
do.

Ops.     |block size|   Throughput   |
         |  (KiB)   |    (MiB/s)     |
         |          |  4.20 | patched|
---------+----------+----------------+
read     |         4| 548   |  550   |
         |        32| 547   |  551   |
         |       512| 548   |  551   |
---------+----------+----------------+
buffered |         4| 237   |  244   |
write    |        32| 261   |  268   |
         |       512| 265   |  272   |
---------+----------+----------------+
O_SYNC   |         4|   0.46|    0.46|
write    |        32|   3.60|    3.57|
         |       512| 105   |  106   |
---------+----------+----------------+

Signed-off-by: Kazuo Ito <ito_kazuo_g3@lab.ntt.co.jp>
Tested-by: Hiroyuki Watanabe <watanabe.hiroyuki@lab.ntt.co.jp>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
-rw-r--r--fs/nfs/file.c40
1 files changed, 26 insertions, 14 deletions
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7086d5677ed3..4899b85f9b3c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -276,6 +276,12 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
276 * then a modify/write/read cycle when writing to a page in the 276 * then a modify/write/read cycle when writing to a page in the
277 * page cache. 277 * page cache.
278 * 278 *
279 * Some pNFS layout drivers can only read/write at a certain block
280 * granularity like all block devices and therefore we must perform
281 * read/modify/write whenever a page hasn't read yet and the data
282 * to be written there is not aligned to a block boundary and/or
283 * smaller than the block size.
284 *
279 * The modify/write/read cycle may occur if a page is read before 285 * The modify/write/read cycle may occur if a page is read before
280 * being completely filled by the writer. In this situation, the 286 * being completely filled by the writer. In this situation, the
281 * page must be completely written to stable storage on the server 287 * page must be completely written to stable storage on the server
@@ -291,26 +297,32 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
291 * and that the new data won't completely replace the old data in 297 * and that the new data won't completely replace the old data in
292 * that range of the file. 298 * that range of the file.
293 */ 299 */
294static int nfs_want_read_modify_write(struct file *file, struct page *page, 300static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len)
295 loff_t pos, unsigned len)
296{ 301{
297 unsigned int pglen = nfs_page_length(page); 302 unsigned int pglen = nfs_page_length(page);
298 unsigned int offset = pos & (PAGE_SIZE - 1); 303 unsigned int offset = pos & (PAGE_SIZE - 1);
299 unsigned int end = offset + len; 304 unsigned int end = offset + len;
300 305
301 if (pnfs_ld_read_whole_page(file->f_mapping->host)) { 306 return !pglen || (end >= pglen && !offset);
302 if (!PageUptodate(page) && !PagePrivate(page)) 307}
303 return 1;
304 return 0;
305 }
306 308
307 if ((file->f_mode & FMODE_READ) && /* open for read? */ 309static bool nfs_want_read_modify_write(struct file *file, struct page *page,
308 !PageUptodate(page) && /* Uptodate? */ 310 loff_t pos, unsigned int len)
309 !PagePrivate(page) && /* i/o request already? */ 311{
310 pglen && /* valid bytes of file? */ 312 /*
311 (end < pglen || offset)) /* replace all valid bytes? */ 313 * Up-to-date pages, those with ongoing or full-page write
312 return 1; 314 * don't need read/modify/write
313 return 0; 315 */
316 if (PageUptodate(page) || PagePrivate(page) ||
317 nfs_full_page_write(page, pos, len))
318 return false;
319
320 if (pnfs_ld_read_whole_page(file->f_mapping->host))
321 return true;
322 /* Open for reading too? */
323 if (file->f_mode & FMODE_READ)
324 return true;
325 return false;
314} 326}
315 327
316/* 328/*