aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Theodore Ts'o <tytso@mit.edu> 2017-04-28 09:51:54 -0400
committer Theodore Ts'o <tytso@mit.edu> 2017-04-28 09:51:54 -0400
commit 80a2ea9f85850f1cdae814be03b4a16c3d3abc00 (patch)
tree 82d954dc3c6f2f883b7c37a80386804edbfb913d
parent 39da7c509acff13fc8cb12ec1bb20337c988ed36 (diff)
mm: retry writepages() on ENOMEM when doing a data integrity writeback
Currently, a file system's writepages() function must not fail with ENOMEM, since if it does, it's possible for buffered data to be lost. This is because on a data integrity writeback writepages() gets called only once, and if it returns ENOMEM, if you're lucky the error will get reflected back to the userspace process calling fsync(). If you aren't lucky, the user is unmounting the file system, and the dirty pages will simply be lost. For this reason, file system code generally will use GFP_NOFS, and in some cases, will retry the allocation in a loop, on the theory that "kernel livelocks are temporary; data loss is forever". Unfortunately, this can indeed cause livelocks, since inside the writepages() call, the file system is holding various mutexes, and these mutexes may prevent the OOM killer from killing its targeted victim if it is also holding on to those mutexes. A better solution would be to allow writepages() to call the memory allocator with flags that give greater latitude to the allocator to fail, and then release its locks and return ENOMEM, and in the case of background writeback, the writes can be retried at a later time. In the case of data-integrity writeback, do_writepages() retries the call after waiting a brief amount of time. Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r-- mm/page-writeback.c 14
1 files changed, 10 insertions, 4 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d8ac2a7fb9e7..03a70d8a6030 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2353,10 +2353,16 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2353 2353
2354 if (wbc->nr_to_write <= 0) 2354 if (wbc->nr_to_write <= 0)
2355 return 0; 2355 return 0;
2356 if (mapping->a_ops->writepages) 2356 while (1) {
2357 ret = mapping->a_ops->writepages(mapping, wbc); 2357 if (mapping->a_ops->writepages)
2358 else 2358 ret = mapping->a_ops->writepages(mapping, wbc);
2359 ret = generic_writepages(mapping, wbc); 2359 else
2360 ret = generic_writepages(mapping, wbc);
2361 if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
2362 break;
2363 cond_resched();
2364 congestion_wait(BLK_RW_ASYNC, HZ/50);
2365 }
2360 return ret; 2366 return ret;
2361} 2367}
2362 2368