path: root/mm/shmem.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-26 00:00:19 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-26 00:00:19 -0400
commit	45b583b10a8b438b970e95a7d1d4db22c9e35004 (patch)
tree	14fa481598289df0459580c582b48a9d95db51f6 /mm/shmem.c
parent	154dd78d30b56ffb8b447f629bfcceb14150e5c4 (diff)
parent	f19da2ce8ef5e49b8b8ea199c3601dd45d71b262 (diff)
Merge 'akpm' patch series
* Merge akpm patch series: (122 commits)
  drivers/connector/cn_proc.c: remove unused local
  Documentation/SubmitChecklist: add RCU debug config options
  reiserfs: use hweight_long()
  reiserfs: use proper little-endian bitops
  pnpacpi: register disabled resources
  drivers/rtc/rtc-tegra.c: properly initialize spinlock
  drivers/rtc/rtc-twl.c: check return value of twl_rtc_write_u8() in twl_rtc_set_time()
  drivers/rtc: add support for Qualcomm PMIC8xxx RTC
  drivers/rtc/rtc-s3c.c: support clock gating
  drivers/rtc/rtc-mpc5121.c: add support for RTC on MPC5200
  init: skip calibration delay if previously done
  misc/eeprom: add eeprom access driver for digsy_mtc board
  misc/eeprom: add driver for microwire 93xx46 EEPROMs
  checkpatch.pl: update $logFunctions
  checkpatch: make utf-8 test --strict
  checkpatch.pl: add ability to ignore various messages
  checkpatch: add a "prefer __aligned" check
  checkpatch: validate signature styles and To: and Cc: lines
  checkpatch: add __rcu as a sparse modifier
  checkpatch: suggest using min_t or max_t
  ...

Did this as a merge because of (trivial) conflicts in
  - Documentation/feature-removal-schedule.txt
  - arch/xtensa/include/asm/uaccess.h
that were just easier to fix up in the merge than in the patch series.
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--	mm/shmem.c	552
1 file changed, 296 insertions, 256 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 3e519798b522..5cc21f8b4cd3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -51,6 +51,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/shmem_fs.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/splice.h>
 #include <linux/security.h>
 #include <linux/swapops.h>
 #include <linux/mempolicy.h>
@@ -126,8 +127,15 @@ static unsigned long shmem_default_max_inodes(void)
 }
 #endif
 
-static int shmem_getpage(struct inode *inode, unsigned long idx,
-			struct page **pagep, enum sgp_type sgp, int *type);
+static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
+	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
+
+static inline int shmem_getpage(struct inode *inode, pgoff_t index,
+	struct page **pagep, enum sgp_type sgp, int *fault_type)
+{
+	return shmem_getpage_gfp(inode, index, pagep, sgp,
+			mapping_gfp_mask(inode->i_mapping), fault_type);
+}
 
 static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
 {
@@ -241,9 +249,7 @@ static void shmem_free_blocks(struct inode *inode, long pages)
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	if (sbinfo->max_blocks) {
 		percpu_counter_add(&sbinfo->used_blocks, -pages);
-		spin_lock(&inode->i_lock);
 		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
-		spin_unlock(&inode->i_lock);
 	}
 }
 
@@ -405,10 +411,12 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
  * @info: info structure for the inode
  * @index: index of the page to find
  * @sgp: check and recheck i_size? skip allocation?
+ * @gfp: gfp mask to use for any page allocation
  *
  * If the entry does not exist, allocate it.
  */
-static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
+static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info,
+			unsigned long index, enum sgp_type sgp, gfp_t gfp)
 {
 	struct inode *inode = &info->vfs_inode;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
@@ -432,13 +440,11 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 					sbinfo->max_blocks - 1) >= 0)
 				return ERR_PTR(-ENOSPC);
 			percpu_counter_inc(&sbinfo->used_blocks);
-			spin_lock(&inode->i_lock);
 			inode->i_blocks += BLOCKS_PER_PAGE;
-			spin_unlock(&inode->i_lock);
 		}
 
 		spin_unlock(&info->lock);
-		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
+		page = shmem_dir_alloc(gfp);
 		spin_lock(&info->lock);
 
 		if (!page) {
@@ -966,20 +972,7 @@ found:
 	error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
 	/* which does mem_cgroup_uncharge_cache_page on error */
 
-	if (error == -EEXIST) {
-		struct page *filepage = find_get_page(mapping, idx);
-		error = 1;
-		if (filepage) {
-			/*
-			 * There might be a more uptodate page coming down
-			 * from a stacked writepage: forget our swappage if so.
-			 */
-			if (PageUptodate(filepage))
-				error = 0;
-			page_cache_release(filepage);
-		}
-	}
-	if (!error) {
+	if (error != -ENOMEM) {
 		delete_from_swap_cache(page);
 		set_page_dirty(page);
 		info->flags |= SHMEM_PAGEIN;
@@ -1066,16 +1059,17 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	/*
 	 * shmem_backing_dev_info's capabilities prevent regular writeback or
 	 * sync from ever calling shmem_writepage; but a stacking filesystem
-	 * may use the ->writepage of its underlying filesystem, in which case
+	 * might use ->writepage of its underlying filesystem, in which case
 	 * tmpfs should write out to swap only in response to memory pressure,
-	 * and not for the writeback threads or sync. However, in those cases,
-	 * we do still want to check if there's a redundant swappage to be
-	 * discarded.
+	 * and not for the writeback threads or sync.
 	 */
-	if (wbc->for_reclaim)
-		swap = get_swap_page();
-	else
-		swap.val = 0;
+	if (!wbc->for_reclaim) {
+		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
+		goto redirty;
+	}
+	swap = get_swap_page();
+	if (!swap.val)
+		goto redirty;
 
 	/*
 	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
@@ -1086,15 +1080,12 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	 * we've taken the spinlock, because shmem_unuse_inode() will
 	 * prune a !swapped inode from the swaplist under both locks.
 	 */
-	if (swap.val) {
-		mutex_lock(&shmem_swaplist_mutex);
-		if (list_empty(&info->swaplist))
-			list_add_tail(&info->swaplist, &shmem_swaplist);
-	}
+	mutex_lock(&shmem_swaplist_mutex);
+	if (list_empty(&info->swaplist))
+		list_add_tail(&info->swaplist, &shmem_swaplist);
 
 	spin_lock(&info->lock);
-	if (swap.val)
-		mutex_unlock(&shmem_swaplist_mutex);
+	mutex_unlock(&shmem_swaplist_mutex);
 
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
@@ -1102,16 +1093,13 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	}
 	entry = shmem_swp_entry(info, index, NULL);
 	if (entry->val) {
-		/*
-		 * The more uptodate page coming down from a stacked
-		 * writepage should replace our old swappage.
-		 */
+		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
 		free_swap_and_cache(*entry);
 		shmem_swp_set(info, entry, 0);
 	}
 	shmem_recalc_inode(inode);
 
-	if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
+	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
 		delete_from_page_cache(page);
 		shmem_swp_set(info, entry, swap.val);
 		shmem_swp_unmap(entry);
@@ -1228,92 +1216,83 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 #endif
 
 /*
- * shmem_getpage - either get the page from swap or allocate a new one
+ * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
  *
  * If we allocate a new one we do not mark it dirty. That's up to the
  * vm. If we swap it in we mark it dirty since we also free the swap
  * entry since a page cannot live in both the swap and page cache
  */
-static int shmem_getpage(struct inode *inode, unsigned long idx,
-			struct page **pagep, enum sgp_type sgp, int *type)
+static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx,
+	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo;
-	struct page *filepage = *pagep;
-	struct page *swappage;
+	struct page *page;
 	struct page *prealloc_page = NULL;
 	swp_entry_t *entry;
 	swp_entry_t swap;
-	gfp_t gfp;
 	int error;
+	int ret;
 
 	if (idx >= SHMEM_MAX_INDEX)
 		return -EFBIG;
-
-	if (type)
-		*type = 0;
-
-	/*
-	 * Normally, filepage is NULL on entry, and either found
-	 * uptodate immediately, or allocated and zeroed, or read
-	 * in under swappage, which is then assigned to filepage.
-	 * But shmem_readpage (required for splice) passes in a locked
-	 * filepage, which may be found not uptodate by other callers
-	 * too, and may need to be copied from the swappage read in.
-	 */
 repeat:
-	if (!filepage)
-		filepage = find_lock_page(mapping, idx);
-	if (filepage && PageUptodate(filepage))
-		goto done;
-	gfp = mapping_gfp_mask(mapping);
-	if (!filepage) {
+	page = find_lock_page(mapping, idx);
+	if (page) {
 		/*
-		 * Try to preload while we can wait, to not make a habit of
-		 * draining atomic reserves; but don't latch on to this cpu.
+		 * Once we can get the page lock, it must be uptodate:
+		 * if there were an error in reading back from swap,
+		 * the page would not be inserted into the filecache.
 		 */
-		error = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
-		if (error)
-			goto failed;
-		radix_tree_preload_end();
-		if (sgp != SGP_READ && !prealloc_page) {
-			/* We don't care if this fails */
-			prealloc_page = shmem_alloc_page(gfp, info, idx);
-			if (prealloc_page) {
-				if (mem_cgroup_cache_charge(prealloc_page,
-						current->mm, GFP_KERNEL)) {
-					page_cache_release(prealloc_page);
-					prealloc_page = NULL;
-				}
+		BUG_ON(!PageUptodate(page));
+		goto done;
+	}
+
+	/*
+	 * Try to preload while we can wait, to not make a habit of
+	 * draining atomic reserves; but don't latch on to this cpu.
+	 */
+	error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+	if (error)
+		goto out;
+	radix_tree_preload_end();
+
+	if (sgp != SGP_READ && !prealloc_page) {
+		prealloc_page = shmem_alloc_page(gfp, info, idx);
+		if (prealloc_page) {
+			SetPageSwapBacked(prealloc_page);
+			if (mem_cgroup_cache_charge(prealloc_page,
+					current->mm, GFP_KERNEL)) {
+				page_cache_release(prealloc_page);
+				prealloc_page = NULL;
 			}
 		}
 	}
-	error = 0;
 
 	spin_lock(&info->lock);
 	shmem_recalc_inode(inode);
-	entry = shmem_swp_alloc(info, idx, sgp);
+	entry = shmem_swp_alloc(info, idx, sgp, gfp);
 	if (IS_ERR(entry)) {
 		spin_unlock(&info->lock);
 		error = PTR_ERR(entry);
-		goto failed;
+		goto out;
 	}
 	swap = *entry;
 
 	if (swap.val) {
 		/* Look it up and read it in.. */
-		swappage = lookup_swap_cache(swap);
-		if (!swappage) {
+		page = lookup_swap_cache(swap);
+		if (!page) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			/* here we actually do the io */
-			if (type)
-				*type |= VM_FAULT_MAJOR;
-			swappage = shmem_swapin(swap, gfp, info, idx);
-			if (!swappage) {
+			if (fault_type)
+				*fault_type |= VM_FAULT_MAJOR;
+			page = shmem_swapin(swap, gfp, info, idx);
+			if (!page) {
 				spin_lock(&info->lock);
-				entry = shmem_swp_alloc(info, idx, sgp);
+				entry = shmem_swp_alloc(info, idx, sgp, gfp);
 				if (IS_ERR(entry))
 					error = PTR_ERR(entry);
 				else {
@@ -1323,62 +1302,42 @@ repeat:
 				}
 				spin_unlock(&info->lock);
 				if (error)
-					goto failed;
+					goto out;
 				goto repeat;
 			}
-			wait_on_page_locked(swappage);
-			page_cache_release(swappage);
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
 
 		/* We have to do this with page locked to prevent races */
-		if (!trylock_page(swappage)) {
+		if (!trylock_page(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			wait_on_page_locked(swappage);
-			page_cache_release(swappage);
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-		if (PageWriteback(swappage)) {
+		if (PageWriteback(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			wait_on_page_writeback(swappage);
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			wait_on_page_writeback(page);
+			unlock_page(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-		if (!PageUptodate(swappage)) {
+		if (!PageUptodate(page)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			unlock_page(page);
+			page_cache_release(page);
 			error = -EIO;
-			goto failed;
+			goto out;
 		}
 
-		if (filepage) {
-			shmem_swp_set(info, entry, 0);
-			shmem_swp_unmap(entry);
-			delete_from_swap_cache(swappage);
-			spin_unlock(&info->lock);
-			copy_highpage(filepage, swappage);
-			unlock_page(swappage);
-			page_cache_release(swappage);
-			flush_dcache_page(filepage);
-			SetPageUptodate(filepage);
-			set_page_dirty(filepage);
-			swap_free(swap);
-		} else if (!(error = add_to_page_cache_locked(swappage, mapping,
-					idx, GFP_NOWAIT))) {
-			info->flags |= SHMEM_PAGEIN;
-			shmem_swp_set(info, entry, 0);
-			shmem_swp_unmap(entry);
-			delete_from_swap_cache(swappage);
-			spin_unlock(&info->lock);
-			filepage = swappage;
-			set_page_dirty(filepage);
-			swap_free(swap);
-		} else {
+		error = add_to_page_cache_locked(page, mapping,
+						idx, GFP_NOWAIT);
+		if (error) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			if (error == -ENOMEM) {
@@ -1387,32 +1346,38 @@ repeat:
 				 * call memcg's OOM if needed.
 				 */
 				error = mem_cgroup_shmem_charge_fallback(
-								swappage,
-								current->mm,
-								gfp);
+						page, current->mm, gfp);
 				if (error) {
-					unlock_page(swappage);
-					page_cache_release(swappage);
-					goto failed;
+					unlock_page(page);
+					page_cache_release(page);
+					goto out;
 				}
 			}
-			unlock_page(swappage);
-			page_cache_release(swappage);
+			unlock_page(page);
+			page_cache_release(page);
 			goto repeat;
 		}
-	} else if (sgp == SGP_READ && !filepage) {
+
+		info->flags |= SHMEM_PAGEIN;
+		shmem_swp_set(info, entry, 0);
 		shmem_swp_unmap(entry);
-		filepage = find_get_page(mapping, idx);
-		if (filepage &&
-		    (!PageUptodate(filepage) || !trylock_page(filepage))) {
+		delete_from_swap_cache(page);
+		spin_unlock(&info->lock);
+		set_page_dirty(page);
+		swap_free(swap);
+
+	} else if (sgp == SGP_READ) {
+		shmem_swp_unmap(entry);
+		page = find_get_page(mapping, idx);
+		if (page && !trylock_page(page)) {
 			spin_unlock(&info->lock);
-			wait_on_page_locked(filepage);
-			page_cache_release(filepage);
-			filepage = NULL;
+			wait_on_page_locked(page);
+			page_cache_release(page);
 			goto repeat;
 		}
 		spin_unlock(&info->lock);
-	} else {
+
+	} else if (prealloc_page) {
 		shmem_swp_unmap(entry);
 		sbinfo = SHMEM_SB(inode->i_sb);
 		if (sbinfo->max_blocks) {
@@ -1421,126 +1386,86 @@ repeat:
 			    shmem_acct_block(info->flags))
 				goto nospace;
 			percpu_counter_inc(&sbinfo->used_blocks);
-			spin_lock(&inode->i_lock);
 			inode->i_blocks += BLOCKS_PER_PAGE;
-			spin_unlock(&inode->i_lock);
 		} else if (shmem_acct_block(info->flags))
 			goto nospace;
 
-		if (!filepage) {
-			int ret;
-
-			if (!prealloc_page) {
-				spin_unlock(&info->lock);
-				filepage = shmem_alloc_page(gfp, info, idx);
-				if (!filepage) {
-					shmem_unacct_blocks(info->flags, 1);
-					shmem_free_blocks(inode, 1);
-					error = -ENOMEM;
-					goto failed;
-				}
-				SetPageSwapBacked(filepage);
+		page = prealloc_page;
+		prealloc_page = NULL;
 
-				/*
-				 * Precharge page while we can wait, compensate
-				 * after
-				 */
-				error = mem_cgroup_cache_charge(filepage,
-						current->mm, GFP_KERNEL);
-				if (error) {
-					page_cache_release(filepage);
-					shmem_unacct_blocks(info->flags, 1);
-					shmem_free_blocks(inode, 1);
-					filepage = NULL;
-					goto failed;
-				}
-
-				spin_lock(&info->lock);
-			} else {
-				filepage = prealloc_page;
-				prealloc_page = NULL;
-				SetPageSwapBacked(filepage);
-			}
-
-			entry = shmem_swp_alloc(info, idx, sgp);
-			if (IS_ERR(entry))
-				error = PTR_ERR(entry);
-			else {
-				swap = *entry;
-				shmem_swp_unmap(entry);
-			}
-			ret = error || swap.val;
-			if (ret)
-				mem_cgroup_uncharge_cache_page(filepage);
-			else
-				ret = add_to_page_cache_lru(filepage, mapping,
-							idx, GFP_NOWAIT);
-			/*
-			 * At add_to_page_cache_lru() failure, uncharge will
-			 * be done automatically.
-			 */
-			if (ret) {
-				spin_unlock(&info->lock);
-				page_cache_release(filepage);
-				shmem_unacct_blocks(info->flags, 1);
-				shmem_free_blocks(inode, 1);
-				filepage = NULL;
-				if (error)
-					goto failed;
-				goto repeat;
-			}
-			info->flags |= SHMEM_PAGEIN;
+		entry = shmem_swp_alloc(info, idx, sgp, gfp);
+		if (IS_ERR(entry))
+			error = PTR_ERR(entry);
+		else {
+			swap = *entry;
+			shmem_swp_unmap(entry);
+		}
+		ret = error || swap.val;
+		if (ret)
+			mem_cgroup_uncharge_cache_page(page);
+		else
+			ret = add_to_page_cache_lru(page, mapping,
+						idx, GFP_NOWAIT);
+		/*
+		 * At add_to_page_cache_lru() failure,
+		 * uncharge will be done automatically.
+		 */
+		if (ret) {
+			shmem_unacct_blocks(info->flags, 1);
+			shmem_free_blocks(inode, 1);
+			spin_unlock(&info->lock);
+			page_cache_release(page);
+			if (error)
+				goto out;
+			goto repeat;
 		}
 
+		info->flags |= SHMEM_PAGEIN;
 		info->alloced++;
 		spin_unlock(&info->lock);
-		clear_highpage(filepage);
-		flush_dcache_page(filepage);
-		SetPageUptodate(filepage);
+		clear_highpage(page);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
 		if (sgp == SGP_DIRTY)
-			set_page_dirty(filepage);
+			set_page_dirty(page);
+
+	} else {
+		spin_unlock(&info->lock);
+		error = -ENOMEM;
+		goto out;
 	}
 done:
-	*pagep = filepage;
+	*pagep = page;
 	error = 0;
-	goto out;
+out:
+	if (prealloc_page) {
+		mem_cgroup_uncharge_cache_page(prealloc_page);
+		page_cache_release(prealloc_page);
+	}
+	return error;
 
 nospace:
 	/*
 	 * Perhaps the page was brought in from swap between find_lock_page
 	 * and taking info->lock? We allow for that at add_to_page_cache_lru,
 	 * but must also avoid reporting a spurious ENOSPC while working on a
-	 * full tmpfs. (When filepage has been passed in to shmem_getpage, it
-	 * is already in page cache, which prevents this race from occurring.)
+	 * full tmpfs.
 	 */
-	if (!filepage) {
-		struct page *page = find_get_page(mapping, idx);
-		if (page) {
-			spin_unlock(&info->lock);
-			page_cache_release(page);
-			goto repeat;
-		}
-	}
+	page = find_get_page(mapping, idx);
 	spin_unlock(&info->lock);
-	error = -ENOSPC;
-failed:
-	if (*pagep != filepage) {
-		unlock_page(filepage);
-		page_cache_release(filepage);
-	}
-out:
-	if (prealloc_page) {
-		mem_cgroup_uncharge_cache_page(prealloc_page);
-		page_cache_release(prealloc_page);
+	if (page) {
+		page_cache_release(page);
+		goto repeat;
 	}
-	return error;
+	error = -ENOSPC;
+	goto out;
 }
 
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	int error;
-	int ret;
+	int ret = VM_FAULT_LOCKED;
 
 	if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
@@ -1548,11 +1473,12 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
+
 	if (ret & VM_FAULT_MAJOR) {
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 	}
-	return ret | VM_FAULT_LOCKED;
+	return ret;
 }
 
 #ifdef CONFIG_NUMA
@@ -1669,19 +1595,6 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_symlink_inline_operations;
 
-/*
- * Normally tmpfs avoids the use of shmem_readpage and shmem_write_begin;
- * but providing them allows a tmpfs file to be used for splice, sendfile, and
- * below the loop driver, in the generic fashion that many filesystems support.
- */
-static int shmem_readpage(struct file *file, struct page *page)
-{
-	struct inode *inode = page->mapping->host;
-	int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
-	unlock_page(page);
-	return error;
-}
-
 static int
 shmem_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
@@ -1689,7 +1602,6 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	*pagep = NULL;
 	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
 }
 
@@ -1846,6 +1758,119 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
 	return retval;
 }
 
+static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
+				struct pipe_inode_info *pipe, size_t len,
+				unsigned int flags)
+{
+	struct address_space *mapping = in->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned int loff, nr_pages, req_pages;
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct page *page;
+	pgoff_t index, end_index;
+	loff_t isize, left;
+	int error, page_nr;
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &page_cache_pipe_buf_ops,
+		.spd_release = spd_release_page,
+	};
+
+	isize = i_size_read(inode);
+	if (unlikely(*ppos >= isize))
+		return 0;
+
+	left = isize - *ppos;
+	if (unlikely(left < len))
+		len = left;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	loff = *ppos & ~PAGE_CACHE_MASK;
+	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	nr_pages = min(req_pages, pipe->buffers);
+
+	spd.nr_pages = find_get_pages_contig(mapping, index,
+						nr_pages, spd.pages);
+	index += spd.nr_pages;
+	error = 0;
+
+	while (spd.nr_pages < nr_pages) {
+		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
+		if (error)
+			break;
+		unlock_page(page);
+		spd.pages[spd.nr_pages++] = page;
+		index++;
+	}
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	nr_pages = spd.nr_pages;
+	spd.nr_pages = 0;
+
+	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
+		unsigned int this_len;
+
+		if (!len)
+			break;
+
+		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
+		page = spd.pages[page_nr];
+
+		if (!PageUptodate(page) || page->mapping != mapping) {
+			error = shmem_getpage(inode, index, &page,
+							SGP_CACHE, NULL);
+			if (error)
+				break;
+			unlock_page(page);
+			page_cache_release(spd.pages[page_nr]);
+			spd.pages[page_nr] = page;
+		}
+
+		isize = i_size_read(inode);
+		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+		if (unlikely(!isize || index > end_index))
+			break;
+
+		if (end_index == index) {
+			unsigned int plen;
+
+			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			if (plen <= loff)
+				break;
+
+			this_len = min(this_len, plen - loff);
+			len = this_len;
+		}
+
+		spd.partial[page_nr].offset = loff;
+		spd.partial[page_nr].len = this_len;
+		len -= this_len;
+		loff = 0;
+		spd.nr_pages++;
+		index++;
+	}
+
+	while (page_nr < nr_pages)
+		page_cache_release(spd.pages[page_nr++]);
+
+	if (spd.nr_pages)
+		error = splice_to_pipe(pipe, &spd);
+
+	splice_shrink_spd(pipe, &spd);
+
+	if (error > 0) {
+		*ppos += error;
+		file_accessed(in);
+	}
+	return error;
+}
+
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2006,7 +2031,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 	int error;
 	int len;
 	struct inode *inode;
-	struct page *page = NULL;
+	struct page *page;
 	char *kaddr;
 	struct shmem_inode_info *info;
 
@@ -2684,7 +2709,6 @@ static const struct address_space_operations shmem_aops = {
 	.writepage	= shmem_writepage,
 	.set_page_dirty	= __set_page_dirty_no_writeback,
 #ifdef CONFIG_TMPFS
-	.readpage	= shmem_readpage,
 	.write_begin	= shmem_write_begin,
 	.write_end	= shmem_write_end,
 #endif
@@ -2701,7 +2725,7 @@ static const struct file_operations shmem_file_operations = {
 	.aio_read	= shmem_file_aio_read,
 	.aio_write	= generic_file_aio_write,
 	.fsync		= noop_fsync,
-	.splice_read	= generic_file_splice_read,
+	.splice_read	= shmem_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 #endif
 };
@@ -3042,13 +3066,29 @@ int shmem_zero_setup(struct vm_area_struct *vma)
  * suit tmpfs, since it may have pages in swapcache, and needs to find those
  * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
  *
- * Provide a stub for those callers to start using now, then later
- * flesh it out to call shmem_getpage() with additional gfp mask, when
- * shmem_file_splice_read() is added and shmem_readpage() is removed.
+ * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in
+ * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
  */
 struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					 pgoff_t index, gfp_t gfp)
 {
+#ifdef CONFIG_SHMEM
+	struct inode *inode = mapping->host;
+	struct page *page;
+	int error;
+
+	BUG_ON(mapping->a_ops != &shmem_aops);
+	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
+	if (error)
+		page = ERR_PTR(error);
+	else
+		unlock_page(page);
+	return page;
+#else
+	/*
+	 * The tiny !SHMEM case uses ramfs without swap
+	 */
 	return read_cache_page_gfp(mapping, index, gfp);
+#endif
+}
 EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
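
The comment added in the final hunk says that a caller such as i915 mixes __GFP_NORETRY | __GFP_NOWARN into the mapping's gfp mask when reading tmpfs-backed object pages through shmem_read_mapping_page_gfp(). Below is a minimal illustrative sketch of such a caller, not code from this commit: the function name read_object_page and the parameter obj_mapping are hypothetical, only shmem_read_mapping_page_gfp() and mapping_gfp_mask() are the kernel interfaces touched by this diff.

#include <linux/shmem_fs.h>
#include <linux/pagemap.h>
#include <linux/err.h>

/*
 * Hypothetical caller, in the spirit of the i915 usage described in the
 * comment above: read one page of a tmpfs-backed object, relaxing the gfp
 * mask so a failed allocation returns an error rather than triggering the
 * OOM killer.
 */
static struct page *read_object_page(struct address_space *obj_mapping,
				     pgoff_t index)
{
	gfp_t gfp = mapping_gfp_mask(obj_mapping);

	gfp |= __GFP_NORETRY | __GFP_NOWARN;

	/* returns the page with a reference held, or an ERR_PTR() on failure */
	return shmem_read_mapping_page_gfp(obj_mapping, index, gfp);
}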