author	Gioh Kim <gioh.kim@lge.com>	2014-09-04 22:04:42 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2014-09-04 22:04:42 -0400
commit	3b5e6454aaf6b4439b19400d8365e2ec2d24e411 (patch)
tree	bd1105d5eafdf980633ea5735b75e20af9f614ad	/fs/buffer.c
parent	d26e2c4d72c2f2a38246f618480864fe3224929c (diff)
fs/buffer.c: support buffer cache allocations with gfp modifiers
A buffer cache page is normally allocated from the movable area because it is referenced for a short while and then released. But some filesystems hold on to buffer cache pages for a long time, and such pages can disturb page migration.

New APIs are introduced to allocate the buffer cache with a caller-supplied gfp flag: the *_gfp APIs are for callers that want to set the page allocation flags themselves, and the *_unmovable APIs are for callers that want the page cache allocated from the non-movable area.

Signed-off-by: Gioh Kim <gioh.kim@lge.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
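[Editor's note: a minimal usage sketch, not part of this patch. It assumes the __bread_gfp() prototype is made visible through the companion linux/buffer_head.h change in this series; the helper names and the "pinned metadata" use case are purely illustrative. Passing 0 as gfp leaves __GFP_MOVABLE out of the allocation mask, so the backing page comes from the non-movable area and cannot block page migration; passing __GFP_MOVABLE keeps the old behaviour.]

    #include <linux/buffer_head.h>
    #include <linux/gfp.h>

    /* Long-lived metadata block: allocate its page cache as unmovable. */
    static struct buffer_head *
    example_read_pinned_block(struct block_device *bdev, sector_t block,
                              unsigned size)
    {
            return __bread_gfp(bdev, block, size, 0);
    }

    /* Short-lived data block: keep the page in the movable area as before. */
    static struct buffer_head *
    example_read_transient_block(struct block_device *bdev, sector_t block,
                                 unsigned size)
    {
            return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
    }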
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--	fs/buffer.c	45
1 file changed, 26 insertions(+), 19 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 8f05111bbb8b..9a6029e0dd71 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
  */
 static int
 grow_dev_page(struct block_device *bdev, sector_t block,
-		pgoff_t index, int size, int sizebits)
+		pgoff_t index, int size, int sizebits, gfp_t gfp)
 {
 	struct inode *inode = bdev->bd_inode;
 	struct page *page;
@@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	int ret = 0;		/* Will call free_more_memory() */
 	gfp_t gfp_mask;
 
-	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
-	gfp_mask |= __GFP_MOVABLE;
+	gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
+
 	/*
 	 * XXX: __getblk_slow() can not really deal with failure and
 	 * will endlessly loop on improvised global reclaim.  Prefer
@@ -1058,7 +1058,7 @@ failed:
  * that page was dirty, the buffers are set dirty also.
  */
 static int
-grow_buffers(struct block_device *bdev, sector_t block, int size)
+grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
 {
 	pgoff_t index;
 	int sizebits;
@@ -1085,11 +1085,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
 	}
 
 	/* Create a page with the proper size buffers.. */
-	return grow_dev_page(bdev, block, index, size, sizebits);
+	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
 }
 
-static struct buffer_head *
-__getblk_slow(struct block_device *bdev, sector_t block, int size)
+struct buffer_head *
+__getblk_slow(struct block_device *bdev, sector_t block,
+	     unsigned size, gfp_t gfp)
 {
 	/* Size must be multiple of hard sectorsize */
 	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1111,13 +1112,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
 		if (bh)
 			return bh;
 
-		ret = grow_buffers(bdev, block, size);
+		ret = grow_buffers(bdev, block, size, gfp);
 		if (ret < 0)
 			return NULL;
 		if (ret == 0)
 			free_more_memory();
 	}
 }
+EXPORT_SYMBOL(__getblk_slow);
 
 /*
  * The relationship between dirty buffers and dirty pages:
@@ -1371,24 +1373,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
 EXPORT_SYMBOL(__find_get_block);
 
 /*
- * __getblk will locate (and, if necessary, create) the buffer_head
+ * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
  * which corresponds to the passed block_device, block and size. The
  * returned buffer has its reference count incremented.
  *
- * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
- * attempt is failing.  FIXME, perhaps?
+ * __getblk_gfp() will lock up the machine if grow_dev_page's
+ * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
  */
 struct buffer_head *
-__getblk(struct block_device *bdev, sector_t block, unsigned size)
+__getblk_gfp(struct block_device *bdev, sector_t block,
+	     unsigned size, gfp_t gfp)
 {
 	struct buffer_head *bh = __find_get_block(bdev, block, size);
 
 	might_sleep();
 	if (bh == NULL)
-		bh = __getblk_slow(bdev, block, size);
+		bh = __getblk_slow(bdev, block, size, gfp);
 	return bh;
 }
-EXPORT_SYMBOL(__getblk);
+EXPORT_SYMBOL(__getblk_gfp);
 
 /*
  * Do async read-ahead on a buffer..
@@ -1404,24 +1407,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
 EXPORT_SYMBOL(__breadahead);
 
 /**
- *  __bread() - reads a specified block and returns the bh
+ *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
  *  @block: number of block
  *  @size: size (in bytes) to read
+ *  @gfp: page allocation flag
  *
  *  Reads a specified block, and returns buffer head that contains it.
+ *  The page cache can be allocated from non-movable area
+ *  not to prevent page migration if you set gfp to zero.
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread(struct block_device *bdev, sector_t block, unsigned size)
+__bread_gfp(struct block_device *bdev, sector_t block,
+		   unsigned size, gfp_t gfp)
 {
-	struct buffer_head *bh = __getblk(bdev, block, size);
+	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
 
 	if (likely(bh) && !buffer_uptodate(bh))
 		bh = __bread_slow(bh);
 	return bh;
 }
-EXPORT_SYMBOL(__bread);
+EXPORT_SYMBOL(__bread_gfp);
 
 /*
  * invalidate_bh_lrus() is called rarely - but not only at unmount.