diff options
author | Gioh Kim <gioh.kim@lge.com> | 2014-09-04 22:04:42 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2014-09-04 22:04:42 -0400 |
commit | 3b5e6454aaf6b4439b19400d8365e2ec2d24e411 (patch) | |
tree | bd1105d5eafdf980633ea5735b75e20af9f614ad /fs/buffer.c | |
parent | d26e2c4d72c2f2a38246f618480864fe3224929c (diff) |
fs/buffer.c: support buffer cache allocations with gfp modifiers
Buffer cache pages are allocated from the movable area because they are
normally referenced only briefly and then released. But some filesystems
hold buffer cache pages for a long time, which can disturb page migration.
New APIs are introduced to allocate the buffer cache with a
caller-specified flag. The *_gfp APIs are for callers that want to set
the page allocation flags used for the page cache allocation, and the
*_unmovable APIs are for callers that want to allocate the page cache
from the non-movable area.
Signed-off-by: Gioh Kim <gioh.kim@lge.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- | fs/buffer.c | 45 |
1 file changed, 26 insertions, 19 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index 8f05111bbb8b..9a6029e0dd71 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, | |||
993 | */ | 993 | */ |
994 | static int | 994 | static int |
995 | grow_dev_page(struct block_device *bdev, sector_t block, | 995 | grow_dev_page(struct block_device *bdev, sector_t block, |
996 | pgoff_t index, int size, int sizebits) | 996 | pgoff_t index, int size, int sizebits, gfp_t gfp) |
997 | { | 997 | { |
998 | struct inode *inode = bdev->bd_inode; | 998 | struct inode *inode = bdev->bd_inode; |
999 | struct page *page; | 999 | struct page *page; |
@@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, | |||
1002 | int ret = 0; /* Will call free_more_memory() */ | 1002 | int ret = 0; /* Will call free_more_memory() */ |
1003 | gfp_t gfp_mask; | 1003 | gfp_t gfp_mask; |
1004 | 1004 | ||
1005 | gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; | 1005 | gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; |
1006 | gfp_mask |= __GFP_MOVABLE; | 1006 | |
1007 | /* | 1007 | /* |
1008 | * XXX: __getblk_slow() can not really deal with failure and | 1008 | * XXX: __getblk_slow() can not really deal with failure and |
1009 | * will endlessly loop on improvised global reclaim. Prefer | 1009 | * will endlessly loop on improvised global reclaim. Prefer |
@@ -1058,7 +1058,7 @@ failed: | |||
1058 | * that page was dirty, the buffers are set dirty also. | 1058 | * that page was dirty, the buffers are set dirty also. |
1059 | */ | 1059 | */ |
1060 | static int | 1060 | static int |
1061 | grow_buffers(struct block_device *bdev, sector_t block, int size) | 1061 | grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) |
1062 | { | 1062 | { |
1063 | pgoff_t index; | 1063 | pgoff_t index; |
1064 | int sizebits; | 1064 | int sizebits; |
@@ -1085,11 +1085,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) | |||
1085 | } | 1085 | } |
1086 | 1086 | ||
1087 | /* Create a page with the proper size buffers.. */ | 1087 | /* Create a page with the proper size buffers.. */ |
1088 | return grow_dev_page(bdev, block, index, size, sizebits); | 1088 | return grow_dev_page(bdev, block, index, size, sizebits, gfp); |
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | static struct buffer_head * | 1091 | struct buffer_head * |
1092 | __getblk_slow(struct block_device *bdev, sector_t block, int size) | 1092 | __getblk_slow(struct block_device *bdev, sector_t block, |
1093 | unsigned size, gfp_t gfp) | ||
1093 | { | 1094 | { |
1094 | /* Size must be multiple of hard sectorsize */ | 1095 | /* Size must be multiple of hard sectorsize */ |
1095 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || | 1096 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || |
@@ -1111,13 +1112,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) | |||
1111 | if (bh) | 1112 | if (bh) |
1112 | return bh; | 1113 | return bh; |
1113 | 1114 | ||
1114 | ret = grow_buffers(bdev, block, size); | 1115 | ret = grow_buffers(bdev, block, size, gfp); |
1115 | if (ret < 0) | 1116 | if (ret < 0) |
1116 | return NULL; | 1117 | return NULL; |
1117 | if (ret == 0) | 1118 | if (ret == 0) |
1118 | free_more_memory(); | 1119 | free_more_memory(); |
1119 | } | 1120 | } |
1120 | } | 1121 | } |
1122 | EXPORT_SYMBOL(__getblk_slow); | ||
1121 | 1123 | ||
1122 | /* | 1124 | /* |
1123 | * The relationship between dirty buffers and dirty pages: | 1125 | * The relationship between dirty buffers and dirty pages: |
@@ -1371,24 +1373,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) | |||
1371 | EXPORT_SYMBOL(__find_get_block); | 1373 | EXPORT_SYMBOL(__find_get_block); |
1372 | 1374 | ||
1373 | /* | 1375 | /* |
1374 | * __getblk will locate (and, if necessary, create) the buffer_head | 1376 | * __getblk_gfp() will locate (and, if necessary, create) the buffer_head |
1375 | * which corresponds to the passed block_device, block and size. The | 1377 | * which corresponds to the passed block_device, block and size. The |
1376 | * returned buffer has its reference count incremented. | 1378 | * returned buffer has its reference count incremented. |
1377 | * | 1379 | * |
1378 | * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() | 1380 | * __getblk_gfp() will lock up the machine if grow_dev_page's |
1379 | * attempt is failing. FIXME, perhaps? | 1381 | * try_to_free_buffers() attempt is failing. FIXME, perhaps? |
1380 | */ | 1382 | */ |
1381 | struct buffer_head * | 1383 | struct buffer_head * |
1382 | __getblk(struct block_device *bdev, sector_t block, unsigned size) | 1384 | __getblk_gfp(struct block_device *bdev, sector_t block, |
1385 | unsigned size, gfp_t gfp) | ||
1383 | { | 1386 | { |
1384 | struct buffer_head *bh = __find_get_block(bdev, block, size); | 1387 | struct buffer_head *bh = __find_get_block(bdev, block, size); |
1385 | 1388 | ||
1386 | might_sleep(); | 1389 | might_sleep(); |
1387 | if (bh == NULL) | 1390 | if (bh == NULL) |
1388 | bh = __getblk_slow(bdev, block, size); | 1391 | bh = __getblk_slow(bdev, block, size, gfp); |
1389 | return bh; | 1392 | return bh; |
1390 | } | 1393 | } |
1391 | EXPORT_SYMBOL(__getblk); | 1394 | EXPORT_SYMBOL(__getblk_gfp); |
1392 | 1395 | ||
1393 | /* | 1396 | /* |
1394 | * Do async read-ahead on a buffer.. | 1397 | * Do async read-ahead on a buffer.. |
@@ -1404,24 +1407,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) | |||
1404 | EXPORT_SYMBOL(__breadahead); | 1407 | EXPORT_SYMBOL(__breadahead); |
1405 | 1408 | ||
1406 | /** | 1409 | /** |
1407 | * __bread() - reads a specified block and returns the bh | 1410 | * __bread_gfp() - reads a specified block and returns the bh |
1408 | * @bdev: the block_device to read from | 1411 | * @bdev: the block_device to read from |
1409 | * @block: number of block | 1412 | * @block: number of block |
1410 | * @size: size (in bytes) to read | 1413 | * @size: size (in bytes) to read |
1411 | * | 1414 | * @gfp: page allocation flag |
1415 | * | ||
1412 | * Reads a specified block, and returns buffer head that contains it. | 1416 | * Reads a specified block, and returns buffer head that contains it. |
1417 | * The page cache can be allocated from non-movable area | ||
1418 | * not to prevent page migration if you set gfp to zero. | ||
1413 | * It returns NULL if the block was unreadable. | 1419 | * It returns NULL if the block was unreadable. |
1414 | */ | 1420 | */ |
1415 | struct buffer_head * | 1421 | struct buffer_head * |
1416 | __bread(struct block_device *bdev, sector_t block, unsigned size) | 1422 | __bread_gfp(struct block_device *bdev, sector_t block, |
1423 | unsigned size, gfp_t gfp) | ||
1417 | { | 1424 | { |
1418 | struct buffer_head *bh = __getblk(bdev, block, size); | 1425 | struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); |
1419 | 1426 | ||
1420 | if (likely(bh) && !buffer_uptodate(bh)) | 1427 | if (likely(bh) && !buffer_uptodate(bh)) |
1421 | bh = __bread_slow(bh); | 1428 | bh = __bread_slow(bh); |
1422 | return bh; | 1429 | return bh; |
1423 | } | 1430 | } |
1424 | EXPORT_SYMBOL(__bread); | 1431 | EXPORT_SYMBOL(__bread_gfp); |
1425 | 1432 | ||
1426 | /* | 1433 | /* |
1427 | * invalidate_bh_lrus() is called rarely - but not only at unmount. | 1434 | * invalidate_bh_lrus() is called rarely - but not only at unmount. |