aboutsummaryrefslogtreecommitdiffstats
path: root/fs/buffer.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 18:32:19 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 18:32:19 -0500
commite2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
treeb97a90170c45211bcc437761653aa8016c34afcd /fs/buffer.c
parentabc36be236358162202e86ad88616ff95a755101 (diff)
parenta04b5de5050ab8b891128eb2c47a0916fe8622e1 (diff)
Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe: "This is the main pull request for block storage for 4.15-rc1. Nothing out of the ordinary in here, and no API changes or anything like that. Just various new features for drivers, core changes, etc. In particular, this pull request contains: - A patch series from Bart, closing the hole on blk/scsi-mq queue quiescing. - A series from Christoph, building towards hidden gendisks (for multipath) and ability to move bio chains around. - NVMe - Support for native multipath for NVMe (Christoph). - Userspace notifications for AENs (Keith). - Command side-effects support (Keith). - SGL support (Chaitanya Kulkarni) - FC fixes and improvements (James Smart) - Lots of fixes and tweaks (Various) - bcache - New maintainer (Michael Lyle) - Writeback control improvements (Michael) - Various fixes (Coly, Elena, Eric, Liang, et al) - lightnvm updates, mostly centered around the pblk interface (Javier, Hans, and Rakesh). - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph) - Writeback series that fix the much discussed hundreds of millions of sync-all units. This goes all the way, as discussed previously (me). - Fix for missing wakeup on writeback timer adjustments (Yafang Shao). - Fix laptop mode on blk-mq (me). - {mq,name} tuple lookup for IO schedulers, allowing us to have alias names. This means you can use 'deadline' on both !mq and on mq (where it's called mq-deadline). (me). - blktrace race fix, oopsing on sg load (me). - blk-mq optimizations (me). - Obscure waitqueue race fix for kyber (Omar). - NBD fixes (Josef). - Disable writeback throttling by default on bfq, like we do on cfq (Luca Miccio). - Series from Ming that enable us to treat flush requests on blk-mq like any other request. This is a really nice cleanup. - Series from Ming that improves merging on blk-mq with schedulers, getting us closer to flipping the switch on scsi-mq again. - BFQ updates (Paolo). 
- blk-mq atomic flags memory ordering fixes (Peter Z). - Loop cgroup support (Shaohua). - Lots of minor fixes from lots of different folks, both for core and driver code" * 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits) nvme: fix visibility of "uuid" ns attribute blk-mq: fixup some comment typos and lengths ide: ide-atapi: fix compile error with defining macro DEBUG blk-mq: improve tag waiting setup for non-shared tags brd: remove unused brd_mutex blk-mq: only run the hardware queue if IO is pending block: avoid null pointer dereference on null disk fs: guard_bio_eod() needs to consider partitions xtensa/simdisk: fix compile error nvme: expose subsys attribute to sysfs nvme: create 'slaves' and 'holders' entries for hidden controllers block: create 'slaves' and 'holders' entries for hidden gendisks nvme: also expose the namespace identification sysfs files for mpath nodes nvme: implement multipath access to nvme subsystems nvme: track shared namespaces nvme: introduce a nvme_ns_ids structure nvme: track subsystems block, nvme: Introduce blk_mq_req_flags_t block, scsi: Make SCSI quiesce and resume work reliably block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag ...
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c70
1 file changed, 19 insertions, 51 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 49b7e9bdcd1d..1c18a22a6013 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -253,27 +253,6 @@ out:
253} 253}
254 254
255/* 255/*
256 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
257 */
258static void free_more_memory(void)
259{
260 struct zoneref *z;
261 int nid;
262
263 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
264 yield();
265
266 for_each_online_node(nid) {
267
268 z = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
269 gfp_zone(GFP_NOFS), NULL);
270 if (z->zone)
271 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
272 GFP_NOFS, NULL);
273 }
274}
275
276/*
277 * I/O completion handler for block_read_full_page() - pages 256 * I/O completion handler for block_read_full_page() - pages
278 * which come unlocked at the end of I/O. 257 * which come unlocked at the end of I/O.
279 */ 258 */
@@ -861,16 +840,19 @@ int remove_inode_buffers(struct inode *inode)
861 * which may not fail from ordinary buffer allocations. 840 * which may not fail from ordinary buffer allocations.
862 */ 841 */
863struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, 842struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
864 int retry) 843 bool retry)
865{ 844{
866 struct buffer_head *bh, *head; 845 struct buffer_head *bh, *head;
846 gfp_t gfp = GFP_NOFS;
867 long offset; 847 long offset;
868 848
869try_again: 849 if (retry)
850 gfp |= __GFP_NOFAIL;
851
870 head = NULL; 852 head = NULL;
871 offset = PAGE_SIZE; 853 offset = PAGE_SIZE;
872 while ((offset -= size) >= 0) { 854 while ((offset -= size) >= 0) {
873 bh = alloc_buffer_head(GFP_NOFS); 855 bh = alloc_buffer_head(gfp);
874 if (!bh) 856 if (!bh)
875 goto no_grow; 857 goto no_grow;
876 858
@@ -896,23 +878,7 @@ no_grow:
896 } while (head); 878 } while (head);
897 } 879 }
898 880
899 /* 881 return NULL;
900 * Return failure for non-async IO requests. Async IO requests
901 * are not allowed to fail, so we have to wait until buffer heads
902 * become available. But we don't want tasks sleeping with
903 * partially complete buffers, so all were released above.
904 */
905 if (!retry)
906 return NULL;
907
908 /* We're _really_ low on memory. Now we just
909 * wait for old buffer heads to become free due to
910 * finishing IO. Since this is an async request and
911 * the reserve list is empty, we're sure there are
912 * async buffer heads in use.
913 */
914 free_more_memory();
915 goto try_again;
916} 882}
917EXPORT_SYMBOL_GPL(alloc_page_buffers); 883EXPORT_SYMBOL_GPL(alloc_page_buffers);
918 884
@@ -1001,8 +967,6 @@ grow_dev_page(struct block_device *bdev, sector_t block,
1001 gfp_mask |= __GFP_NOFAIL; 967 gfp_mask |= __GFP_NOFAIL;
1002 968
1003 page = find_or_create_page(inode->i_mapping, index, gfp_mask); 969 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1004 if (!page)
1005 return ret;
1006 970
1007 BUG_ON(!PageLocked(page)); 971 BUG_ON(!PageLocked(page));
1008 972
@@ -1021,9 +985,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
1021 /* 985 /*
1022 * Allocate some buffers for this page 986 * Allocate some buffers for this page
1023 */ 987 */
1024 bh = alloc_page_buffers(page, size, 0); 988 bh = alloc_page_buffers(page, size, true);
1025 if (!bh)
1026 goto failed;
1027 989
1028 /* 990 /*
1029 * Link the page to the buffers and initialise them. Take the 991 * Link the page to the buffers and initialise them. Take the
@@ -1103,8 +1065,6 @@ __getblk_slow(struct block_device *bdev, sector_t block,
1103 ret = grow_buffers(bdev, block, size, gfp); 1065 ret = grow_buffers(bdev, block, size, gfp);
1104 if (ret < 0) 1066 if (ret < 0)
1105 return NULL; 1067 return NULL;
1106 if (ret == 0)
1107 free_more_memory();
1108 } 1068 }
1109} 1069}
1110 1070
@@ -1575,7 +1535,7 @@ void create_empty_buffers(struct page *page,
1575{ 1535{
1576 struct buffer_head *bh, *head, *tail; 1536 struct buffer_head *bh, *head, *tail;
1577 1537
1578 head = alloc_page_buffers(page, blocksize, 1); 1538 head = alloc_page_buffers(page, blocksize, true);
1579 bh = head; 1539 bh = head;
1580 do { 1540 do {
1581 bh->b_state |= b_state; 1541 bh->b_state |= b_state;
@@ -2639,7 +2599,7 @@ int nobh_write_begin(struct address_space *mapping,
2639 * Be careful: the buffer linked list is a NULL terminated one, rather 2599 * Be careful: the buffer linked list is a NULL terminated one, rather
2640 * than the circular one we're used to. 2600 * than the circular one we're used to.
2641 */ 2601 */
2642 head = alloc_page_buffers(page, blocksize, 0); 2602 head = alloc_page_buffers(page, blocksize, false);
2643 if (!head) { 2603 if (!head) {
2644 ret = -ENOMEM; 2604 ret = -ENOMEM;
2645 goto out_release; 2605 goto out_release;
@@ -3056,8 +3016,16 @@ void guard_bio_eod(int op, struct bio *bio)
3056 sector_t maxsector; 3016 sector_t maxsector;
3057 struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; 3017 struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
3058 unsigned truncated_bytes; 3018 unsigned truncated_bytes;
3019 struct hd_struct *part;
3020
3021 rcu_read_lock();
3022 part = __disk_get_part(bio->bi_disk, bio->bi_partno);
3023 if (part)
3024 maxsector = part_nr_sects_read(part);
3025 else
3026 maxsector = get_capacity(bio->bi_disk);
3027 rcu_read_unlock();
3059 3028
3060 maxsector = get_capacity(bio->bi_disk);
3061 if (!maxsector) 3029 if (!maxsector)
3062 return; 3030 return;
3063 3031