author	Linus Torvalds <torvalds@linux-foundation.org>	2019-09-21 13:40:37 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-09-21 13:40:37 -0400
commit	3e414b5bd28f965fb39b9e9419d877df0cf3111a (patch)
tree	5780a87d8e1b436eedeff6a7e6585cda75ddceaa /drivers/md/dm-bufio.c
parent	018c6837f3e63b45163d55a1668d9f8e6fdecf6e (diff)
parent	afa179eb603847494aa5061d4f501224a30dd187 (diff)
Merge tag 'for-5.4/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - crypto and DM crypt advances that allow the crypto API to reclaim
   implementation details that do not belong in DM crypt. The wrapper
   template for ESSIV generation that was factored out will also be
   used by fscrypt in the future.

 - Add root hash pkcs#7 signature verification to the DM verity target.

 - Add a new "clone" DM target that allows for efficient remote
   replication of a device.

 - Enhance DM bufio's cache to be tailored to each client based on use.
   Clients that make heavy use of the cache get more of it, and those
   that use less have reduced cache usage.

 - Add a new DM_GET_TARGET_VERSION ioctl to allow userspace to query
   the version number of a DM target, even if the associated module
   isn't yet loaded (a usage sketch follows the shortlog below).

 - Fix invalid memory access in the DM zoned target.

 - Fix the max_discard_sectors limit advertised by the DM raid target;
   it was mistakenly storing the limit in bytes rather than sectors.

 - Small optimizations and cleanups in the DM writecache target.

 - Various fixes and cleanups in DM core, DM raid1 and the space map
   portion of the DM persistent data library.

* tag 'for-5.4/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (22 commits)
  dm: introduce DM_GET_TARGET_VERSION
  dm bufio: introduce a global cache replacement
  dm bufio: remove old-style buffer cleanup
  dm bufio: introduce a global queue
  dm bufio: refactor adjust_total_allocated
  dm bufio: call adjust_total_allocated from __link_buffer and __unlink_buffer
  dm: add clone target
  dm raid: fix updating of max_discard_sectors limit
  dm writecache: skip writecache_wait for pmem mode
  dm stats: use struct_size() helper
  dm crypt: omit parsing of the encapsulated cipher
  dm crypt: switch to ESSIV crypto API template
  crypto: essiv - create wrapper template for ESSIV generation
  dm space map common: remove check for impossible sm_find_free() return value
  dm raid1: use struct_size() with kzalloc()
  dm writecache: optimize performance by sorting the blocks for writeback_all
  dm writecache: add unlikely for getting two block with same LBA
  dm writecache: remove unused member pointer in writeback_struct
  dm zoned: fix invalid memory access
  dm verity: add root hash pkcs#7 signature verification
  ...
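A rough userspace sketch of the new DM_GET_TARGET_VERSION ioctl (from commit afa179eb6038, the second parent above). It assumes the target type name is passed in dm_ioctl.name and the target's version comes back in dm_ioctl.version; include/uapi/linux/dm-ioctl.h and drivers/md/dm-ioctl.c are authoritative for the exact interface:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/dm-ioctl.h>

    int main(void)
    {
            struct dm_ioctl dmi;
            int fd = open("/dev/mapper/control", O_RDWR);

            if (fd < 0)
                    return 1;

            memset(&dmi, 0, sizeof(dmi));
            /* Announce the ioctl interface version userspace speaks. */
            dmi.version[0] = DM_VERSION_MAJOR;
            dmi.version[1] = DM_VERSION_MINOR;
            dmi.version[2] = DM_VERSION_PATCHLEVEL;
            dmi.data_size = sizeof(dmi);
            /* Target type to query, e.g. the verity target. */
            strncpy(dmi.name, "verity", sizeof(dmi.name) - 1);

            if (ioctl(fd, DM_GET_TARGET_VERSION, &dmi))
                    return 1;

            printf("verity %u.%u.%u\n",
                   dmi.version[0], dmi.version[1], dmi.version[2]);
            return 0;
    }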
Diffstat (limited to 'drivers/md/dm-bufio.c')
-rw-r--r--	drivers/md/dm-bufio.c	192
1 file changed, 119 insertions(+), 73 deletions(-)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 2a48ea3f1b30..2d519c223562 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -33,7 +33,8 @@
 
 #define DM_BUFIO_MEMORY_PERCENT        2
 #define DM_BUFIO_VMALLOC_PERCENT       25
-#define DM_BUFIO_WRITEBACK_PERCENT     75
+#define DM_BUFIO_WRITEBACK_RATIO       3
+#define DM_BUFIO_LOW_WATERMARK_RATIO   16
 
 /*
  * Check buffer ages in this interval (seconds)
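The replaced DM_BUFIO_WRITEBACK_PERCENT was a per-client writeback threshold; the two new constants drive global behaviour instead. A minimal sketch of the arithmetic they imply — the helper names here are illustrative, the real checks live in __check_watermark() and do_global_cleanup() further down:

    #include <stdbool.h>
    #include <stdio.h>

    #define DM_BUFIO_WRITEBACK_RATIO        3
    #define DM_BUFIO_LOW_WATERMARK_RATIO    16

    /* Async writeback starts once dirty buffers outnumber clean ones 3:1. */
    static bool over_writeback_ratio(unsigned long n_dirty, unsigned long n_clean)
    {
            return n_dirty > n_clean * DM_BUFIO_WRITEBACK_RATIO;
    }

    /* The global cleaner evicts until usage falls 1/16th below the limit. */
    static unsigned long low_watermark(unsigned long cache_size)
    {
            return cache_size - cache_size / DM_BUFIO_LOW_WATERMARK_RATIO;
    }

    int main(void)
    {
            printf("%d\n", over_writeback_ratio(400, 100)); /* 1: start writeback */
            printf("%lu\n", low_watermark(1UL << 20));      /* 983040 of 1048576 */
            return 0;
    }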
@@ -132,12 +133,14 @@ enum data_mode {
 struct dm_buffer {
         struct rb_node node;
         struct list_head lru_list;
+        struct list_head global_list;
         sector_t block;
         void *data;
         unsigned char data_mode;                /* DATA_MODE_* */
         unsigned char list_mode;                /* LIST_* */
         blk_status_t read_error;
         blk_status_t write_error;
+        unsigned accessed;
         unsigned hold_count;
         unsigned long state;
         unsigned long last_accessed;
@@ -192,7 +195,11 @@ static unsigned long dm_bufio_cache_size;
  */
 static unsigned long dm_bufio_cache_size_latch;
 
-static DEFINE_SPINLOCK(param_spinlock);
+static DEFINE_SPINLOCK(global_spinlock);
+
+static LIST_HEAD(global_queue);
+
+static unsigned long global_num = 0;
 
 /*
  * Buffers are freed after this timeout
@@ -209,11 +216,6 @@ static unsigned long dm_bufio_current_allocated;
 /*----------------------------------------------------------------*/
 
 /*
- * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count
- */
-static unsigned long dm_bufio_cache_size_per_client;
-
-/*
  * The current number of clients.
  */
 static int dm_bufio_client_count;
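The variable removed here implemented a static split of the cache: every client was capped at dm_bufio_cache_size_latch / client_count, whether it was busy or idle (the "?: 1" seen below guarded against zero clients). A worked example of the cap that goes away, with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
            unsigned long cache_size = 400UL << 20; /* 400 MiB total */
            int client_count = 4;

            /* Old scheme: a fixed quarter each, even for an idle client. */
            printf("per-client cap: %lu MiB\n",
                   (cache_size / (client_count ? client_count : 1)) >> 20);
            return 0;
    }

The rest of the series replaces this split with a single demand-driven pool: buffers live on one global queue and the cleaner evicts the coldest ones, so heavy users naturally end up with more of the cache.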
@@ -224,11 +226,15 @@ static int dm_bufio_client_count;
 static LIST_HEAD(dm_bufio_all_clients);
 
 /*
- * This mutex protects dm_bufio_cache_size_latch,
- * dm_bufio_cache_size_per_client and dm_bufio_client_count
+ * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count
  */
 static DEFINE_MUTEX(dm_bufio_clients_lock);
 
+static struct workqueue_struct *dm_bufio_wq;
+static struct delayed_work dm_bufio_cleanup_old_work;
+static struct work_struct dm_bufio_replacement_work;
+
+
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 static void buffer_record_stack(struct dm_buffer *b)
 {
@@ -285,15 +291,23 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
 
 /*----------------------------------------------------------------*/
 
-static void adjust_total_allocated(unsigned char data_mode, long diff)
+static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
 {
+        unsigned char data_mode;
+        long diff;
+
         static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
                 &dm_bufio_allocated_kmem_cache,
                 &dm_bufio_allocated_get_free_pages,
                 &dm_bufio_allocated_vmalloc,
         };
 
-        spin_lock(&param_spinlock);
+        data_mode = b->data_mode;
+        diff = (long)b->c->block_size;
+        if (unlink)
+                diff = -diff;
+
+        spin_lock(&global_spinlock);
 
         *class_ptr[data_mode] += diff;
 
@@ -302,7 +316,19 @@ static void adjust_total_allocated(unsigned char data_mode, long diff)
         if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
                 dm_bufio_peak_allocated = dm_bufio_current_allocated;
 
-        spin_unlock(&param_spinlock);
+        b->accessed = 1;
+
+        if (!unlink) {
+                list_add(&b->global_list, &global_queue);
+                global_num++;
+                if (dm_bufio_current_allocated > dm_bufio_cache_size)
+                        queue_work(dm_bufio_wq, &dm_bufio_replacement_work);
+        } else {
+                list_del(&b->global_list);
+                global_num--;
+        }
+
+        spin_unlock(&global_spinlock);
 }
 
 /*
@@ -323,9 +349,6 @@ static void __cache_size_refresh(void)
                           dm_bufio_default_cache_size);
                 dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
         }
-
-        dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch /
-                                         (dm_bufio_client_count ? : 1);
 }
 
 /*
@@ -431,8 +454,6 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
                 return NULL;
         }
 
-        adjust_total_allocated(b->data_mode, (long)c->block_size);
-
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
         b->stack_len = 0;
 #endif
@@ -446,8 +467,6 @@ static void free_buffer(struct dm_buffer *b)
 {
         struct dm_bufio_client *c = b->c;
 
-        adjust_total_allocated(b->data_mode, -(long)c->block_size);
-
         free_buffer_data(c, b->data, b->data_mode);
         kmem_cache_free(c->slab_buffer, b);
 }
@@ -465,6 +484,8 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
         list_add(&b->lru_list, &c->lru[dirty]);
         __insert(b->c, b);
         b->last_accessed = jiffies;
+
+        adjust_total_allocated(b, false);
 }
 
 /*
@@ -479,6 +500,8 @@ static void __unlink_buffer(struct dm_buffer *b)
         c->n_buffers[b->list_mode]--;
         __remove(b->c, b);
         list_del(&b->lru_list);
+
+        adjust_total_allocated(b, true);
 }
 
 /*
@@ -488,6 +511,8 @@ static void __relink_lru(struct dm_buffer *b, int dirty)
 {
         struct dm_bufio_client *c = b->c;
 
+        b->accessed = 1;
+
         BUG_ON(!c->n_buffers[b->list_mode]);
 
         c->n_buffers[b->list_mode]--;
@@ -907,36 +932,6 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
 }
 
 /*
- * Get writeback threshold and buffer limit for a given client.
- */
-static void __get_memory_limit(struct dm_bufio_client *c,
-                               unsigned long *threshold_buffers,
-                               unsigned long *limit_buffers)
-{
-        unsigned long buffers;
-
-        if (unlikely(READ_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) {
-                if (mutex_trylock(&dm_bufio_clients_lock)) {
-                        __cache_size_refresh();
-                        mutex_unlock(&dm_bufio_clients_lock);
-                }
-        }
-
-        buffers = dm_bufio_cache_size_per_client;
-        if (likely(c->sectors_per_block_bits >= 0))
-                buffers >>= c->sectors_per_block_bits + SECTOR_SHIFT;
-        else
-                buffers /= c->block_size;
-
-        if (buffers < c->minimum_buffers)
-                buffers = c->minimum_buffers;
-
-        *limit_buffers = buffers;
-        *threshold_buffers = mult_frac(buffers,
-                                       DM_BUFIO_WRITEBACK_PERCENT, 100);
-}
-
-/*
  * Check if we're over watermark.
  * If we are over threshold_buffers, start freeing buffers.
  * If we're over "limit_buffers", block until we get under the limit.
@@ -944,23 +939,7 @@ static void __get_memory_limit(struct dm_bufio_client *c,
 static void __check_watermark(struct dm_bufio_client *c,
                               struct list_head *write_list)
 {
-        unsigned long threshold_buffers, limit_buffers;
-
-        __get_memory_limit(c, &threshold_buffers, &limit_buffers);
-
-        while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] >
-               limit_buffers) {
-
-                struct dm_buffer *b = __get_unclaimed_buffer(c);
-
-                if (!b)
-                        return;
-
-                __free_buffer_wake(b);
-                cond_resched();
-        }
-
-        if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
+        if (c->n_buffers[LIST_DIRTY] > c->n_buffers[LIST_CLEAN] * DM_BUFIO_WRITEBACK_RATIO)
                 __write_dirty_buffers_async(c, 1, write_list);
 }
 
@@ -1841,6 +1820,74 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
         dm_bufio_unlock(c);
 }
 
+static void do_global_cleanup(struct work_struct *w)
+{
+        struct dm_bufio_client *locked_client = NULL;
+        struct dm_bufio_client *current_client;
+        struct dm_buffer *b;
+        unsigned spinlock_hold_count;
+        unsigned long threshold = dm_bufio_cache_size -
+                dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO;
+        unsigned long loops = global_num * 2;
+
+        mutex_lock(&dm_bufio_clients_lock);
+
+        while (1) {
+                cond_resched();
+
+                spin_lock(&global_spinlock);
+                if (unlikely(dm_bufio_current_allocated <= threshold))
+                        break;
+
+                spinlock_hold_count = 0;
+get_next:
+                if (!loops--)
+                        break;
+                if (unlikely(list_empty(&global_queue)))
+                        break;
+                b = list_entry(global_queue.prev, struct dm_buffer, global_list);
+
+                if (b->accessed) {
+                        b->accessed = 0;
+                        list_move(&b->global_list, &global_queue);
+                        if (likely(++spinlock_hold_count < 16))
+                                goto get_next;
+                        spin_unlock(&global_spinlock);
+                        continue;
+                }
+
+                current_client = b->c;
+                if (unlikely(current_client != locked_client)) {
+                        if (locked_client)
+                                dm_bufio_unlock(locked_client);
+
+                        if (!dm_bufio_trylock(current_client)) {
+                                spin_unlock(&global_spinlock);
+                                dm_bufio_lock(current_client);
+                                locked_client = current_client;
+                                continue;
+                        }
+
+                        locked_client = current_client;
+                }
+
+                spin_unlock(&global_spinlock);
+
+                if (unlikely(!__try_evict_buffer(b, GFP_KERNEL))) {
+                        spin_lock(&global_spinlock);
+                        list_move(&b->global_list, &global_queue);
+                        spin_unlock(&global_spinlock);
+                }
+        }
+
+        spin_unlock(&global_spinlock);
+
+        if (locked_client)
+                dm_bufio_unlock(locked_client);
+
+        mutex_unlock(&dm_bufio_clients_lock);
+}
+
 static void cleanup_old_buffers(void)
 {
         unsigned long max_age_hz = get_max_age_hz();
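do_global_cleanup() above is a second-chance (clock-style) scan over the global queue: buffers are examined from the cold end; one whose accessed bit is set gets the bit cleared and another trip through the list, anything else is evicted, until usage drops below the low watermark or the scan budget of two passes (global_num * 2) is spent. A standalone model of just that policy, with the locking stripped out — illustrative, not kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    #define DM_BUFIO_LOW_WATERMARK_RATIO 16

    struct buf {
            unsigned id;
            bool accessed;
    };

    int main(void)
    {
            /* q[0] plays the cold end of the list, like global_queue.prev. */
            struct buf q[] = { {0, true}, {1, false}, {2, true}, {3, false} };
            unsigned n = 4, i = 0;
            unsigned long block_size = 128, cache_size = 512;
            unsigned long allocated = n * block_size;       /* 512 */
            unsigned long threshold = cache_size -
                    cache_size / DM_BUFIO_LOW_WATERMARK_RATIO;  /* 480 */
            unsigned long loops = 2 * n;    /* scan budget, like global_num * 2 */

            while (allocated > threshold && n && loops--) {
                    struct buf *b = &q[i % n];

                    if (b->accessed) {
                            b->accessed = false;    /* second chance */
                            i++;
                            continue;
                    }
                    printf("evict buffer %u\n", b->id);     /* evicts buffer 1 */
                    *b = q[--n];            /* drop it from the pool */
                    allocated -= block_size;
            }
            return 0;
    }

The kernel version layers two concerns on top of this: it drops global_spinlock every 16 requeued buffers to bound lock hold time, and it must take the owning client's dm_bufio lock (trylock first, falling back to a blocking lock after releasing the spinlock) before __try_evict_buffer() may touch the buffer.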
@@ -1856,14 +1903,11 @@ static void cleanup_old_buffers(void)
         mutex_unlock(&dm_bufio_clients_lock);
 }
 
-static struct workqueue_struct *dm_bufio_wq;
-static struct delayed_work dm_bufio_work;
-
 static void work_fn(struct work_struct *w)
 {
         cleanup_old_buffers();
 
-        queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
+        queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
                            DM_BUFIO_WORK_TIMER_SECS * HZ);
 }
 
@@ -1905,8 +1949,9 @@ static int __init dm_bufio_init(void)
         if (!dm_bufio_wq)
                 return -ENOMEM;
 
-        INIT_DELAYED_WORK(&dm_bufio_work, work_fn);
-        queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
+        INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
+        INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
+        queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
                            DM_BUFIO_WORK_TIMER_SECS * HZ);
 
         return 0;
@@ -1919,7 +1964,8 @@ static void __exit dm_bufio_exit(void)
 {
         int bug = 0;
 
-        cancel_delayed_work_sync(&dm_bufio_work);
+        cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
+        flush_workqueue(dm_bufio_wq);
         destroy_workqueue(dm_bufio_wq);
 
         if (dm_bufio_client_count) {