authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-05 17:27:02 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-05 17:27:02 -0400
commit3526dd0c7832f1011a0477cc6d903662bae05ea8 (patch)
tree22fbac64eb40a0b29bfa4c029695f39b2f591e62
parentdd972f924df6bdbc0ab185a38d5d2361dbc26311 (diff)
parentbc6d65e6dc89c3b7ff78e4ad797117c122ffde8e (diff)
Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
 "It's a pretty quiet round this time, which is nice. This contains:

  - series from Bart, cleaning up the way we set/test/clear atomic queue flags.

  - series from Bart, fixing races between gendisk and queue registration and removal.

  - set of bcache fixes and improvements from various folks, by way of Michael Lyle.

  - set of lightnvm updates from Matias, most of it being the 1.2 to 2.0 transition.

  - removal of unused DIO flags from Nikolay.

  - blk-mq/sbitmap memory ordering fixes from Omar.

  - divide-by-zero fix for BFQ from Paolo.

  - minor documentation patches from Randy.

  - timeout fix from Tejun.

  - Alpha "can't write a char atomically" fix from Mikulas.

  - set of NVMe fixes by way of Keith.

  - bsg and bsg-lib improvements from Christoph.

  - a few sed-opal fixes from Jonas.

  - cdrom check-disk-change deadlock fix from Maurizio.

  - various little fixes, comment fixes, etc from various folks"

* tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits)
  blk-mq: Directly schedule q->timeout_work when aborting a request
  blktrace: fix comment in blktrace_api.h
  lightnvm: remove function name in strings
  lightnvm: pblk: remove some unnecessary NULL checks
  lightnvm: pblk: don't recover unwritten lines
  lightnvm: pblk: implement 2.0 support
  lightnvm: pblk: implement get log report chunk
  lightnvm: pblk: rename ppaf* to addrf*
  lightnvm: pblk: check for supported version
  lightnvm: implement get log report chunk helpers
  lightnvm: make address conversions depend on generic device
  lightnvm: add support for 2.0 address format
  lightnvm: normalize geometry nomenclature
  lightnvm: complete geo structure with maxoc*
  lightnvm: add shorten OCSSD version in geo
  lightnvm: add minor version to generic geometry
  lightnvm: simplify geometry structure
  lightnvm: pblk: refactor init/exit sequences
  lightnvm: Avoid validation of default op value
  lightnvm: centralize permission check for lightnvm ioctl
  ...
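The queue-flag cleanup mentioned above replaces open-coded locking around queue_flag_set()/queue_flag_clear() with the new blk_queue_flag_*() helpers added in the block/blk-core.c hunk below. A minimal before/after sketch of the pattern, using the helper signatures from this series; the example_* wrappers are illustrative only and not part of the patch:

    #include <linux/blkdev.h>

    /* Before this series: callers had to take queue_lock by hand around the
     * lockless queue_flag_set()/queue_flag_clear() helpers. */
    static void example_mark_dying_old(struct request_queue *q)
    {
            spin_lock_irq(q->queue_lock);
            queue_flag_set(QUEUE_FLAG_DYING, q);
            spin_unlock_irq(q->queue_lock);
    }

    /* After this series: the blk_queue_flag_*() helpers take and release
     * queue_lock internally, as the converted blk_set_queue_dying() does. */
    static void example_mark_dying_new(struct request_queue *q)
    {
            blk_queue_flag_set(QUEUE_FLAG_DYING, q);
    }

    /* Test-and-set variant, mirroring the blk_set_preempt_only() change. */
    static bool example_set_preempt_only(struct request_queue *q)
    {
            return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
    }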
-rw-r--r--Documentation/cdrom/cdrom-standard.tex31
-rw-r--r--Documentation/fault-injection/fault-injection.txt8
-rw-r--r--Documentation/fault-injection/nvme-fault-injection.txt116
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c1
-rw-r--r--block/bfq-iosched.c25
-rw-r--r--block/bfq-iosched.h2
-rw-r--r--block/bio.c4
-rw-r--r--block/blk-cgroup.c78
-rw-r--r--block/blk-core.c250
-rw-r--r--block/blk-mq-debugfs.c150
-rw-r--r--block/blk-mq-pci.c6
-rw-r--r--block/blk-mq.c20
-rw-r--r--block/blk-settings.c6
-rw-r--r--block/blk-stat.c6
-rw-r--r--block/blk-sysfs.c29
-rw-r--r--block/blk-timeout.c8
-rw-r--r--block/blk-zoned.c4
-rw-r--r--block/blk.h69
-rw-r--r--block/bsg-lib.c165
-rw-r--r--block/bsg.c262
-rw-r--r--block/sed-opal.c37
-rw-r--r--drivers/block/brd.c1
-rw-r--r--drivers/block/drbd/drbd_main.c3
-rw-r--r--drivers/block/drbd/drbd_nl.c4
-rw-r--r--drivers/block/loop.c77
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c8
-rw-r--r--drivers/block/nbd.c8
-rw-r--r--drivers/block/null_blk.c124
-rw-r--r--drivers/block/paride/pcd.c2
-rw-r--r--drivers/block/rbd.c13
-rw-r--r--drivers/block/rsxx/dev.c6
-rw-r--r--drivers/block/skd_main.c4
-rw-r--r--drivers/block/umem.c7
-rw-r--r--drivers/block/xen-blkfront.c10
-rw-r--r--drivers/block/zram/zram_drv.c8
-rw-r--r--drivers/block/zram/zram_drv.h1
-rw-r--r--drivers/cdrom/cdrom.c3
-rw-r--r--drivers/cdrom/gdrom.c3
-rw-r--r--drivers/ide/ide-cd.c10
-rw-r--r--drivers/ide/ide-cd.h6
-rw-r--r--drivers/ide/ide-disk.c4
-rw-r--r--drivers/ide/ide-probe.c4
-rw-r--r--drivers/lightnvm/core.c240
-rw-r--r--drivers/lightnvm/pblk-cache.c4
-rw-r--r--drivers/lightnvm/pblk-core.c202
-rw-r--r--drivers/lightnvm/pblk-gc.c12
-rw-r--r--drivers/lightnvm/pblk-init.c820
-rw-r--r--drivers/lightnvm/pblk-map.c6
-rw-r--r--drivers/lightnvm/pblk-rb.c21
-rw-r--r--drivers/lightnvm/pblk-read.c2
-rw-r--r--drivers/lightnvm/pblk-recovery.c91
-rw-r--r--drivers/lightnvm/pblk-rl.c2
-rw-r--r--drivers/lightnvm/pblk-sysfs.c235
-rw-r--r--drivers/lightnvm/pblk-write.c2
-rw-r--r--drivers/lightnvm/pblk.h304
-rw-r--r--drivers/md/bcache/alloc.c3
-rw-r--r--drivers/md/bcache/bcache.h57
-rw-r--r--drivers/md/bcache/bset.c4
-rw-r--r--drivers/md/bcache/bset.h5
-rw-r--r--drivers/md/bcache/btree.c26
-rw-r--r--drivers/md/bcache/closure.c17
-rw-r--r--drivers/md/bcache/closure.h5
-rw-r--r--drivers/md/bcache/debug.c14
-rw-r--r--drivers/md/bcache/extents.c2
-rw-r--r--drivers/md/bcache/io.c16
-rw-r--r--drivers/md/bcache/journal.c8
-rw-r--r--drivers/md/bcache/request.c186
-rw-r--r--drivers/md/bcache/super.c160
-rw-r--r--drivers/md/bcache/sysfs.c55
-rw-r--r--drivers/md/bcache/util.c25
-rw-r--r--drivers/md/bcache/util.h6
-rw-r--r--drivers/md/bcache/writeback.c92
-rw-r--r--drivers/md/bcache/writeback.h4
-rw-r--r--drivers/md/dm-table.c16
-rw-r--r--drivers/md/dm.c2
-rw-r--r--drivers/md/md-linear.c4
-rw-r--r--drivers/md/md.c10
-rw-r--r--drivers/md/raid0.c4
-rw-r--r--drivers/md/raid1.c6
-rw-r--r--drivers/md/raid10.c6
-rw-r--r--drivers/md/raid5.c4
-rw-r--r--drivers/misc/cardreader/rtsx_pcr.c4
-rw-r--r--drivers/mmc/core/block.c2
-rw-r--r--drivers/mmc/core/queue.c8
-rw-r--r--drivers/mtd/mtd_blkdevs.c6
-rw-r--r--drivers/nvdimm/blk.c2
-rw-r--r--drivers/nvdimm/btt.c2
-rw-r--r--drivers/nvdimm/nd.h1
-rw-r--r--drivers/nvdimm/pmem.c6
-rw-r--r--drivers/nvme/host/Makefile1
-rw-r--r--drivers/nvme/host/core.c123
-rw-r--r--drivers/nvme/host/fault_inject.c79
-rw-r--r--drivers/nvme/host/fc.c36
-rw-r--r--drivers/nvme/host/lightnvm.c757
-rw-r--r--drivers/nvme/host/multipath.c8
-rw-r--r--drivers/nvme/host/nvme.h35
-rw-r--r--drivers/nvme/host/pci.c26
-rw-r--r--drivers/nvme/host/rdma.c34
-rw-r--r--drivers/nvme/target/configfs.c65
-rw-r--r--drivers/nvme/target/core.c12
-rw-r--r--drivers/nvme/target/discovery.c30
-rw-r--r--drivers/nvme/target/fc.c23
-rw-r--r--drivers/nvme/target/loop.c4
-rw-r--r--drivers/nvme/target/nvmet.h12
-rw-r--r--drivers/nvme/target/rdma.c72
-rw-r--r--drivers/s390/block/dasd.c4
-rw-r--r--drivers/s390/block/dcssblk.c2
-rw-r--r--drivers/s390/block/scm_blk.c4
-rw-r--r--drivers/s390/block/xpram.c4
-rw-r--r--drivers/s390/scsi/zfcp_fc.c4
-rw-r--r--drivers/scsi/gdth.h3
-rw-r--r--drivers/scsi/iscsi_tcp.c2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fusion.c2
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c2
-rw-r--r--drivers/scsi/scsi_debug.c2
-rw-r--r--drivers/scsi/scsi_lib.c6
-rw-r--r--drivers/scsi/scsi_sysfs.c3
-rw-r--r--drivers/scsi/scsi_transport_sas.c3
-rw-r--r--drivers/scsi/sd.c8
-rw-r--r--drivers/scsi/smartpqi/smartpqi_init.c2
-rw-r--r--drivers/scsi/sr.c2
-rw-r--r--drivers/staging/rts5208/rtsx_chip.h12
-rw-r--r--drivers/staging/rts5208/rtsx_transport.c10
-rw-r--r--drivers/target/iscsi/iscsi_target.c28
-rw-r--r--drivers/target/loopback/tcm_loop.c2
-rw-r--r--fs/direct-io.c9
-rw-r--r--include/linux/blk-cgroup.h1
-rw-r--r--include/linux/blk-mq-pci.h3
-rw-r--r--include/linux/blk_types.h5
-rw-r--r--include/linux/blkdev.h121
-rw-r--r--include/linux/bsg-lib.h7
-rw-r--r--include/linux/bsg.h35
-rw-r--r--include/linux/device-mapper.h2
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/ide.h1
-rw-r--r--include/linux/inet.h1
-rw-r--r--include/linux/lightnvm.h334
-rw-r--r--include/linux/rtsx_pci.h12
-rw-r--r--include/linux/sbitmap.h8
-rw-r--r--include/linux/scatterlist.h23
-rw-r--r--include/uapi/linux/blktrace_api.h2
-rw-r--r--include/uapi/linux/msdos_fs.h2
-rw-r--r--lib/sbitmap.c10
-rw-r--r--mm/backing-dev.c2
-rw-r--r--net/core/utils.c23
148 files changed, 4266 insertions, 2060 deletions
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex
index 8f85b0e41046..f7cd455973f7 100644
--- a/Documentation/cdrom/cdrom-standard.tex
+++ b/Documentation/cdrom/cdrom-standard.tex
@@ -234,6 +234,7 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr
 &int& (* open)(struct\ cdrom_device_info *, int)\cr
 &void& (* release)(struct\ cdrom_device_info *);\cr
 &int& (* drive_status)(struct\ cdrom_device_info *, int);\cr
+&unsigned\ int& (* check_events)(struct\ cdrom_device_info *, unsigned\ int, int);\cr
 &int& (* media_changed)(struct\ cdrom_device_info *, int);\cr
 &int& (* tray_move)(struct\ cdrom_device_info *, int);\cr
 &int& (* lock_door)(struct\ cdrom_device_info *, int);\cr
@@ -245,10 +246,9 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr
 &int& (* reset)(struct\ cdrom_device_info *);\cr
 &int& (* audio_ioctl)(struct\ cdrom_device_info *, unsigned\ int,
    void *{});\cr
-&int& (* dev_ioctl)(struct\ cdrom_device_info *, unsigned\ int,
-   unsigned\ long);\cr
 \noalign{\medskip}
 &const\ int& capability;& capability flags \cr
+&int& (* generic_packet)(struct\ cdrom_device_info *, struct\ packet_command *{});\cr
 \};\cr
 }
 $$
@@ -274,19 +274,32 @@
 \halign{$#$\ \hfil&$#$\ \hfil&\hbox to 10em{$#$\hss}&
   $/*$ \rm# $*/$\hfil\cr
 struct& cdrom_device_info\ \{ \hidewidth\cr
-& struct\ cdrom_device_ops *& ops;& device operations for this major\cr
-& struct\ cdrom_device_info *& next;& next device_info for this major\cr
+& const\ struct\ cdrom_device_ops *& ops;& device operations for this major\cr
+& struct\ list_head& list;& linked list of all device_info\cr
+& struct\ gendisk *& disk;& matching block layer disk\cr
 & void *& handle;& driver-dependent data\cr
 \noalign{\medskip}
-& kdev_t& dev;& device number (incorporates minor)\cr
 & int& mask;& mask of capability: disables them \cr
 & int& speed;& maximum speed for reading data \cr
 & int& capacity;& number of discs in a jukebox \cr
 \noalign{\medskip}
-&int& options : 30;& options flags \cr
+&unsigned\ int& options : 30;& options flags \cr
 &unsigned& mc_flags : 2;& media-change buffer flags \cr
+&unsigned\ int& vfs_events;& cached events for vfs path\cr
+&unsigned\ int& ioctl_events;& cached events for ioctl path\cr
 & int& use_count;& number of times device is opened\cr
 & char& name[20];& name of the device type\cr
+\noalign{\medskip}
+&__u8& sanyo_slot : 2;& Sanyo 3-CD changer support\cr
+&__u8& keeplocked : 1;& CDROM_LOCKDOOR status\cr
+&__u8& reserved : 5;& not used yet\cr
+& int& cdda_method;& see CDDA_* flags\cr
+&__u8& last_sense;& saves last sense key\cr
+&__u8& media_written;& dirty flag, DVD+RW bookkeeping\cr
+&unsigned\ short& mmc3_profile;& current MMC3 profile\cr
+& int& for_data;& unknown:TBD\cr
+& int\ (* exit)\ (struct\ cdrom_device_info *);&& unknown:TBD\cr
+& int& mrw_mode_page;& which MRW mode page is in use\cr
 \}\cr
 }$$
 Using this $struct$, a linked list of the registered minor devices is
@@ -298,9 +311,7 @@ The $mask$ flags can be used to mask out some of the capabilities listed
 in $ops\to capability$, if a specific drive doesn't support a feature
 of the driver. The value $speed$ specifies the maximum head-rate of the
 drive, measured in units of normal audio speed (176\,kB/sec raw data or
-150\,kB/sec file system data). The value $n_discs$ should reflect the
-number of discs the drive can hold simultaneously, if it is designed
-as a juke-box, or otherwise~1. The parameters are declared $const$
+150\,kB/sec file system data). The parameters are declared $const$
 because they describe properties of the drive, which don't change after
 registration.
 
@@ -1002,7 +1013,7 @@ taken over the torch in maintaining \cdromc\ and integrating much
 \cdrom-related code in the 2.1-kernel. Thanks to Scott Snyder and
 Gerd Knorr, who were the first to implement this interface for SCSI
 and IDE-CD drivers and added many ideas for extension of the data
-structures relative to kernel~2.0. Further thanks to Heiko Ei{\sz}feldt,
+structures relative to kernel~2.0. Further thanks to Heiko Ei{\ss}feldt,
 Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard M\"onkeberg and Andrew
 Kroll, the \linux\ \cdrom\ device driver developers who were kind
 enough to give suggestions and criticisms during the writing. Finally
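Read as plain C, the operations table documented in the hunks above corresponds roughly to the following; this is a condensed sketch of exactly the fields shown in the table (dev_ioctl dropped, check_events and generic_packet present), not the full definition from include/linux/cdrom.h:

    #include <linux/cdrom.h>   /* struct cdrom_device_info, struct packet_command */

    /* Sketch of the documented ops table; fields outside the hunks above
     * are elided. */
    struct example_cdrom_device_ops {
            int (*open)(struct cdrom_device_info *, int);
            void (*release)(struct cdrom_device_info *);
            int (*drive_status)(struct cdrom_device_info *, int);
            unsigned int (*check_events)(struct cdrom_device_info *,
                                         unsigned int, int);
            int (*media_changed)(struct cdrom_device_info *, int);
            int (*tray_move)(struct cdrom_device_info *, int);
            int (*lock_door)(struct cdrom_device_info *, int);
            /* ... */
            int (*reset)(struct cdrom_device_info *);
            int (*audio_ioctl)(struct cdrom_device_info *, unsigned int, void *);
            const int capability;   /* capability flags */
            int (*generic_packet)(struct cdrom_device_info *,
                                  struct packet_command *);
    };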
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index de1dc35fe500..4d1b7b4ccfaf 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -36,6 +36,14 @@ o fail_function
   ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
   under /sys/kernel/debug/fail_function. No boot option supported.
 
+o NVMe fault injection
+
+  inject NVMe status code and retry flag on devices permitted by setting
+  debugfs entries under /sys/kernel/debug/nvme*/fault_inject. The default
+  status code is NVME_SC_INVALID_OPCODE with no retry. The status code and
+  retry flag can be set via the debugfs.
+
+
 Configure fault-injection capabilities behavior
 -----------------------------------------------
 
diff --git a/Documentation/fault-injection/nvme-fault-injection.txt b/Documentation/fault-injection/nvme-fault-injection.txt
new file mode 100644
index 000000000000..8fbf3bf60b62
--- /dev/null
+++ b/Documentation/fault-injection/nvme-fault-injection.txt
@@ -0,0 +1,116 @@
+NVMe Fault Injection
+====================
+Linux's fault injection framework provides a systematic way to support
+error injection via debugfs in the /sys/kernel/debug directory. When
+enabled, the default NVME_SC_INVALID_OPCODE with no retry will be
+injected into the nvme_end_request. Users can change the default status
+code and no retry flag via the debugfs. The list of Generic Command
+Status can be found in include/linux/nvme.h
+
+Following examples show how to inject an error into the nvme.
+
+First, enable CONFIG_FAULT_INJECTION_DEBUG_FS kernel config,
+recompile the kernel. After booting up the kernel, do the
+following.
+
+Example 1: Inject default status code with no retry
+---------------------------------------------------
+
+mount /dev/nvme0n1 /mnt
+echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times
+echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability
+cp a.file /mnt
+
+Expected Result:
+
+cp: cannot stat ‘/mnt/a.file’: Input/output error
+
+Message from dmesg:
+
+FAULT_INJECTION: forcing a failure.
+name fault_inject, interval 1, probability 100, space 0, times 1
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc8+ #2
+Hardware name: innotek GmbH VirtualBox/VirtualBox,
+BIOS VirtualBox 12/01/2006
+Call Trace:
+ <IRQ>
+ dump_stack+0x5c/0x7d
+ should_fail+0x148/0x170
+ nvme_should_fail+0x2f/0x50 [nvme_core]
+ nvme_process_cq+0xe7/0x1d0 [nvme]
+ nvme_irq+0x1e/0x40 [nvme]
+ __handle_irq_event_percpu+0x3a/0x190
+ handle_irq_event_percpu+0x30/0x70
+ handle_irq_event+0x36/0x60
+ handle_fasteoi_irq+0x78/0x120
+ handle_irq+0xa7/0x130
+ ? tick_irq_enter+0xa8/0xc0
+ do_IRQ+0x43/0xc0
+ common_interrupt+0xa2/0xa2
+ </IRQ>
+RIP: 0010:native_safe_halt+0x2/0x10
+RSP: 0018:ffffffff82003e90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdd
+RAX: ffffffff817a10c0 RBX: ffffffff82012480 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+RBP: 0000000000000000 R08: 000000008e38ce64 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff82012480
+R13: ffffffff82012480 R14: 0000000000000000 R15: 0000000000000000
+ ? __sched_text_end+0x4/0x4
+ default_idle+0x18/0xf0
+ do_idle+0x150/0x1d0
+ cpu_startup_entry+0x6f/0x80
+ start_kernel+0x4c4/0x4e4
+ ? set_init_arg+0x55/0x55
+ secondary_startup_64+0xa5/0xb0
+ print_req_error: I/O error, dev nvme0n1, sector 9240
+EXT4-fs error (device nvme0n1): ext4_find_entry:1436:
+inode #2: comm cp: reading directory lblock 0
+
+Example 2: Inject default status code with retry
+------------------------------------------------
+
+mount /dev/nvme0n1 /mnt
+echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times
+echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability
+echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/status
+echo 0 > /sys/kernel/debug/nvme0n1/fault_inject/dont_retry
+
+cp a.file /mnt
+
+Expected Result:
+
+command success without error
+
+Message from dmesg:
+
+FAULT_INJECTION: forcing a failure.
+name fault_inject, interval 1, probability 100, space 0, times 1
+CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc8+ #4
+Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+Call Trace:
+ <IRQ>
+ dump_stack+0x5c/0x7d
+ should_fail+0x148/0x170
+ nvme_should_fail+0x30/0x60 [nvme_core]
+ nvme_loop_queue_response+0x84/0x110 [nvme_loop]
+ nvmet_req_complete+0x11/0x40 [nvmet]
+ nvmet_bio_done+0x28/0x40 [nvmet]
+ blk_update_request+0xb0/0x310
+ blk_mq_end_request+0x18/0x60
+ flush_smp_call_function_queue+0x3d/0xf0
+ smp_call_function_single_interrupt+0x2c/0xc0
+ call_function_single_interrupt+0xa2/0xb0
+ </IRQ>
+RIP: 0010:native_safe_halt+0x2/0x10
+RSP: 0018:ffffc9000068bec0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff04
+RAX: ffffffff817a10c0 RBX: ffff88011a3c9680 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+RBP: 0000000000000001 R08: 000000008e38c131 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011a3c9680
+R13: ffff88011a3c9680 R14: 0000000000000000 R15: 0000000000000000
+ ? __sched_text_end+0x4/0x4
+ default_idle+0x18/0xf0
+ do_idle+0x150/0x1d0
+ cpu_startup_entry+0x6f/0x80
+ start_secondary+0x187/0x1e0
+ secondary_startup_64+0xa5/0xb0
diff --git a/MAINTAINERS b/MAINTAINERS
index 9a820d2e3f4e..d95925d4e1f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2646,6 +2646,7 @@ L: linux-block@vger.kernel.org
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 S: Maintained
 F: block/
+F: drivers/block/
 F: kernel/trace/blktrace.c
 F: lib/sbitmap.c
 
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 1b6418407467..026211e7ab09 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -21,7 +21,6 @@
 #include <platform/simcall.h>
 
 #define SIMDISK_MAJOR 240
-#define SECTOR_SHIFT 9
 #define SIMDISK_MINORS 1
 #define MAX_SIMDISK_COUNT 10
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index aeca22d91101..f0ecd98509d8 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -201,7 +201,20 @@ static struct kmem_cache *bfq_pool;
 /* Target observation time interval for a peak-rate update (ns) */
 #define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC
 
-/* Shift used for peak rate fixed precision calculations. */
+/*
+ * Shift used for peak-rate fixed precision calculations.
+ * With
+ * - the current shift: 16 positions
+ * - the current type used to store rate: u32
+ * - the current unit of measure for rate: [sectors/usec], or, more precisely,
+ *   [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift,
+ * the range of rates that can be stored is
+ * [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec =
+ * [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec =
+ * [15, 65G] sectors/sec
+ * Which, assuming a sector size of 512B, corresponds to a range of
+ * [7.5K, 33T] B/sec
+ */
 #define BFQ_RATE_SHIFT 16
 
 /*
@@ -2637,6 +2650,16 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
         rate /= divisor; /* smoothing constant alpha = 1/divisor */
 
         bfqd->peak_rate += rate;
+
+        /*
+         * For a very slow device, bfqd->peak_rate can reach 0 (see
+         * the minimum representable values reported in the comments
+         * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid
+         * divisions by zero where bfqd->peak_rate is used as a
+         * divisor.
+         */
+        bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate);
+
         update_thr_responsiveness_params(bfqd);
 
 reset_computation:
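A quick sanity check of the figures quoted in the new BFQ_RATE_SHIFT comment (arithmetic not from the patch, assuming 512 B sectors):

    1/2^16 sectors/usec = 10^6 / 65536 ≈ 15 sectors/sec ≈ 15 * 512 B ≈ 7.5 KiB/sec
    2^16 sectors/usec   = 65536 * 10^6 ≈ 65.5G sectors/sec ≈ 65.5e9 * 512 B ≈ 33.6 TB/sec

which matches, within rounding, the quoted [15, 65G] sectors/sec and [7.5K, 33T] B/sec ranges.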
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 350c39ae2896..ae2f3dadec44 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -499,7 +499,7 @@ struct bfq_data {
         u64 delta_from_first;
         /*
          * Current estimate of the device peak rate, measured in
-         * [BFQ_RATE_SHIFT * sectors/usec]. The left-shift by
+         * [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by
          * BFQ_RATE_SHIFT is performed to increase precision in
          * fixed-point calculations.
          */
diff --git a/block/bio.c b/block/bio.c
index e1708db48258..53e0f0a1ed94 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -43,9 +43,9 @@
  * break badly! cannot be bigger than what you can fit into an
  * unsigned short
  */
-#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
+#define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n }
 static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
-        BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
+        BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max),
 };
 #undef BV
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c2033a232a44..1c16694ae145 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -307,11 +307,28 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
307 } 307 }
308} 308}
309 309
310static void blkg_pd_offline(struct blkcg_gq *blkg)
311{
312 int i;
313
314 lockdep_assert_held(blkg->q->queue_lock);
315 lockdep_assert_held(&blkg->blkcg->lock);
316
317 for (i = 0; i < BLKCG_MAX_POLS; i++) {
318 struct blkcg_policy *pol = blkcg_policy[i];
319
320 if (blkg->pd[i] && !blkg->pd[i]->offline &&
321 pol->pd_offline_fn) {
322 pol->pd_offline_fn(blkg->pd[i]);
323 blkg->pd[i]->offline = true;
324 }
325 }
326}
327
310static void blkg_destroy(struct blkcg_gq *blkg) 328static void blkg_destroy(struct blkcg_gq *blkg)
311{ 329{
312 struct blkcg *blkcg = blkg->blkcg; 330 struct blkcg *blkcg = blkg->blkcg;
313 struct blkcg_gq *parent = blkg->parent; 331 struct blkcg_gq *parent = blkg->parent;
314 int i;
315 332
316 lockdep_assert_held(blkg->q->queue_lock); 333 lockdep_assert_held(blkg->q->queue_lock);
317 lockdep_assert_held(&blkcg->lock); 334 lockdep_assert_held(&blkcg->lock);
@@ -320,13 +337,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
320 WARN_ON_ONCE(list_empty(&blkg->q_node)); 337 WARN_ON_ONCE(list_empty(&blkg->q_node));
321 WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); 338 WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
322 339
323 for (i = 0; i < BLKCG_MAX_POLS; i++) {
324 struct blkcg_policy *pol = blkcg_policy[i];
325
326 if (blkg->pd[i] && pol->pd_offline_fn)
327 pol->pd_offline_fn(blkg->pd[i]);
328 }
329
330 if (parent) { 340 if (parent) {
331 blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes); 341 blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
332 blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios); 342 blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
@@ -369,6 +379,7 @@ static void blkg_destroy_all(struct request_queue *q)
369 struct blkcg *blkcg = blkg->blkcg; 379 struct blkcg *blkcg = blkg->blkcg;
370 380
371 spin_lock(&blkcg->lock); 381 spin_lock(&blkcg->lock);
382 blkg_pd_offline(blkg);
372 blkg_destroy(blkg); 383 blkg_destroy(blkg);
373 spin_unlock(&blkcg->lock); 384 spin_unlock(&blkcg->lock);
374 } 385 }
@@ -995,25 +1006,25 @@ static struct cftype blkcg_legacy_files[] = {
995 * @css: css of interest 1006 * @css: css of interest
996 * 1007 *
997 * This function is called when @css is about to go away and responsible 1008 * This function is called when @css is about to go away and responsible
998 * for shooting down all blkgs associated with @css. blkgs should be 1009 * for offlining all blkgs pd and killing all wbs associated with @css.
999 * removed while holding both q and blkcg locks. As blkcg lock is nested 1010 * blkgs pd offline should be done while holding both q and blkcg locks.
1000 * inside q lock, this function performs reverse double lock dancing. 1011 * As blkcg lock is nested inside q lock, this function performs reverse
1012 * double lock dancing.
1001 * 1013 *
1002 * This is the blkcg counterpart of ioc_release_fn(). 1014 * This is the blkcg counterpart of ioc_release_fn().
1003 */ 1015 */
1004static void blkcg_css_offline(struct cgroup_subsys_state *css) 1016static void blkcg_css_offline(struct cgroup_subsys_state *css)
1005{ 1017{
1006 struct blkcg *blkcg = css_to_blkcg(css); 1018 struct blkcg *blkcg = css_to_blkcg(css);
1019 struct blkcg_gq *blkg;
1007 1020
1008 spin_lock_irq(&blkcg->lock); 1021 spin_lock_irq(&blkcg->lock);
1009 1022
1010 while (!hlist_empty(&blkcg->blkg_list)) { 1023 hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
1011 struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
1012 struct blkcg_gq, blkcg_node);
1013 struct request_queue *q = blkg->q; 1024 struct request_queue *q = blkg->q;
1014 1025
1015 if (spin_trylock(q->queue_lock)) { 1026 if (spin_trylock(q->queue_lock)) {
1016 blkg_destroy(blkg); 1027 blkg_pd_offline(blkg);
1017 spin_unlock(q->queue_lock); 1028 spin_unlock(q->queue_lock);
1018 } else { 1029 } else {
1019 spin_unlock_irq(&blkcg->lock); 1030 spin_unlock_irq(&blkcg->lock);
@@ -1027,11 +1038,43 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
1027 wb_blkcg_offline(blkcg); 1038 wb_blkcg_offline(blkcg);
1028} 1039}
1029 1040
1041/**
1042 * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
1043 * @blkcg: blkcg of interest
1044 *
1045 * This function is called when blkcg css is about to free and responsible for
1046 * destroying all blkgs associated with @blkcg.
1047 * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
1048 * is nested inside q lock, this function performs reverse double lock dancing.
1049 */
1050static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
1051{
1052 spin_lock_irq(&blkcg->lock);
1053 while (!hlist_empty(&blkcg->blkg_list)) {
1054 struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
1055 struct blkcg_gq,
1056 blkcg_node);
1057 struct request_queue *q = blkg->q;
1058
1059 if (spin_trylock(q->queue_lock)) {
1060 blkg_destroy(blkg);
1061 spin_unlock(q->queue_lock);
1062 } else {
1063 spin_unlock_irq(&blkcg->lock);
1064 cpu_relax();
1065 spin_lock_irq(&blkcg->lock);
1066 }
1067 }
1068 spin_unlock_irq(&blkcg->lock);
1069}
1070
1030static void blkcg_css_free(struct cgroup_subsys_state *css) 1071static void blkcg_css_free(struct cgroup_subsys_state *css)
1031{ 1072{
1032 struct blkcg *blkcg = css_to_blkcg(css); 1073 struct blkcg *blkcg = css_to_blkcg(css);
1033 int i; 1074 int i;
1034 1075
1076 blkcg_destroy_all_blkgs(blkcg);
1077
1035 mutex_lock(&blkcg_pol_mutex); 1078 mutex_lock(&blkcg_pol_mutex);
1036 1079
1037 list_del(&blkcg->all_blkcgs_node); 1080 list_del(&blkcg->all_blkcgs_node);
@@ -1371,8 +1414,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
1371 spin_lock(&blkg->blkcg->lock); 1414 spin_lock(&blkg->blkcg->lock);
1372 1415
1373 if (blkg->pd[pol->plid]) { 1416 if (blkg->pd[pol->plid]) {
1374 if (pol->pd_offline_fn) 1417 if (!blkg->pd[pol->plid]->offline &&
1418 pol->pd_offline_fn) {
1375 pol->pd_offline_fn(blkg->pd[pol->plid]); 1419 pol->pd_offline_fn(blkg->pd[pol->plid]);
1420 blkg->pd[pol->plid]->offline = true;
1421 }
1376 pol->pd_free_fn(blkg->pd[pol->plid]); 1422 pol->pd_free_fn(blkg->pd[pol->plid]);
1377 blkg->pd[pol->plid] = NULL; 1423 blkg->pd[pol->plid] = NULL;
1378 } 1424 }
diff --git a/block/blk-core.c b/block/blk-core.c
index 6d82c4f7fadd..abcb8684ba67 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -71,6 +71,78 @@ struct kmem_cache *blk_requestq_cachep;
71 */ 71 */
72static struct workqueue_struct *kblockd_workqueue; 72static struct workqueue_struct *kblockd_workqueue;
73 73
74/**
75 * blk_queue_flag_set - atomically set a queue flag
76 * @flag: flag to be set
77 * @q: request queue
78 */
79void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
80{
81 unsigned long flags;
82
83 spin_lock_irqsave(q->queue_lock, flags);
84 queue_flag_set(flag, q);
85 spin_unlock_irqrestore(q->queue_lock, flags);
86}
87EXPORT_SYMBOL(blk_queue_flag_set);
88
89/**
90 * blk_queue_flag_clear - atomically clear a queue flag
91 * @flag: flag to be cleared
92 * @q: request queue
93 */
94void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
95{
96 unsigned long flags;
97
98 spin_lock_irqsave(q->queue_lock, flags);
99 queue_flag_clear(flag, q);
100 spin_unlock_irqrestore(q->queue_lock, flags);
101}
102EXPORT_SYMBOL(blk_queue_flag_clear);
103
104/**
105 * blk_queue_flag_test_and_set - atomically test and set a queue flag
106 * @flag: flag to be set
107 * @q: request queue
108 *
109 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
110 * the flag was already set.
111 */
112bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
113{
114 unsigned long flags;
115 bool res;
116
117 spin_lock_irqsave(q->queue_lock, flags);
118 res = queue_flag_test_and_set(flag, q);
119 spin_unlock_irqrestore(q->queue_lock, flags);
120
121 return res;
122}
123EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
124
125/**
126 * blk_queue_flag_test_and_clear - atomically test and clear a queue flag
127 * @flag: flag to be cleared
128 * @q: request queue
129 *
130 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
131 * the flag was set.
132 */
133bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q)
134{
135 unsigned long flags;
136 bool res;
137
138 spin_lock_irqsave(q->queue_lock, flags);
139 res = queue_flag_test_and_clear(flag, q);
140 spin_unlock_irqrestore(q->queue_lock, flags);
141
142 return res;
143}
144EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear);
145
74static void blk_clear_congested(struct request_list *rl, int sync) 146static void blk_clear_congested(struct request_list *rl, int sync)
75{ 147{
76#ifdef CONFIG_CGROUP_WRITEBACK 148#ifdef CONFIG_CGROUP_WRITEBACK
@@ -361,25 +433,14 @@ EXPORT_SYMBOL(blk_sync_queue);
361 */ 433 */
362int blk_set_preempt_only(struct request_queue *q) 434int blk_set_preempt_only(struct request_queue *q)
363{ 435{
364 unsigned long flags; 436 return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
365 int res;
366
367 spin_lock_irqsave(q->queue_lock, flags);
368 res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
369 spin_unlock_irqrestore(q->queue_lock, flags);
370
371 return res;
372} 437}
373EXPORT_SYMBOL_GPL(blk_set_preempt_only); 438EXPORT_SYMBOL_GPL(blk_set_preempt_only);
374 439
375void blk_clear_preempt_only(struct request_queue *q) 440void blk_clear_preempt_only(struct request_queue *q)
376{ 441{
377 unsigned long flags; 442 blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
378
379 spin_lock_irqsave(q->queue_lock, flags);
380 queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
381 wake_up_all(&q->mq_freeze_wq); 443 wake_up_all(&q->mq_freeze_wq);
382 spin_unlock_irqrestore(q->queue_lock, flags);
383} 444}
384EXPORT_SYMBOL_GPL(blk_clear_preempt_only); 445EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
385 446
@@ -629,9 +690,7 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
629 690
630void blk_set_queue_dying(struct request_queue *q) 691void blk_set_queue_dying(struct request_queue *q)
631{ 692{
632 spin_lock_irq(q->queue_lock); 693 blk_queue_flag_set(QUEUE_FLAG_DYING, q);
633 queue_flag_set(QUEUE_FLAG_DYING, q);
634 spin_unlock_irq(q->queue_lock);
635 694
636 /* 695 /*
637 * When queue DYING flag is set, we need to block new req 696 * When queue DYING flag is set, we need to block new req
@@ -719,6 +778,37 @@ void blk_cleanup_queue(struct request_queue *q)
719 del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); 778 del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
720 blk_sync_queue(q); 779 blk_sync_queue(q);
721 780
781 /*
782 * I/O scheduler exit is only safe after the sysfs scheduler attribute
783 * has been removed.
784 */
785 WARN_ON_ONCE(q->kobj.state_in_sysfs);
786
787 /*
788 * Since the I/O scheduler exit code may access cgroup information,
789 * perform I/O scheduler exit before disassociating from the block
790 * cgroup controller.
791 */
792 if (q->elevator) {
793 ioc_clear_queue(q);
794 elevator_exit(q, q->elevator);
795 q->elevator = NULL;
796 }
797
798 /*
799 * Remove all references to @q from the block cgroup controller before
800 * restoring @q->queue_lock to avoid that restoring this pointer causes
801 * e.g. blkcg_print_blkgs() to crash.
802 */
803 blkcg_exit_queue(q);
804
805 /*
806 * Since the cgroup code may dereference the @q->backing_dev_info
807 * pointer, only decrease its reference count after having removed the
808 * association with the block cgroup controller.
809 */
810 bdi_put(q->backing_dev_info);
811
722 if (q->mq_ops) 812 if (q->mq_ops)
723 blk_mq_free_queue(q); 813 blk_mq_free_queue(q);
724 percpu_ref_exit(&q->q_usage_counter); 814 percpu_ref_exit(&q->q_usage_counter);
@@ -810,7 +900,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl)
810 900
811struct request_queue *blk_alloc_queue(gfp_t gfp_mask) 901struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
812{ 902{
813 return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE); 903 return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
814} 904}
815EXPORT_SYMBOL(blk_alloc_queue); 905EXPORT_SYMBOL(blk_alloc_queue);
816 906
@@ -827,7 +917,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
827 bool success = false; 917 bool success = false;
828 int ret; 918 int ret;
829 919
830 rcu_read_lock_sched(); 920 rcu_read_lock();
831 if (percpu_ref_tryget_live(&q->q_usage_counter)) { 921 if (percpu_ref_tryget_live(&q->q_usage_counter)) {
832 /* 922 /*
833 * The code that sets the PREEMPT_ONLY flag is 923 * The code that sets the PREEMPT_ONLY flag is
@@ -840,7 +930,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
840 percpu_ref_put(&q->q_usage_counter); 930 percpu_ref_put(&q->q_usage_counter);
841 } 931 }
842 } 932 }
843 rcu_read_unlock_sched(); 933 rcu_read_unlock();
844 934
845 if (success) 935 if (success)
846 return 0; 936 return 0;
@@ -888,7 +978,21 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
888 kblockd_schedule_work(&q->timeout_work); 978 kblockd_schedule_work(&q->timeout_work);
889} 979}
890 980
891struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) 981/**
982 * blk_alloc_queue_node - allocate a request queue
983 * @gfp_mask: memory allocation flags
984 * @node_id: NUMA node to allocate memory from
985 * @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
986 * serialize calls to the legacy .request_fn() callback. Ignored for
987 * blk-mq request queues.
988 *
989 * Note: pass the queue lock as the third argument to this function instead of
990 * setting the queue lock pointer explicitly to avoid triggering a sporadic
991 * crash in the blkcg code. This function namely calls blkcg_init_queue() and
992 * the queue lock pointer must be set before blkcg_init_queue() is called.
993 */
994struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
995 spinlock_t *lock)
892{ 996{
893 struct request_queue *q; 997 struct request_queue *q;
894 998
@@ -939,11 +1043,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
939 mutex_init(&q->sysfs_lock); 1043 mutex_init(&q->sysfs_lock);
940 spin_lock_init(&q->__queue_lock); 1044 spin_lock_init(&q->__queue_lock);
941 1045
942 /* 1046 if (!q->mq_ops)
943 * By default initialize queue_lock to internal lock and driver can 1047 q->queue_lock = lock ? : &q->__queue_lock;
944 * override it later if need be.
945 */
946 q->queue_lock = &q->__queue_lock;
947 1048
948 /* 1049 /*
949 * A queue starts its life with bypass turned on to avoid 1050 * A queue starts its life with bypass turned on to avoid
@@ -952,7 +1053,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
952 * registered by blk_register_queue(). 1053 * registered by blk_register_queue().
953 */ 1054 */
954 q->bypass_depth = 1; 1055 q->bypass_depth = 1;
955 __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); 1056 queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
956 1057
957 init_waitqueue_head(&q->mq_freeze_wq); 1058 init_waitqueue_head(&q->mq_freeze_wq);
958 1059
@@ -1030,13 +1131,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1030{ 1131{
1031 struct request_queue *q; 1132 struct request_queue *q;
1032 1133
1033 q = blk_alloc_queue_node(GFP_KERNEL, node_id); 1134 q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
1034 if (!q) 1135 if (!q)
1035 return NULL; 1136 return NULL;
1036 1137
1037 q->request_fn = rfn; 1138 q->request_fn = rfn;
1038 if (lock)
1039 q->queue_lock = lock;
1040 if (blk_init_allocated_queue(q) < 0) { 1139 if (blk_init_allocated_queue(q) < 0) {
1041 blk_cleanup_queue(q); 1140 blk_cleanup_queue(q);
1042 return NULL; 1141 return NULL;
@@ -2023,7 +2122,7 @@ out_unlock:
2023 return BLK_QC_T_NONE; 2122 return BLK_QC_T_NONE;
2024} 2123}
2025 2124
2026static void handle_bad_sector(struct bio *bio) 2125static void handle_bad_sector(struct bio *bio, sector_t maxsector)
2027{ 2126{
2028 char b[BDEVNAME_SIZE]; 2127 char b[BDEVNAME_SIZE];
2029 2128
@@ -2031,7 +2130,7 @@ static void handle_bad_sector(struct bio *bio)
2031 printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", 2130 printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
2032 bio_devname(bio, b), bio->bi_opf, 2131 bio_devname(bio, b), bio->bi_opf,
2033 (unsigned long long)bio_end_sector(bio), 2132 (unsigned long long)bio_end_sector(bio),
2034 (long long)get_capacity(bio->bi_disk)); 2133 (long long)maxsector);
2035} 2134}
2036 2135
2037#ifdef CONFIG_FAIL_MAKE_REQUEST 2136#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -2093,67 +2192,58 @@ static noinline int should_fail_bio(struct bio *bio)
2093ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO); 2192ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
2094 2193
2095/* 2194/*
2195 * Check whether this bio extends beyond the end of the device or partition.
2196 * This may well happen - the kernel calls bread() without checking the size of
2197 * the device, e.g., when mounting a file system.
2198 */
2199static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
2200{
2201 unsigned int nr_sectors = bio_sectors(bio);
2202
2203 if (nr_sectors && maxsector &&
2204 (nr_sectors > maxsector ||
2205 bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
2206 handle_bad_sector(bio, maxsector);
2207 return -EIO;
2208 }
2209 return 0;
2210}
2211
2212/*
2096 * Remap block n of partition p to block n+start(p) of the disk. 2213 * Remap block n of partition p to block n+start(p) of the disk.
2097 */ 2214 */
2098static inline int blk_partition_remap(struct bio *bio) 2215static inline int blk_partition_remap(struct bio *bio)
2099{ 2216{
2100 struct hd_struct *p; 2217 struct hd_struct *p;
2101 int ret = 0; 2218 int ret = -EIO;
2102 2219
2103 rcu_read_lock(); 2220 rcu_read_lock();
2104 p = __disk_get_part(bio->bi_disk, bio->bi_partno); 2221 p = __disk_get_part(bio->bi_disk, bio->bi_partno);
2105 if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) || 2222 if (unlikely(!p))
2106 bio_check_ro(bio, p))) { 2223 goto out;
2107 ret = -EIO; 2224 if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
2225 goto out;
2226 if (unlikely(bio_check_ro(bio, p)))
2108 goto out; 2227 goto out;
2109 }
2110 2228
2111 /* 2229 /*
2112 * Zone reset does not include bi_size so bio_sectors() is always 0. 2230 * Zone reset does not include bi_size so bio_sectors() is always 0.
2113 * Include a test for the reset op code and perform the remap if needed. 2231 * Include a test for the reset op code and perform the remap if needed.
2114 */ 2232 */
2115 if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET) 2233 if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
2116 goto out; 2234 if (bio_check_eod(bio, part_nr_sects_read(p)))
2117 2235 goto out;
2118 bio->bi_iter.bi_sector += p->start_sect; 2236 bio->bi_iter.bi_sector += p->start_sect;
2119 bio->bi_partno = 0; 2237 bio->bi_partno = 0;
2120 trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), 2238 trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
2121 bio->bi_iter.bi_sector - p->start_sect); 2239 bio->bi_iter.bi_sector - p->start_sect);
2122 2240 }
2241 ret = 0;
2123out: 2242out:
2124 rcu_read_unlock(); 2243 rcu_read_unlock();
2125 return ret; 2244 return ret;
2126} 2245}
2127 2246
2128/*
2129 * Check whether this bio extends beyond the end of the device.
2130 */
2131static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
2132{
2133 sector_t maxsector;
2134
2135 if (!nr_sectors)
2136 return 0;
2137
2138 /* Test device or partition size, when known. */
2139 maxsector = get_capacity(bio->bi_disk);
2140 if (maxsector) {
2141 sector_t sector = bio->bi_iter.bi_sector;
2142
2143 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
2144 /*
2145 * This may well happen - the kernel calls bread()
2146 * without checking the size of the device, e.g., when
2147 * mounting a device.
2148 */
2149 handle_bad_sector(bio);
2150 return 1;
2151 }
2152 }
2153
2154 return 0;
2155}
2156
2157static noinline_for_stack bool 2247static noinline_for_stack bool
2158generic_make_request_checks(struct bio *bio) 2248generic_make_request_checks(struct bio *bio)
2159{ 2249{
@@ -2164,9 +2254,6 @@ generic_make_request_checks(struct bio *bio)
2164 2254
2165 might_sleep(); 2255 might_sleep();
2166 2256
2167 if (bio_check_eod(bio, nr_sectors))
2168 goto end_io;
2169
2170 q = bio->bi_disk->queue; 2257 q = bio->bi_disk->queue;
2171 if (unlikely(!q)) { 2258 if (unlikely(!q)) {
2172 printk(KERN_ERR 2259 printk(KERN_ERR
@@ -2186,17 +2273,16 @@ generic_make_request_checks(struct bio *bio)
2186 if (should_fail_bio(bio)) 2273 if (should_fail_bio(bio))
2187 goto end_io; 2274 goto end_io;
2188 2275
2189 if (!bio->bi_partno) { 2276 if (bio->bi_partno) {
2190 if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) 2277 if (unlikely(blk_partition_remap(bio)))
2191 goto end_io; 2278 goto end_io;
2192 } else { 2279 } else {
2193 if (blk_partition_remap(bio)) 2280 if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
2281 goto end_io;
2282 if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
2194 goto end_io; 2283 goto end_io;
2195 } 2284 }
2196 2285
2197 if (bio_check_eod(bio, nr_sectors))
2198 goto end_io;
2199
2200 /* 2286 /*
2201 * Filter flush bio's early so that make_request based 2287 * Filter flush bio's early so that make_request based
2202 * drivers without flush support don't have to worry 2288 * drivers without flush support don't have to worry
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 21cbc1f071c6..58b3b79cbe83 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -24,6 +24,64 @@
24#include "blk-mq-debugfs.h" 24#include "blk-mq-debugfs.h"
25#include "blk-mq-tag.h" 25#include "blk-mq-tag.h"
26 26
27static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
28{
29 if (stat->nr_samples) {
30 seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
31 stat->nr_samples, stat->mean, stat->min, stat->max);
32 } else {
33 seq_puts(m, "samples=0");
34 }
35}
36
37static int queue_poll_stat_show(void *data, struct seq_file *m)
38{
39 struct request_queue *q = data;
40 int bucket;
41
42 for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
43 seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket));
44 print_stat(m, &q->poll_stat[2*bucket]);
45 seq_puts(m, "\n");
46
47 seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
48 print_stat(m, &q->poll_stat[2*bucket+1]);
49 seq_puts(m, "\n");
50 }
51 return 0;
52}
53
54static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
55 __acquires(&q->requeue_lock)
56{
57 struct request_queue *q = m->private;
58
59 spin_lock_irq(&q->requeue_lock);
60 return seq_list_start(&q->requeue_list, *pos);
61}
62
63static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
64{
65 struct request_queue *q = m->private;
66
67 return seq_list_next(v, &q->requeue_list, pos);
68}
69
70static void queue_requeue_list_stop(struct seq_file *m, void *v)
71 __releases(&q->requeue_lock)
72{
73 struct request_queue *q = m->private;
74
75 spin_unlock_irq(&q->requeue_lock);
76}
77
78static const struct seq_operations queue_requeue_list_seq_ops = {
79 .start = queue_requeue_list_start,
80 .next = queue_requeue_list_next,
81 .stop = queue_requeue_list_stop,
82 .show = blk_mq_debugfs_rq_show,
83};
84
27static int blk_flags_show(struct seq_file *m, const unsigned long flags, 85static int blk_flags_show(struct seq_file *m, const unsigned long flags,
28 const char *const *flag_name, int flag_name_count) 86 const char *const *flag_name, int flag_name_count)
29{ 87{
@@ -125,16 +183,6 @@ inval:
125 return count; 183 return count;
126} 184}
127 185
128static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
129{
130 if (stat->nr_samples) {
131 seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
132 stat->nr_samples, stat->mean, stat->min, stat->max);
133 } else {
134 seq_puts(m, "samples=0");
135 }
136}
137
138static int queue_write_hint_show(void *data, struct seq_file *m) 186static int queue_write_hint_show(void *data, struct seq_file *m)
139{ 187{
140 struct request_queue *q = data; 188 struct request_queue *q = data;
@@ -158,23 +206,30 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
158 return count; 206 return count;
159} 207}
160 208
161static int queue_poll_stat_show(void *data, struct seq_file *m) 209static int queue_zone_wlock_show(void *data, struct seq_file *m)
162{ 210{
163 struct request_queue *q = data; 211 struct request_queue *q = data;
164 int bucket; 212 unsigned int i;
165 213
166 for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) { 214 if (!q->seq_zones_wlock)
167 seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket)); 215 return 0;
168 print_stat(m, &q->poll_stat[2*bucket]); 216
169 seq_puts(m, "\n"); 217 for (i = 0; i < blk_queue_nr_zones(q); i++)
218 if (test_bit(i, q->seq_zones_wlock))
219 seq_printf(m, "%u\n", i);
170 220
171 seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
172 print_stat(m, &q->poll_stat[2*bucket+1]);
173 seq_puts(m, "\n");
174 }
175 return 0; 221 return 0;
176} 222}
177 223
224static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
225 { "poll_stat", 0400, queue_poll_stat_show },
226 { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
227 { "state", 0600, queue_state_show, queue_state_write },
228 { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
229 { "zone_wlock", 0400, queue_zone_wlock_show, NULL },
230 { },
231};
232
178#define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name 233#define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name
179static const char *const hctx_state_name[] = { 234static const char *const hctx_state_name[] = {
180 HCTX_STATE_NAME(STOPPED), 235 HCTX_STATE_NAME(STOPPED),
@@ -295,6 +350,20 @@ static const char *const rqf_name[] = {
295}; 350};
296#undef RQF_NAME 351#undef RQF_NAME
297 352
353static const char *const blk_mq_rq_state_name_array[] = {
354 [MQ_RQ_IDLE] = "idle",
355 [MQ_RQ_IN_FLIGHT] = "in_flight",
356 [MQ_RQ_COMPLETE] = "complete",
357};
358
359static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
360{
361 if (WARN_ON_ONCE((unsigned int)rq_state >
362 ARRAY_SIZE(blk_mq_rq_state_name_array)))
363 return "(?)";
364 return blk_mq_rq_state_name_array[rq_state];
365}
366
298int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) 367int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
299{ 368{
300 const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; 369 const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -311,7 +380,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
311 seq_puts(m, ", .rq_flags="); 380 seq_puts(m, ", .rq_flags=");
312 blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, 381 blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
313 ARRAY_SIZE(rqf_name)); 382 ARRAY_SIZE(rqf_name));
314 seq_printf(m, ", complete=%d", blk_rq_is_complete(rq)); 383 seq_printf(m, ", .state=%s", blk_mq_rq_state_name(blk_mq_rq_state(rq)));
315 seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag, 384 seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
316 rq->internal_tag); 385 rq->internal_tag);
317 if (mq_ops->show_rq) 386 if (mq_ops->show_rq)
@@ -327,37 +396,6 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
327} 396}
328EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); 397EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
329 398
330static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
331 __acquires(&q->requeue_lock)
332{
333 struct request_queue *q = m->private;
334
335 spin_lock_irq(&q->requeue_lock);
336 return seq_list_start(&q->requeue_list, *pos);
337}
338
339static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
340{
341 struct request_queue *q = m->private;
342
343 return seq_list_next(v, &q->requeue_list, pos);
344}
345
346static void queue_requeue_list_stop(struct seq_file *m, void *v)
347 __releases(&q->requeue_lock)
348{
349 struct request_queue *q = m->private;
350
351 spin_unlock_irq(&q->requeue_lock);
352}
353
354static const struct seq_operations queue_requeue_list_seq_ops = {
355 .start = queue_requeue_list_start,
356 .next = queue_requeue_list_next,
357 .stop = queue_requeue_list_stop,
358 .show = blk_mq_debugfs_rq_show,
359};
360
361static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) 399static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
362 __acquires(&hctx->lock) 400 __acquires(&hctx->lock)
363{ 401{
@@ -747,14 +785,6 @@ static const struct file_operations blk_mq_debugfs_fops = {
747 .release = blk_mq_debugfs_release, 785 .release = blk_mq_debugfs_release,
748}; 786};
749 787
750static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
751 {"poll_stat", 0400, queue_poll_stat_show},
752 {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
753 {"state", 0600, queue_state_show, queue_state_write},
754 {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
755 {},
756};
757
758static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { 788static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
759 {"state", 0400, hctx_state_show}, 789 {"state", 0400, hctx_state_show},
760 {"flags", 0400, hctx_flags_show}, 790 {"flags", 0400, hctx_flags_show},
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index 76944e3271bf..e233996bb76f 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -21,6 +21,7 @@
  * blk_mq_pci_map_queues - provide a default queue mapping for PCI device
  * @set: tagset to provide the mapping for
  * @pdev: PCI device associated with @set.
+ * @offset: Offset to use for the pci irq vector
  *
  * This function assumes the PCI device @pdev has at least as many available
  * interrupt vectors as @set has queues. It will then query the vector
@@ -28,13 +29,14 @@
  * that maps a queue to the CPUs that have irq affinity for the corresponding
  * vector.
  */
-int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
+int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
+                          int offset)
 {
         const struct cpumask *mask;
         unsigned int queue, cpu;
 
         for (queue = 0; queue < set->nr_hw_queues; queue++) {
-                mask = pci_irq_get_affinity(pdev, queue);
+                mask = pci_irq_get_affinity(pdev, queue + offset);
                 if (!mask)
                         goto fallback;
 
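The new offset argument lets a driver skip interrupt vectors that are not used for I/O queues. A minimal sketch of a PCI driver's .map_queues callback built on the extended helper; the mydrv_* names and the offset of 1 are hypothetical, not taken from this patch:

    #include <linux/blk-mq-pci.h>
    #include <linux/pci.h>

    /* Hypothetical driver context: vector 0 is reserved for admin
     * interrupts, so I/O queue vectors start at 1. */
    struct mydrv_ctrl {
            struct pci_dev *pdev;
            struct blk_mq_tag_set tag_set;
    };

    static int mydrv_map_queues(struct blk_mq_tag_set *set)
    {
            struct mydrv_ctrl *ctrl = set->driver_data;

            /* Skip the admin vector when mapping hw queues to CPUs. */
            return blk_mq_pci_map_queues(set, ctrl->pdev, 1 /* offset */);
    }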
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 16e83e6df404..f5c7dbcb954f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -194,11 +194,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
194 */ 194 */
195void blk_mq_quiesce_queue_nowait(struct request_queue *q) 195void blk_mq_quiesce_queue_nowait(struct request_queue *q)
196{ 196{
197 unsigned long flags; 197 blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
198
199 spin_lock_irqsave(q->queue_lock, flags);
200 queue_flag_set(QUEUE_FLAG_QUIESCED, q);
201 spin_unlock_irqrestore(q->queue_lock, flags);
202} 198}
203EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); 199EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
204 200
@@ -239,11 +235,7 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
239 */ 235 */
240void blk_mq_unquiesce_queue(struct request_queue *q) 236void blk_mq_unquiesce_queue(struct request_queue *q)
241{ 237{
242 unsigned long flags; 238 blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
243
244 spin_lock_irqsave(q->queue_lock, flags);
245 queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
246 spin_unlock_irqrestore(q->queue_lock, flags);
247 239
248 /* dispatch requests which are inserted during quiescing */ 240 /* dispatch requests which are inserted during quiescing */
249 blk_mq_run_hw_queues(q, true); 241 blk_mq_run_hw_queues(q, true);
@@ -986,9 +978,9 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
986 struct blk_mq_hw_ctx *hctx = flush_data->hctx; 978 struct blk_mq_hw_ctx *hctx = flush_data->hctx;
987 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; 979 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
988 980
989 sbitmap_clear_bit(sb, bitnr);
990 spin_lock(&ctx->lock); 981 spin_lock(&ctx->lock);
991 list_splice_tail_init(&ctx->rq_list, flush_data->list); 982 list_splice_tail_init(&ctx->rq_list, flush_data->list);
983 sbitmap_clear_bit(sb, bitnr);
992 spin_unlock(&ctx->lock); 984 spin_unlock(&ctx->lock);
993 return true; 985 return true;
994} 986}
@@ -2556,7 +2548,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
2556{ 2548{
2557 struct request_queue *uninit_q, *q; 2549 struct request_queue *uninit_q, *q;
2558 2550
2559 uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); 2551 uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);
2560 if (!uninit_q) 2552 if (!uninit_q)
2561 return ERR_PTR(-ENOMEM); 2553 return ERR_PTR(-ENOMEM);
2562 2554
@@ -2678,7 +2670,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
2678 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; 2670 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
2679 2671
2680 if (!(set->flags & BLK_MQ_F_SG_MERGE)) 2672 if (!(set->flags & BLK_MQ_F_SG_MERGE))
2681 q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE; 2673 queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
2682 2674
2683 q->sg_reserved_size = INT_MAX; 2675 q->sg_reserved_size = INT_MAX;
2684 2676
@@ -3005,7 +2997,7 @@ EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
3005static bool blk_poll_stats_enable(struct request_queue *q) 2997static bool blk_poll_stats_enable(struct request_queue *q)
3006{ 2998{
3007 if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) || 2999 if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
3008 test_and_set_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags)) 3000 blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
3009 return true; 3001 return true;
3010 blk_stat_add_callback(q, q->poll_cb); 3002 blk_stat_add_callback(q, q->poll_cb);
3011 return false; 3003 return false;
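
The blk-mq hunks above replace open-coded flag manipulation under q->queue_lock with the blk_queue_flag_set()/blk_queue_flag_clear()/blk_queue_flag_test_and_set() helpers this series introduces. A minimal driver-facing sketch, assuming the helper declarations live in <linux/blkdev.h> as elsewhere in this diff:

#include <linux/blkdev.h>

/* Minimal sketch: tune a queue without ever touching q->queue_lock. */
static void mydrv_tune_queue(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);

	/* test-and-set variant: returns true if the flag was already set */
	if (!blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
		pr_info("poll statistics enabled\n");
}
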
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 48ebe6be07b7..d1de71124656 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -859,12 +859,10 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment);
859 859
860void blk_queue_flush_queueable(struct request_queue *q, bool queueable) 860void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
861{ 861{
862 spin_lock_irq(q->queue_lock);
863 if (queueable) 862 if (queueable)
864 clear_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); 863 blk_queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q);
865 else 864 else
866 set_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); 865 blk_queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q);
867 spin_unlock_irq(q->queue_lock);
868} 866}
869EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); 867EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
870 868
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 28003bf9941c..bd365a95fcf8 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -152,7 +152,7 @@ void blk_stat_add_callback(struct request_queue *q,
152 152
153 spin_lock(&q->stats->lock); 153 spin_lock(&q->stats->lock);
154 list_add_tail_rcu(&cb->list, &q->stats->callbacks); 154 list_add_tail_rcu(&cb->list, &q->stats->callbacks);
155 set_bit(QUEUE_FLAG_STATS, &q->queue_flags); 155 blk_queue_flag_set(QUEUE_FLAG_STATS, q);
156 spin_unlock(&q->stats->lock); 156 spin_unlock(&q->stats->lock);
157} 157}
158EXPORT_SYMBOL_GPL(blk_stat_add_callback); 158EXPORT_SYMBOL_GPL(blk_stat_add_callback);
@@ -163,7 +163,7 @@ void blk_stat_remove_callback(struct request_queue *q,
163 spin_lock(&q->stats->lock); 163 spin_lock(&q->stats->lock);
164 list_del_rcu(&cb->list); 164 list_del_rcu(&cb->list);
165 if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting) 165 if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
166 clear_bit(QUEUE_FLAG_STATS, &q->queue_flags); 166 blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
167 spin_unlock(&q->stats->lock); 167 spin_unlock(&q->stats->lock);
168 168
169 del_timer_sync(&cb->timer); 169 del_timer_sync(&cb->timer);
@@ -191,7 +191,7 @@ void blk_stat_enable_accounting(struct request_queue *q)
191{ 191{
192 spin_lock(&q->stats->lock); 192 spin_lock(&q->stats->lock);
193 q->stats->enable_accounting = true; 193 q->stats->enable_accounting = true;
194 set_bit(QUEUE_FLAG_STATS, &q->queue_flags); 194 blk_queue_flag_set(QUEUE_FLAG_STATS, q);
195 spin_unlock(&q->stats->lock); 195 spin_unlock(&q->stats->lock);
196} 196}
197 197
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cbea895a5547..d00d1b0ec109 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -276,12 +276,10 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \
276 if (neg) \ 276 if (neg) \
277 val = !val; \ 277 val = !val; \
278 \ 278 \
279 spin_lock_irq(q->queue_lock); \
280 if (val) \ 279 if (val) \
281 queue_flag_set(QUEUE_FLAG_##flag, q); \ 280 blk_queue_flag_set(QUEUE_FLAG_##flag, q); \
282 else \ 281 else \
283 queue_flag_clear(QUEUE_FLAG_##flag, q); \ 282 blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \
284 spin_unlock_irq(q->queue_lock); \
285 return ret; \ 283 return ret; \
286} 284}
287 285
@@ -414,12 +412,10 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
414 if (ret < 0) 412 if (ret < 0)
415 return ret; 413 return ret;
416 414
417 spin_lock_irq(q->queue_lock);
418 if (poll_on) 415 if (poll_on)
419 queue_flag_set(QUEUE_FLAG_POLL, q); 416 blk_queue_flag_set(QUEUE_FLAG_POLL, q);
420 else 417 else
421 queue_flag_clear(QUEUE_FLAG_POLL, q); 418 blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
422 spin_unlock_irq(q->queue_lock);
423 419
424 return ret; 420 return ret;
425} 421}
@@ -487,12 +483,10 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
487 if (set == -1) 483 if (set == -1)
488 return -EINVAL; 484 return -EINVAL;
489 485
490 spin_lock_irq(q->queue_lock);
491 if (set) 486 if (set)
492 queue_flag_set(QUEUE_FLAG_WC, q); 487 blk_queue_flag_set(QUEUE_FLAG_WC, q);
493 else 488 else
494 queue_flag_clear(QUEUE_FLAG_WC, q); 489 blk_queue_flag_clear(QUEUE_FLAG_WC, q);
495 spin_unlock_irq(q->queue_lock);
496 490
497 return count; 491 return count;
498} 492}
@@ -798,13 +792,6 @@ static void __blk_release_queue(struct work_struct *work)
798 if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags)) 792 if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
799 blk_stat_remove_callback(q, q->poll_cb); 793 blk_stat_remove_callback(q, q->poll_cb);
800 blk_stat_free_callback(q->poll_cb); 794 blk_stat_free_callback(q->poll_cb);
801 bdi_put(q->backing_dev_info);
802 blkcg_exit_queue(q);
803
804 if (q->elevator) {
805 ioc_clear_queue(q);
806 elevator_exit(q, q->elevator);
807 }
808 795
809 blk_free_queue_stats(q->stats); 796 blk_free_queue_stats(q->stats);
810 797
@@ -953,9 +940,7 @@ void blk_unregister_queue(struct gendisk *disk)
953 */ 940 */
954 mutex_lock(&q->sysfs_lock); 941 mutex_lock(&q->sysfs_lock);
955 942
956 spin_lock_irq(q->queue_lock); 943 blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
957 queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
958 spin_unlock_irq(q->queue_lock);
959 944
960 /* 945 /*
961 * Remove the sysfs attributes before unregistering the queue data 946 * Remove the sysfs attributes before unregistering the queue data
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index a05e3676d24a..652d4d4d3e97 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -57,12 +57,10 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
57 char *p = (char *) buf; 57 char *p = (char *) buf;
58 58
59 val = simple_strtoul(p, &p, 10); 59 val = simple_strtoul(p, &p, 10);
60 spin_lock_irq(q->queue_lock);
61 if (val) 60 if (val)
62 queue_flag_set(QUEUE_FLAG_FAIL_IO, q); 61 blk_queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
63 else 62 else
64 queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); 63 blk_queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
65 spin_unlock_irq(q->queue_lock);
66 } 64 }
67 65
68 return count; 66 return count;
@@ -165,7 +163,7 @@ void blk_abort_request(struct request *req)
165 * No need for fancy synchronizations. 163 * No need for fancy synchronizations.
166 */ 164 */
167 blk_rq_set_deadline(req, jiffies); 165 blk_rq_set_deadline(req, jiffies);
168 mod_timer(&req->q->timeout, 0); 166 kblockd_schedule_work(&req->q->timeout_work);
169 } else { 167 } else {
170 if (blk_mark_rq_complete(req)) 168 if (blk_mark_rq_complete(req))
171 return; 169 return;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index acb7252c7e81..08e84ef2bc05 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -296,7 +296,7 @@ int blkdev_reset_zones(struct block_device *bdev,
296} 296}
297EXPORT_SYMBOL_GPL(blkdev_reset_zones); 297EXPORT_SYMBOL_GPL(blkdev_reset_zones);
298 298
299/** 299/*
300 * BLKREPORTZONE ioctl processing. 300 * BLKREPORTZONE ioctl processing.
301 * Called from blkdev_ioctl. 301 * Called from blkdev_ioctl.
302 */ 302 */
@@ -355,7 +355,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
355 return ret; 355 return ret;
356} 356}
357 357
358/** 358/*
359 * BLKRESETZONE ioctl processing. 359 * BLKRESETZONE ioctl processing.
360 * Called from blkdev_ioctl. 360 * Called from blkdev_ioctl.
361 */ 361 */
diff --git a/block/blk.h b/block/blk.h
index 46db5dc83dcb..b034fd2460c4 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -41,6 +41,75 @@ extern struct kmem_cache *request_cachep;
41extern struct kobj_type blk_queue_ktype; 41extern struct kobj_type blk_queue_ktype;
42extern struct ida blk_queue_ida; 42extern struct ida blk_queue_ida;
43 43
44/*
45 * @q->queue_lock is set while a queue is being initialized. Since we know
46 * that no other threads access the queue object before @q->queue_lock has
47 * been set, it is safe to manipulate queue flags without holding the
48 * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
49 * blk_init_allocated_queue().
50 */
51static inline void queue_lockdep_assert_held(struct request_queue *q)
52{
53 if (q->queue_lock)
54 lockdep_assert_held(q->queue_lock);
55}
56
57static inline void queue_flag_set_unlocked(unsigned int flag,
58 struct request_queue *q)
59{
60 if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) &&
61 kref_read(&q->kobj.kref))
62 lockdep_assert_held(q->queue_lock);
63 __set_bit(flag, &q->queue_flags);
64}
65
66static inline void queue_flag_clear_unlocked(unsigned int flag,
67 struct request_queue *q)
68{
69 if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) &&
70 kref_read(&q->kobj.kref))
71 lockdep_assert_held(q->queue_lock);
72 __clear_bit(flag, &q->queue_flags);
73}
74
75static inline int queue_flag_test_and_clear(unsigned int flag,
76 struct request_queue *q)
77{
78 queue_lockdep_assert_held(q);
79
80 if (test_bit(flag, &q->queue_flags)) {
81 __clear_bit(flag, &q->queue_flags);
82 return 1;
83 }
84
85 return 0;
86}
87
88static inline int queue_flag_test_and_set(unsigned int flag,
89 struct request_queue *q)
90{
91 queue_lockdep_assert_held(q);
92
93 if (!test_bit(flag, &q->queue_flags)) {
94 __set_bit(flag, &q->queue_flags);
95 return 0;
96 }
97
98 return 1;
99}
100
101static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
102{
103 queue_lockdep_assert_held(q);
104 __set_bit(flag, &q->queue_flags);
105}
106
107static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
108{
109 queue_lockdep_assert_held(q);
110 __clear_bit(flag, &q->queue_flags);
111}
112
44static inline struct blk_flush_queue *blk_get_flush_queue( 113static inline struct blk_flush_queue *blk_get_flush_queue(
45 struct request_queue *q, struct blk_mq_ctx *ctx) 114 struct request_queue *q, struct blk_mq_ctx *ctx)
46{ 115{
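
The comment and helpers added to block/blk.h above keep lockdep coverage for block-core code that still runs under q->queue_lock, while driver-visible wrappers (blk_queue_flag_set() and friends, exported elsewhere in this series) take care of atomicity themselves. A hedged sketch of the two call styles, usable only inside block/ where blk.h is visible:

/* Sketch of the two styles after this change; "q" is any request_queue. */
static void example_quiesce_flag(struct request_queue *q)
{
	/* block-core style: lock held, unlocked helper, lockdep-checked */
	spin_lock_irq(q->queue_lock);
	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
	spin_unlock_irq(q->queue_lock);

	/* driver style: the exported wrapper handles locking itself */
	blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
}
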
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 1474153f73e3..fc2e5ff2c4b9 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -27,6 +27,94 @@
27#include <linux/bsg-lib.h> 27#include <linux/bsg-lib.h>
28#include <linux/export.h> 28#include <linux/export.h>
29#include <scsi/scsi_cmnd.h> 29#include <scsi/scsi_cmnd.h>
30#include <scsi/sg.h>
31
32#define uptr64(val) ((void __user *)(uintptr_t)(val))
33
34static int bsg_transport_check_proto(struct sg_io_v4 *hdr)
35{
36 if (hdr->protocol != BSG_PROTOCOL_SCSI ||
37 hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_TRANSPORT)
38 return -EINVAL;
39 if (!capable(CAP_SYS_RAWIO))
40 return -EPERM;
41 return 0;
42}
43
44static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
45 fmode_t mode)
46{
47 struct bsg_job *job = blk_mq_rq_to_pdu(rq);
48
49 job->request_len = hdr->request_len;
50 job->request = memdup_user(uptr64(hdr->request), hdr->request_len);
51 if (IS_ERR(job->request))
52 return PTR_ERR(job->request);
53 return 0;
54}
55
56static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
57{
58 struct bsg_job *job = blk_mq_rq_to_pdu(rq);
59 int ret = 0;
60
61 /*
62 * The assignments below don't make much sense, but are kept for
63 * bug by bug backwards compatibility:
64 */
65 hdr->device_status = job->result & 0xff;
66 hdr->transport_status = host_byte(job->result);
67 hdr->driver_status = driver_byte(job->result);
68 hdr->info = 0;
69 if (hdr->device_status || hdr->transport_status || hdr->driver_status)
70 hdr->info |= SG_INFO_CHECK;
71 hdr->response_len = 0;
72
73 if (job->result < 0) {
74 /* we're only returning the result field in the reply */
75 job->reply_len = sizeof(u32);
76 ret = job->result;
77 }
78
79 if (job->reply_len && hdr->response) {
80 int len = min(hdr->max_response_len, job->reply_len);
81
82 if (copy_to_user(uptr64(hdr->response), job->reply, len))
83 ret = -EFAULT;
84 else
85 hdr->response_len = len;
86 }
87
88 /* we assume all request payload was transferred, residual == 0 */
89 hdr->dout_resid = 0;
90
91 if (rq->next_rq) {
92 unsigned int rsp_len = job->reply_payload.payload_len;
93
94 if (WARN_ON(job->reply_payload_rcv_len > rsp_len))
95 hdr->din_resid = 0;
96 else
97 hdr->din_resid = rsp_len - job->reply_payload_rcv_len;
98 } else {
99 hdr->din_resid = 0;
100 }
101
102 return ret;
103}
104
105static void bsg_transport_free_rq(struct request *rq)
106{
107 struct bsg_job *job = blk_mq_rq_to_pdu(rq);
108
109 kfree(job->request);
110}
111
112static const struct bsg_ops bsg_transport_ops = {
113 .check_proto = bsg_transport_check_proto,
114 .fill_hdr = bsg_transport_fill_hdr,
115 .complete_rq = bsg_transport_complete_rq,
116 .free_rq = bsg_transport_free_rq,
117};
30 118
31/** 119/**
32 * bsg_teardown_job - routine to teardown a bsg job 120 * bsg_teardown_job - routine to teardown a bsg job
@@ -35,7 +123,7 @@
35static void bsg_teardown_job(struct kref *kref) 123static void bsg_teardown_job(struct kref *kref)
36{ 124{
37 struct bsg_job *job = container_of(kref, struct bsg_job, kref); 125 struct bsg_job *job = container_of(kref, struct bsg_job, kref);
38 struct request *rq = job->req; 126 struct request *rq = blk_mq_rq_from_pdu(job);
39 127
40 put_device(job->dev); /* release reference for the request */ 128 put_device(job->dev); /* release reference for the request */
41 129
@@ -68,28 +156,9 @@ EXPORT_SYMBOL_GPL(bsg_job_get);
68void bsg_job_done(struct bsg_job *job, int result, 156void bsg_job_done(struct bsg_job *job, int result,
69 unsigned int reply_payload_rcv_len) 157 unsigned int reply_payload_rcv_len)
70{ 158{
71 struct request *req = job->req; 159 job->result = result;
72 struct request *rsp = req->next_rq; 160 job->reply_payload_rcv_len = reply_payload_rcv_len;
73 struct scsi_request *rq = scsi_req(req); 161 blk_complete_request(blk_mq_rq_from_pdu(job));
74 int err;
75
76 err = scsi_req(job->req)->result = result;
77 if (err < 0)
78 /* we're only returning the result field in the reply */
79 rq->sense_len = sizeof(u32);
80 else
81 rq->sense_len = job->reply_len;
82 /* we assume all request payload was transferred, residual == 0 */
83 rq->resid_len = 0;
84
85 if (rsp) {
86 WARN_ON(reply_payload_rcv_len > scsi_req(rsp)->resid_len);
87
88 /* set reply (bidi) residual */
89 scsi_req(rsp)->resid_len -=
90 min(reply_payload_rcv_len, scsi_req(rsp)->resid_len);
91 }
92 blk_complete_request(req);
93} 162}
94EXPORT_SYMBOL_GPL(bsg_job_done); 163EXPORT_SYMBOL_GPL(bsg_job_done);
95 164
@@ -114,7 +183,6 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
114 if (!buf->sg_list) 183 if (!buf->sg_list)
115 return -ENOMEM; 184 return -ENOMEM;
116 sg_init_table(buf->sg_list, req->nr_phys_segments); 185 sg_init_table(buf->sg_list, req->nr_phys_segments);
117 scsi_req(req)->resid_len = blk_rq_bytes(req);
118 buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); 186 buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
119 buf->payload_len = blk_rq_bytes(req); 187 buf->payload_len = blk_rq_bytes(req);
120 return 0; 188 return 0;
@@ -125,15 +193,13 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
125 * @dev: device that is being sent the bsg request 193 * @dev: device that is being sent the bsg request
126 * @req: BSG request that needs a job structure 194 * @req: BSG request that needs a job structure
127 */ 195 */
128static int bsg_prepare_job(struct device *dev, struct request *req) 196static bool bsg_prepare_job(struct device *dev, struct request *req)
129{ 197{
130 struct request *rsp = req->next_rq; 198 struct request *rsp = req->next_rq;
131 struct scsi_request *rq = scsi_req(req);
132 struct bsg_job *job = blk_mq_rq_to_pdu(req); 199 struct bsg_job *job = blk_mq_rq_to_pdu(req);
133 int ret; 200 int ret;
134 201
135 job->request = rq->cmd; 202 job->timeout = req->timeout;
136 job->request_len = rq->cmd_len;
137 203
138 if (req->bio) { 204 if (req->bio) {
139 ret = bsg_map_buffer(&job->request_payload, req); 205 ret = bsg_map_buffer(&job->request_payload, req);
@@ -149,12 +215,13 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
149 /* take a reference for the request */ 215 /* take a reference for the request */
150 get_device(job->dev); 216 get_device(job->dev);
151 kref_init(&job->kref); 217 kref_init(&job->kref);
152 return 0; 218 return true;
153 219
154failjob_rls_rqst_payload: 220failjob_rls_rqst_payload:
155 kfree(job->request_payload.sg_list); 221 kfree(job->request_payload.sg_list);
156failjob_rls_job: 222failjob_rls_job:
157 return -ENOMEM; 223 job->result = -ENOMEM;
224 return false;
158} 225}
159 226
160/** 227/**
@@ -183,9 +250,7 @@ static void bsg_request_fn(struct request_queue *q)
183 break; 250 break;
184 spin_unlock_irq(q->queue_lock); 251 spin_unlock_irq(q->queue_lock);
185 252
186 ret = bsg_prepare_job(dev, req); 253 if (!bsg_prepare_job(dev, req)) {
187 if (ret) {
188 scsi_req(req)->result = ret;
189 blk_end_request_all(req, BLK_STS_OK); 254 blk_end_request_all(req, BLK_STS_OK);
190 spin_lock_irq(q->queue_lock); 255 spin_lock_irq(q->queue_lock);
191 continue; 256 continue;
@@ -202,47 +267,34 @@ static void bsg_request_fn(struct request_queue *q)
202 spin_lock_irq(q->queue_lock); 267 spin_lock_irq(q->queue_lock);
203} 268}
204 269
270/* called right after the request is allocated for the request_queue */
205static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) 271static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
206{ 272{
207 struct bsg_job *job = blk_mq_rq_to_pdu(req); 273 struct bsg_job *job = blk_mq_rq_to_pdu(req);
208 struct scsi_request *sreq = &job->sreq;
209
210 /* called right after the request is allocated for the request_queue */
211 274
212 sreq->sense = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp); 275 job->reply = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp);
213 if (!sreq->sense) 276 if (!job->reply)
214 return -ENOMEM; 277 return -ENOMEM;
215
216 return 0; 278 return 0;
217} 279}
218 280
281/* called right before the request is given to the request_queue user */
219static void bsg_initialize_rq(struct request *req) 282static void bsg_initialize_rq(struct request *req)
220{ 283{
221 struct bsg_job *job = blk_mq_rq_to_pdu(req); 284 struct bsg_job *job = blk_mq_rq_to_pdu(req);
222 struct scsi_request *sreq = &job->sreq; 285 void *reply = job->reply;
223 void *sense = sreq->sense;
224
225 /* called right before the request is given to the request_queue user */
226 286
227 memset(job, 0, sizeof(*job)); 287 memset(job, 0, sizeof(*job));
228 288 job->reply = reply;
229 scsi_req_init(sreq); 289 job->reply_len = SCSI_SENSE_BUFFERSIZE;
230
231 sreq->sense = sense;
232 sreq->sense_len = SCSI_SENSE_BUFFERSIZE;
233
234 job->req = req;
235 job->reply = sense;
236 job->reply_len = sreq->sense_len;
237 job->dd_data = job + 1; 290 job->dd_data = job + 1;
238} 291}
239 292
240static void bsg_exit_rq(struct request_queue *q, struct request *req) 293static void bsg_exit_rq(struct request_queue *q, struct request *req)
241{ 294{
242 struct bsg_job *job = blk_mq_rq_to_pdu(req); 295 struct bsg_job *job = blk_mq_rq_to_pdu(req);
243 struct scsi_request *sreq = &job->sreq;
244 296
245 kfree(sreq->sense); 297 kfree(job->reply);
246} 298}
247 299
248/** 300/**
@@ -275,12 +327,11 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
275 327
276 q->queuedata = dev; 328 q->queuedata = dev;
277 q->bsg_job_fn = job_fn; 329 q->bsg_job_fn = job_fn;
278 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); 330 blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
279 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
280 blk_queue_softirq_done(q, bsg_softirq_done); 331 blk_queue_softirq_done(q, bsg_softirq_done);
281 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); 332 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
282 333
283 ret = bsg_register_queue(q, dev, name, release); 334 ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release);
284 if (ret) { 335 if (ret) {
285 printk(KERN_ERR "%s: bsg interface failed to " 336 printk(KERN_ERR "%s: bsg interface failed to "
286 "initialize - register queue\n", dev->kobj.name); 337 "initialize - register queue\n", dev->kobj.name);
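
With struct bsg_ops in place, bsg-lib transports no longer see a scsi_request: the user's request buffer is copied in by bsg_transport_fill_hdr() and completion status travels through job->result. A hedged sketch of an LLD job handler under the reworked API; the driver name and payload handling are illustrative:

#include <linux/bsg-lib.h>

/* Illustrative job handler: bsg-lib owns job->request / job->reply now;
 * the LLD only reports its result and the received payload length. */
static int mydrv_bsg_job(struct bsg_job *job)
{
	unsigned int rcv_len = 0;	/* bytes written into reply_payload */

	/* ... submit job->request_payload to the hardware, fill
	 * job->reply and rcv_len from the response ... */

	bsg_job_done(job, 0 /* result */, rcv_len);
	return 0;
}
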
diff --git a/block/bsg.c b/block/bsg.c
index 06dc96e1f670..defa06c11858 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -130,114 +130,120 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
130 return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)]; 130 return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
131} 131}
132 132
133static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, 133#define uptr64(val) ((void __user *)(uintptr_t)(val))
134 struct sg_io_v4 *hdr, struct bsg_device *bd, 134
135 fmode_t mode) 135static int bsg_scsi_check_proto(struct sg_io_v4 *hdr)
136{
137 if (hdr->protocol != BSG_PROTOCOL_SCSI ||
138 hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
139 return -EINVAL;
140 return 0;
141}
142
143static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
144 fmode_t mode)
136{ 145{
137 struct scsi_request *req = scsi_req(rq); 146 struct scsi_request *sreq = scsi_req(rq);
138 147
139 if (hdr->request_len > BLK_MAX_CDB) { 148 sreq->cmd_len = hdr->request_len;
140 req->cmd = kzalloc(hdr->request_len, GFP_KERNEL); 149 if (sreq->cmd_len > BLK_MAX_CDB) {
141 if (!req->cmd) 150 sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
151 if (!sreq->cmd)
142 return -ENOMEM; 152 return -ENOMEM;
143 } 153 }
144 154
145 if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request, 155 if (copy_from_user(sreq->cmd, uptr64(hdr->request), sreq->cmd_len))
146 hdr->request_len))
147 return -EFAULT; 156 return -EFAULT;
148 157 if (blk_verify_command(sreq->cmd, mode))
149 if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
150 if (blk_verify_command(req->cmd, mode))
151 return -EPERM;
152 } else if (!capable(CAP_SYS_RAWIO))
153 return -EPERM; 158 return -EPERM;
154
155 /*
156 * fill in request structure
157 */
158 req->cmd_len = hdr->request_len;
159
160 rq->timeout = msecs_to_jiffies(hdr->timeout);
161 if (!rq->timeout)
162 rq->timeout = q->sg_timeout;
163 if (!rq->timeout)
164 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
165 if (rq->timeout < BLK_MIN_SG_TIMEOUT)
166 rq->timeout = BLK_MIN_SG_TIMEOUT;
167
168 return 0; 159 return 0;
169} 160}
170 161
171/* 162static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
172 * Check if sg_io_v4 from user is allowed and valid
173 */
174static int
175bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op)
176{ 163{
164 struct scsi_request *sreq = scsi_req(rq);
177 int ret = 0; 165 int ret = 0;
178 166
179 if (hdr->guard != 'Q') 167 /*
180 return -EINVAL; 168 * fill in all the output members
169 */
170 hdr->device_status = sreq->result & 0xff;
171 hdr->transport_status = host_byte(sreq->result);
172 hdr->driver_status = driver_byte(sreq->result);
173 hdr->info = 0;
174 if (hdr->device_status || hdr->transport_status || hdr->driver_status)
175 hdr->info |= SG_INFO_CHECK;
176 hdr->response_len = 0;
181 177
182 switch (hdr->protocol) { 178 if (sreq->sense_len && hdr->response) {
183 case BSG_PROTOCOL_SCSI: 179 int len = min_t(unsigned int, hdr->max_response_len,
184 switch (hdr->subprotocol) { 180 sreq->sense_len);
185 case BSG_SUB_PROTOCOL_SCSI_CMD: 181
186 case BSG_SUB_PROTOCOL_SCSI_TRANSPORT: 182 if (copy_to_user(uptr64(hdr->response), sreq->sense, len))
187 break; 183 ret = -EFAULT;
188 default: 184 else
189 ret = -EINVAL; 185 hdr->response_len = len;
190 } 186 }
191 break; 187
192 default: 188 if (rq->next_rq) {
193 ret = -EINVAL; 189 hdr->dout_resid = sreq->resid_len;
190 hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
191 } else if (rq_data_dir(rq) == READ) {
192 hdr->din_resid = sreq->resid_len;
193 } else {
194 hdr->dout_resid = sreq->resid_len;
194 } 195 }
195 196
196 *op = hdr->dout_xfer_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN;
197 return ret; 197 return ret;
198} 198}
199 199
200/* 200static void bsg_scsi_free_rq(struct request *rq)
201 * map sg_io_v4 to a request. 201{
202 */ 202 scsi_req_free_cmd(scsi_req(rq));
203}
204
205static const struct bsg_ops bsg_scsi_ops = {
206 .check_proto = bsg_scsi_check_proto,
207 .fill_hdr = bsg_scsi_fill_hdr,
208 .complete_rq = bsg_scsi_complete_rq,
209 .free_rq = bsg_scsi_free_rq,
210};
211
203static struct request * 212static struct request *
204bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode) 213bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
205{ 214{
206 struct request_queue *q = bd->queue;
207 struct request *rq, *next_rq = NULL; 215 struct request *rq, *next_rq = NULL;
208 int ret; 216 int ret;
209 unsigned int op, dxfer_len;
210 void __user *dxferp = NULL;
211 struct bsg_class_device *bcd = &q->bsg_dev;
212 217
213 /* if the LLD has been removed then the bsg_unregister_queue will 218 if (!q->bsg_dev.class_dev)
214 * eventually be called and the class_dev was freed, so we can no
215 * longer use this request_queue. Return no such address.
216 */
217 if (!bcd->class_dev)
218 return ERR_PTR(-ENXIO); 219 return ERR_PTR(-ENXIO);
219 220
220 bsg_dbg(bd, "map hdr %llx/%u %llx/%u\n", 221 if (hdr->guard != 'Q')
221 (unsigned long long) hdr->dout_xferp, 222 return ERR_PTR(-EINVAL);
222 hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
223 hdr->din_xfer_len);
224 223
225 ret = bsg_validate_sgv4_hdr(hdr, &op); 224 ret = q->bsg_dev.ops->check_proto(hdr);
226 if (ret) 225 if (ret)
227 return ERR_PTR(ret); 226 return ERR_PTR(ret);
228 227
229 /* 228 rq = blk_get_request(q, hdr->dout_xfer_len ?
230 * map scatter-gather elements separately and string them to request 229 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
231 */ 230 GFP_KERNEL);
232 rq = blk_get_request(q, op, GFP_KERNEL);
233 if (IS_ERR(rq)) 231 if (IS_ERR(rq))
234 return rq; 232 return rq;
235 233
236 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, mode); 234 ret = q->bsg_dev.ops->fill_hdr(rq, hdr, mode);
237 if (ret) 235 if (ret)
238 goto out; 236 goto out;
239 237
240 if (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len) { 238 rq->timeout = msecs_to_jiffies(hdr->timeout);
239 if (!rq->timeout)
240 rq->timeout = q->sg_timeout;
241 if (!rq->timeout)
242 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
243 if (rq->timeout < BLK_MIN_SG_TIMEOUT)
244 rq->timeout = BLK_MIN_SG_TIMEOUT;
245
246 if (hdr->dout_xfer_len && hdr->din_xfer_len) {
241 if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) { 247 if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
242 ret = -EOPNOTSUPP; 248 ret = -EOPNOTSUPP;
243 goto out; 249 goto out;
@@ -246,42 +252,39 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode)
246 next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); 252 next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
247 if (IS_ERR(next_rq)) { 253 if (IS_ERR(next_rq)) {
248 ret = PTR_ERR(next_rq); 254 ret = PTR_ERR(next_rq);
249 next_rq = NULL;
250 goto out; 255 goto out;
251 } 256 }
252 rq->next_rq = next_rq;
253 257
254 dxferp = (void __user *)(unsigned long)hdr->din_xferp; 258 rq->next_rq = next_rq;
255 ret = blk_rq_map_user(q, next_rq, NULL, dxferp, 259 ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr->din_xferp),
256 hdr->din_xfer_len, GFP_KERNEL); 260 hdr->din_xfer_len, GFP_KERNEL);
257 if (ret) 261 if (ret)
258 goto out; 262 goto out_free_nextrq;
259 } 263 }
260 264
261 if (hdr->dout_xfer_len) { 265 if (hdr->dout_xfer_len) {
262 dxfer_len = hdr->dout_xfer_len; 266 ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->dout_xferp),
263 dxferp = (void __user *)(unsigned long)hdr->dout_xferp; 267 hdr->dout_xfer_len, GFP_KERNEL);
264 } else if (hdr->din_xfer_len) { 268 } else if (hdr->din_xfer_len) {
265 dxfer_len = hdr->din_xfer_len; 269 ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp),
266 dxferp = (void __user *)(unsigned long)hdr->din_xferp; 270 hdr->din_xfer_len, GFP_KERNEL);
267 } else 271 } else {
268 dxfer_len = 0; 272 ret = blk_rq_map_user(q, rq, NULL, NULL, 0, GFP_KERNEL);
269
270 if (dxfer_len) {
271 ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
272 GFP_KERNEL);
273 if (ret)
274 goto out;
275 } 273 }
276 274
275 if (ret)
276 goto out_unmap_nextrq;
277 return rq; 277 return rq;
278
279out_unmap_nextrq:
280 if (rq->next_rq)
281 blk_rq_unmap_user(rq->next_rq->bio);
282out_free_nextrq:
283 if (rq->next_rq)
284 blk_put_request(rq->next_rq);
278out: 285out:
279 scsi_req_free_cmd(scsi_req(rq)); 286 q->bsg_dev.ops->free_rq(rq);
280 blk_put_request(rq); 287 blk_put_request(rq);
281 if (next_rq) {
282 blk_rq_unmap_user(next_rq->bio);
283 blk_put_request(next_rq);
284 }
285 return ERR_PTR(ret); 288 return ERR_PTR(ret);
286} 289}
287 290
@@ -383,56 +386,18 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd)
383static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, 386static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
384 struct bio *bio, struct bio *bidi_bio) 387 struct bio *bio, struct bio *bidi_bio)
385{ 388{
386 struct scsi_request *req = scsi_req(rq); 389 int ret;
387 int ret = 0;
388
389 pr_debug("rq %p bio %p 0x%x\n", rq, bio, req->result);
390 /*
391 * fill in all the output members
392 */
393 hdr->device_status = req->result & 0xff;
394 hdr->transport_status = host_byte(req->result);
395 hdr->driver_status = driver_byte(req->result);
396 hdr->info = 0;
397 if (hdr->device_status || hdr->transport_status || hdr->driver_status)
398 hdr->info |= SG_INFO_CHECK;
399 hdr->response_len = 0;
400
401 if (req->sense_len && hdr->response) {
402 int len = min_t(unsigned int, hdr->max_response_len,
403 req->sense_len);
404 390
405 ret = copy_to_user((void __user *)(unsigned long)hdr->response, 391 ret = rq->q->bsg_dev.ops->complete_rq(rq, hdr);
406 req->sense, len);
407 if (!ret)
408 hdr->response_len = len;
409 else
410 ret = -EFAULT;
411 }
412 392
413 if (rq->next_rq) { 393 if (rq->next_rq) {
414 hdr->dout_resid = req->resid_len;
415 hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
416 blk_rq_unmap_user(bidi_bio); 394 blk_rq_unmap_user(bidi_bio);
417 blk_put_request(rq->next_rq); 395 blk_put_request(rq->next_rq);
418 } else if (rq_data_dir(rq) == READ) 396 }
419 hdr->din_resid = req->resid_len;
420 else
421 hdr->dout_resid = req->resid_len;
422
423 /*
424 * If the request generated a negative error number, return it
425 * (providing we aren't already returning an error); if it's
426 * just a protocol response (i.e. non negative), that gets
427 * processed above.
428 */
429 if (!ret && req->result < 0)
430 ret = req->result;
431 397
432 blk_rq_unmap_user(bio); 398 blk_rq_unmap_user(bio);
433 scsi_req_free_cmd(req); 399 rq->q->bsg_dev.ops->free_rq(rq);
434 blk_put_request(rq); 400 blk_put_request(rq);
435
436 return ret; 401 return ret;
437} 402}
438 403
@@ -614,7 +579,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
614 /* 579 /*
615 * get a request, fill in the blanks, and add to request queue 580 * get a request, fill in the blanks, and add to request queue
616 */ 581 */
617 rq = bsg_map_hdr(bd, &bc->hdr, mode); 582 rq = bsg_map_hdr(bd->queue, &bc->hdr, mode);
618 if (IS_ERR(rq)) { 583 if (IS_ERR(rq)) {
619 ret = PTR_ERR(rq); 584 ret = PTR_ERR(rq);
620 rq = NULL; 585 rq = NULL;
@@ -742,11 +707,6 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
742 struct bsg_device *bd; 707 struct bsg_device *bd;
743 unsigned char buf[32]; 708 unsigned char buf[32];
744 709
745 if (!blk_queue_scsi_passthrough(rq)) {
746 WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
747 return ERR_PTR(-EINVAL);
748 }
749
750 if (!blk_get_queue(rq)) 710 if (!blk_get_queue(rq))
751 return ERR_PTR(-ENXIO); 711 return ERR_PTR(-ENXIO);
752 712
@@ -907,7 +867,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
907 if (copy_from_user(&hdr, uarg, sizeof(hdr))) 867 if (copy_from_user(&hdr, uarg, sizeof(hdr)))
908 return -EFAULT; 868 return -EFAULT;
909 869
910 rq = bsg_map_hdr(bd, &hdr, file->f_mode); 870 rq = bsg_map_hdr(bd->queue, &hdr, file->f_mode);
911 if (IS_ERR(rq)) 871 if (IS_ERR(rq))
912 return PTR_ERR(rq); 872 return PTR_ERR(rq);
913 873
@@ -959,7 +919,8 @@ void bsg_unregister_queue(struct request_queue *q)
959EXPORT_SYMBOL_GPL(bsg_unregister_queue); 919EXPORT_SYMBOL_GPL(bsg_unregister_queue);
960 920
961int bsg_register_queue(struct request_queue *q, struct device *parent, 921int bsg_register_queue(struct request_queue *q, struct device *parent,
962 const char *name, void (*release)(struct device *)) 922 const char *name, const struct bsg_ops *ops,
923 void (*release)(struct device *))
963{ 924{
964 struct bsg_class_device *bcd; 925 struct bsg_class_device *bcd;
965 dev_t dev; 926 dev_t dev;
@@ -996,6 +957,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
996 bcd->queue = q; 957 bcd->queue = q;
997 bcd->parent = get_device(parent); 958 bcd->parent = get_device(parent);
998 bcd->release = release; 959 bcd->release = release;
960 bcd->ops = ops;
999 kref_init(&bcd->ref); 961 kref_init(&bcd->ref);
1000 dev = MKDEV(bsg_major, bcd->minor); 962 dev = MKDEV(bsg_major, bcd->minor);
1001 class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname); 963 class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
@@ -1023,7 +985,17 @@ unlock:
1023 mutex_unlock(&bsg_mutex); 985 mutex_unlock(&bsg_mutex);
1024 return ret; 986 return ret;
1025} 987}
1026EXPORT_SYMBOL_GPL(bsg_register_queue); 988
989int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
990{
991 if (!blk_queue_scsi_passthrough(q)) {
992 WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
993 return -EINVAL;
994 }
995
996 return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL);
997}
998EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
1027 999
1028static struct cdev bsg_cdev; 1000static struct cdev bsg_cdev;
1029 1001
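
bsg_register_queue() now carries the protocol ops, and SCSI device nodes go through the new bsg_scsi_register_queue() wrapper, which keeps the passthrough sanity check that used to sit in bsg_add_device(). An illustrative sketch of a midlayer-style caller (the real call site is outside this diff):

#include <scsi/scsi_device.h>
#include <linux/bsg.h>

/* Hedged sketch: register the bsg node for a SCSI device after this change. */
static int example_register_bsg(struct scsi_device *sdev)
{
	return bsg_scsi_register_queue(sdev->request_queue,
				       &sdev->sdev_gendev);
}
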
diff --git a/block/sed-opal.c b/block/sed-opal.c
index e4929eec547f..945f4b8610e0 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -554,15 +554,14 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
554 554
555 size_t len; 555 size_t len;
556 int msb; 556 int msb;
557 u8 n;
558 557
559 if (!(number & ~TINY_ATOM_DATA_MASK)) { 558 if (!(number & ~TINY_ATOM_DATA_MASK)) {
560 add_token_u8(err, cmd, number); 559 add_token_u8(err, cmd, number);
561 return; 560 return;
562 } 561 }
563 562
564 msb = fls(number); 563 msb = fls64(number);
565 len = DIV_ROUND_UP(msb, 4); 564 len = DIV_ROUND_UP(msb, 8);
566 565
567 if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) { 566 if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) {
568 pr_debug("Error adding u64: end of buffer.\n"); 567 pr_debug("Error adding u64: end of buffer.\n");
@@ -570,10 +569,8 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
570 return; 569 return;
571 } 570 }
572 add_short_atom_header(cmd, false, false, len); 571 add_short_atom_header(cmd, false, false, len);
573 while (len--) { 572 while (len--)
574 n = number >> (len * 8); 573 add_token_u8(err, cmd, number >> (len * 8));
575 add_token_u8(err, cmd, n);
576 }
577} 574}
578 575
579static void add_token_bytestring(int *err, struct opal_dev *cmd, 576static void add_token_bytestring(int *err, struct opal_dev *cmd,
@@ -871,6 +868,9 @@ static int response_parse(const u8 *buf, size_t length,
871static size_t response_get_string(const struct parsed_resp *resp, int n, 868static size_t response_get_string(const struct parsed_resp *resp, int n,
872 const char **store) 869 const char **store)
873{ 870{
871 u8 skip;
872 const struct opal_resp_tok *token;
873
874 *store = NULL; 874 *store = NULL;
875 if (!resp) { 875 if (!resp) {
876 pr_debug("Response is NULL\n"); 876 pr_debug("Response is NULL\n");
@@ -883,13 +883,30 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
883 return 0; 883 return 0;
884 } 884 }
885 885
886 if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) { 886 token = &resp->toks[n];
887 if (token->type != OPAL_DTA_TOKENID_BYTESTRING) {
887 pr_debug("Token is not a byte string!\n"); 888 pr_debug("Token is not a byte string!\n");
888 return 0; 889 return 0;
889 } 890 }
890 891
891 *store = resp->toks[n].pos + 1; 892 switch (token->width) {
892 return resp->toks[n].len - 1; 893 case OPAL_WIDTH_TINY:
894 case OPAL_WIDTH_SHORT:
895 skip = 1;
896 break;
897 case OPAL_WIDTH_MEDIUM:
898 skip = 2;
899 break;
900 case OPAL_WIDTH_LONG:
901 skip = 4;
902 break;
903 default:
904 pr_debug("Token has invalid width!\n");
905 return 0;
906 }
907
908 *store = token->pos + skip;
909 return token->len - skip;
893} 910}
894 911
895static u64 response_get_u64(const struct parsed_resp *resp, int n) 912static u64 response_get_u64(const struct parsed_resp *resp, int n)
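
Two of the sed-opal fixes above deserve a worked example: the short-atom length for a u64 is now computed in whole bytes with fls64() (the old fls()/divide-by-4 pair truncated values above 32 bits and counted nibbles as bytes), and response_get_string() now skips an atom header whose size depends on the token width. Sketch of the length math:

	/* Worked example: encode number = 0x1122334455ULL */
	int msb    = fls64(0x1122334455ULL);	/* 37: highest set bit  */
	size_t len = DIV_ROUND_UP(msb, 8);	/* 5 payload bytes      */
	/* old code: fls() saw only the low 32 bits and DIV_ROUND_UP(msb, 4)
	 * counted nibbles, so the emitted short atom had the wrong length */
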
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index deea78e485da..66cb0f857f64 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -24,7 +24,6 @@
24 24
25#include <linux/uaccess.h> 25#include <linux/uaccess.h>
26 26
27#define SECTOR_SHIFT 9
28#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 27#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
29#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) 28#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
30 29
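
brd's private SECTOR_SHIFT define goes away because this series consolidates the sector constants in a shared block header (the move itself is outside the hunks shown here); the derived macros that remain keep working unchanged:

	/* shared definitions assumed by the remaining brd.c macros:
	 *	#define SECTOR_SHIFT	9
	 *	#define SECTOR_SIZE	(1 << SECTOR_SHIFT)
	 * e.g. a 4096-byte page spans 4096 >> SECTOR_SHIFT == 8 sectors,
	 * which is what PAGE_SECTORS evaluates to. */
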
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0a0394aa1b9c..185f1ef00a7c 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
2816 2816
2817 drbd_init_set_defaults(device); 2817 drbd_init_set_defaults(device);
2818 2818
2819 q = blk_alloc_queue(GFP_KERNEL); 2819 q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
2820 if (!q) 2820 if (!q)
2821 goto out_no_q; 2821 goto out_no_q;
2822 device->rq_queue = q; 2822 device->rq_queue = q;
@@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
2848 /* Setting the max_hw_sectors to an odd value of 8kibyte here 2848 /* Setting the max_hw_sectors to an odd value of 8kibyte here
2849 This triggers a max_bio_size message upon first attach or connect */ 2849 This triggers a max_bio_size message upon first attach or connect */
2850 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); 2850 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
2851 q->queue_lock = &resource->req_lock;
2852 2851
2853 device->md_io.page = alloc_page(GFP_KERNEL); 2852 device->md_io.page = alloc_page(GFP_KERNEL);
2854 if (!device->md_io.page) 2853 if (!device->md_io.page)
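
blk_alloc_queue_node() now takes the spinlock to install as q->queue_lock, so DRBD hands over &resource->req_lock at allocation time instead of assigning it afterwards, while blk-mq callers (see the blk_mq_init_queue() hunk above) pass NULL and keep the queue's internal lock. A hedged two-line sketch; mydrv_lock is an illustrative driver spinlock:

	/* legacy/request_fn style: hand the driver lock to the queue */
	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &mydrv_lock);

	/* blk-mq style: NULL keeps the queue's built-in lock */
	q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);
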
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index a12f77e6891e..b4f02768ba47 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1212,10 +1212,10 @@ static void decide_on_discard_support(struct drbd_device *device,
1212 * topology on all peers. */ 1212 * topology on all peers. */
1213 blk_queue_discard_granularity(q, 512); 1213 blk_queue_discard_granularity(q, 512);
1214 q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); 1214 q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
1215 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 1215 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
1216 q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection); 1216 q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
1217 } else { 1217 } else {
1218 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 1218 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
1219 blk_queue_discard_granularity(q, 0); 1219 blk_queue_discard_granularity(q, 0);
1220 q->limits.max_discard_sectors = 0; 1220 q->limits.max_discard_sectors = 0;
1221 q->limits.max_write_zeroes_sectors = 0; 1221 q->limits.max_write_zeroes_sectors = 0;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ee62d2d517bf..264abaaff662 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -214,10 +214,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
214 blk_mq_freeze_queue(lo->lo_queue); 214 blk_mq_freeze_queue(lo->lo_queue);
215 lo->use_dio = use_dio; 215 lo->use_dio = use_dio;
216 if (use_dio) { 216 if (use_dio) {
217 queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); 217 blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
218 lo->lo_flags |= LO_FLAGS_DIRECT_IO; 218 lo->lo_flags |= LO_FLAGS_DIRECT_IO;
219 } else { 219 } else {
220 queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); 220 blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
221 lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; 221 lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
222 } 222 }
223 blk_mq_unfreeze_queue(lo->lo_queue); 223 blk_mq_unfreeze_queue(lo->lo_queue);
@@ -817,7 +817,7 @@ static void loop_config_discard(struct loop_device *lo)
817 q->limits.discard_alignment = 0; 817 q->limits.discard_alignment = 0;
818 blk_queue_max_discard_sectors(q, 0); 818 blk_queue_max_discard_sectors(q, 0);
819 blk_queue_max_write_zeroes_sectors(q, 0); 819 blk_queue_max_write_zeroes_sectors(q, 0);
820 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 820 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
821 return; 821 return;
822 } 822 }
823 823
@@ -826,7 +826,7 @@ static void loop_config_discard(struct loop_device *lo)
826 826
827 blk_queue_max_discard_sectors(q, UINT_MAX >> 9); 827 blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
828 blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); 828 blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
829 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 829 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
830} 830}
831 831
832static void loop_unprepare_queue(struct loop_device *lo) 832static void loop_unprepare_queue(struct loop_device *lo)
@@ -1167,21 +1167,17 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1167static int 1167static int
1168loop_get_status(struct loop_device *lo, struct loop_info64 *info) 1168loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1169{ 1169{
1170 struct file *file = lo->lo_backing_file; 1170 struct file *file;
1171 struct kstat stat; 1171 struct kstat stat;
1172 int error; 1172 int ret;
1173 1173
1174 if (lo->lo_state != Lo_bound) 1174 if (lo->lo_state != Lo_bound) {
1175 mutex_unlock(&lo->lo_ctl_mutex);
1175 return -ENXIO; 1176 return -ENXIO;
1176 error = vfs_getattr(&file->f_path, &stat, 1177 }
1177 STATX_INO, AT_STATX_SYNC_AS_STAT); 1178
1178 if (error)
1179 return error;
1180 memset(info, 0, sizeof(*info)); 1179 memset(info, 0, sizeof(*info));
1181 info->lo_number = lo->lo_number; 1180 info->lo_number = lo->lo_number;
1182 info->lo_device = huge_encode_dev(stat.dev);
1183 info->lo_inode = stat.ino;
1184 info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
1185 info->lo_offset = lo->lo_offset; 1181 info->lo_offset = lo->lo_offset;
1186 info->lo_sizelimit = lo->lo_sizelimit; 1182 info->lo_sizelimit = lo->lo_sizelimit;
1187 info->lo_flags = lo->lo_flags; 1183 info->lo_flags = lo->lo_flags;
@@ -1194,7 +1190,19 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1194 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, 1190 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1195 lo->lo_encrypt_key_size); 1191 lo->lo_encrypt_key_size);
1196 } 1192 }
1197 return 0; 1193
1194 /* Drop lo_ctl_mutex while we call into the filesystem. */
1195 file = get_file(lo->lo_backing_file);
1196 mutex_unlock(&lo->lo_ctl_mutex);
1197 ret = vfs_getattr(&file->f_path, &stat, STATX_INO,
1198 AT_STATX_SYNC_AS_STAT);
1199 if (!ret) {
1200 info->lo_device = huge_encode_dev(stat.dev);
1201 info->lo_inode = stat.ino;
1202 info->lo_rdevice = huge_encode_dev(stat.rdev);
1203 }
1204 fput(file);
1205 return ret;
1198} 1206}
1199 1207
1200static void 1208static void
@@ -1352,7 +1360,10 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1352 struct loop_device *lo = bdev->bd_disk->private_data; 1360 struct loop_device *lo = bdev->bd_disk->private_data;
1353 int err; 1361 int err;
1354 1362
1355 mutex_lock_nested(&lo->lo_ctl_mutex, 1); 1363 err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1);
1364 if (err)
1365 goto out_unlocked;
1366
1356 switch (cmd) { 1367 switch (cmd) {
1357 case LOOP_SET_FD: 1368 case LOOP_SET_FD:
1358 err = loop_set_fd(lo, mode, bdev, arg); 1369 err = loop_set_fd(lo, mode, bdev, arg);
@@ -1374,7 +1385,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1374 break; 1385 break;
1375 case LOOP_GET_STATUS: 1386 case LOOP_GET_STATUS:
1376 err = loop_get_status_old(lo, (struct loop_info __user *) arg); 1387 err = loop_get_status_old(lo, (struct loop_info __user *) arg);
1377 break; 1388 /* loop_get_status() unlocks lo_ctl_mutex */
1389 goto out_unlocked;
1378 case LOOP_SET_STATUS64: 1390 case LOOP_SET_STATUS64:
1379 err = -EPERM; 1391 err = -EPERM;
1380 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1392 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
@@ -1383,7 +1395,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1383 break; 1395 break;
1384 case LOOP_GET_STATUS64: 1396 case LOOP_GET_STATUS64:
1385 err = loop_get_status64(lo, (struct loop_info64 __user *) arg); 1397 err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
1386 break; 1398 /* loop_get_status() unlocks lo_ctl_mutex */
1399 goto out_unlocked;
1387 case LOOP_SET_CAPACITY: 1400 case LOOP_SET_CAPACITY:
1388 err = -EPERM; 1401 err = -EPERM;
1389 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1402 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
@@ -1535,16 +1548,20 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
1535 1548
1536 switch(cmd) { 1549 switch(cmd) {
1537 case LOOP_SET_STATUS: 1550 case LOOP_SET_STATUS:
1538 mutex_lock(&lo->lo_ctl_mutex); 1551 err = mutex_lock_killable(&lo->lo_ctl_mutex);
1539 err = loop_set_status_compat( 1552 if (!err) {
1540 lo, (const struct compat_loop_info __user *) arg); 1553 err = loop_set_status_compat(lo,
1541 mutex_unlock(&lo->lo_ctl_mutex); 1554 (const struct compat_loop_info __user *)arg);
1555 mutex_unlock(&lo->lo_ctl_mutex);
1556 }
1542 break; 1557 break;
1543 case LOOP_GET_STATUS: 1558 case LOOP_GET_STATUS:
1544 mutex_lock(&lo->lo_ctl_mutex); 1559 err = mutex_lock_killable(&lo->lo_ctl_mutex);
1545 err = loop_get_status_compat( 1560 if (!err) {
1546 lo, (struct compat_loop_info __user *) arg); 1561 err = loop_get_status_compat(lo,
1547 mutex_unlock(&lo->lo_ctl_mutex); 1562 (struct compat_loop_info __user *)arg);
1563 /* loop_get_status() unlocks lo_ctl_mutex */
1564 }
1548 break; 1565 break;
1549 case LOOP_SET_CAPACITY: 1566 case LOOP_SET_CAPACITY:
1550 case LOOP_CLR_FD: 1567 case LOOP_CLR_FD:
@@ -1808,7 +1825,7 @@ static int loop_add(struct loop_device **l, int i)
1808 * page. For directio mode, merge does help to dispatch bigger request 1825 * page. For directio mode, merge does help to dispatch bigger request
1809 * to underlayer disk. We will enable merge once directio is enabled. 1826 * to underlayer disk. We will enable merge once directio is enabled.
1810 */ 1827 */
1811 queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); 1828 blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
1812 1829
1813 err = -ENOMEM; 1830 err = -ENOMEM;
1814 disk = lo->lo_disk = alloc_disk(1 << part_shift); 1831 disk = lo->lo_disk = alloc_disk(1 << part_shift);
@@ -1864,8 +1881,8 @@ out:
1864 1881
1865static void loop_remove(struct loop_device *lo) 1882static void loop_remove(struct loop_device *lo)
1866{ 1883{
1867 blk_cleanup_queue(lo->lo_queue);
1868 del_gendisk(lo->lo_disk); 1884 del_gendisk(lo->lo_disk);
1885 blk_cleanup_queue(lo->lo_queue);
1869 blk_mq_free_tag_set(&lo->tag_set); 1886 blk_mq_free_tag_set(&lo->tag_set);
1870 put_disk(lo->lo_disk); 1887 put_disk(lo->lo_disk);
1871 kfree(lo); 1888 kfree(lo);
@@ -1949,7 +1966,9 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
1949 ret = loop_lookup(&lo, parm); 1966 ret = loop_lookup(&lo, parm);
1950 if (ret < 0) 1967 if (ret < 0)
1951 break; 1968 break;
1952 mutex_lock(&lo->lo_ctl_mutex); 1969 ret = mutex_lock_killable(&lo->lo_ctl_mutex);
1970 if (ret)
1971 break;
1953 if (lo->lo_state != Lo_unbound) { 1972 if (lo->lo_state != Lo_unbound) {
1954 ret = -EBUSY; 1973 ret = -EBUSY;
1955 mutex_unlock(&lo->lo_ctl_mutex); 1974 mutex_unlock(&lo->lo_ctl_mutex);
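
The loop ioctl paths above switch from mutex_lock() to mutex_lock_killable(), so a task stuck behind a long-running loop operation can still be killed, and loop_get_status() now drops lo_ctl_mutex before calling into the filesystem. A minimal sketch of the killable-lock pattern these hunks follow:

	err = mutex_lock_killable(&lo->lo_ctl_mutex);
	if (err)
		return err;	/* -EINTR: interrupted by a fatal signal */
	/* ... do the ioctl work ... */
	mutex_unlock(&lo->lo_ctl_mutex);
	return 0;
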
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index b8af7352a18f..769c551e3d71 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -159,7 +159,7 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
159 if (vendor_id == 0xFFFF) { 159 if (vendor_id == 0xFFFF) {
160 dd->sr = true; 160 dd->sr = true;
161 if (dd->queue) 161 if (dd->queue)
162 set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags); 162 blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue);
163 else 163 else
164 dev_warn(&dd->pdev->dev, 164 dev_warn(&dd->pdev->dev,
165 "%s: dd->queue is NULL\n", __func__); 165 "%s: dd->queue is NULL\n", __func__);
@@ -3855,8 +3855,8 @@ skip_create_disk:
3855 goto start_service_thread; 3855 goto start_service_thread;
3856 3856
3857 /* Set device limits. */ 3857 /* Set device limits. */
3858 set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); 3858 blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
3859 clear_bit(QUEUE_FLAG_ADD_RANDOM, &dd->queue->queue_flags); 3859 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
3860 blk_queue_max_segments(dd->queue, MTIP_MAX_SG); 3860 blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
3861 blk_queue_physical_block_size(dd->queue, 4096); 3861 blk_queue_physical_block_size(dd->queue, 4096);
3862 blk_queue_max_hw_sectors(dd->queue, 0xffff); 3862 blk_queue_max_hw_sectors(dd->queue, 0xffff);
@@ -3866,7 +3866,7 @@ skip_create_disk:
3866 3866
3867 /* Signal trim support */ 3867 /* Signal trim support */
3868 if (dd->trim_supp == true) { 3868 if (dd->trim_supp == true) {
3869 set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); 3869 blk_queue_flag_set(QUEUE_FLAG_DISCARD, dd->queue);
3870 dd->queue->limits.discard_granularity = 4096; 3870 dd->queue->limits.discard_granularity = 4096;
3871 blk_queue_max_discard_sectors(dd->queue, 3871 blk_queue_max_discard_sectors(dd->queue,
3872 MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); 3872 MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 86258b00a1d4..afbc202ca6fd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -964,7 +964,7 @@ static void nbd_parse_flags(struct nbd_device *nbd)
964 else 964 else
965 set_disk_ro(nbd->disk, false); 965 set_disk_ro(nbd->disk, false);
966 if (config->flags & NBD_FLAG_SEND_TRIM) 966 if (config->flags & NBD_FLAG_SEND_TRIM)
967 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); 967 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
968 if (config->flags & NBD_FLAG_SEND_FLUSH) { 968 if (config->flags & NBD_FLAG_SEND_FLUSH) {
969 if (config->flags & NBD_FLAG_SEND_FUA) 969 if (config->flags & NBD_FLAG_SEND_FUA)
970 blk_queue_write_cache(nbd->disk->queue, true, true); 970 blk_queue_write_cache(nbd->disk->queue, true, true);
@@ -1040,7 +1040,7 @@ static void nbd_config_put(struct nbd_device *nbd)
1040 nbd->config = NULL; 1040 nbd->config = NULL;
1041 1041
1042 nbd->tag_set.timeout = 0; 1042 nbd->tag_set.timeout = 0;
1043 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); 1043 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
1044 1044
1045 mutex_unlock(&nbd->config_lock); 1045 mutex_unlock(&nbd->config_lock);
1046 nbd_put(nbd); 1046 nbd_put(nbd);
@@ -1488,8 +1488,8 @@ static int nbd_dev_add(int index)
1488 /* 1488 /*
1489 * Tell the block layer that we are not a rotational device 1489 * Tell the block layer that we are not a rotational device
1490 */ 1490 */
1491 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); 1491 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
1492 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); 1492 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
1493 disk->queue->limits.discard_granularity = 512; 1493 disk->queue->limits.discard_granularity = 512;
1494 blk_queue_max_discard_sectors(disk->queue, UINT_MAX); 1494 blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
1495 blk_queue_max_segment_size(disk->queue, UINT_MAX); 1495 blk_queue_max_segment_size(disk->queue, UINT_MAX);
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 287a09611c0f..a76553293a31 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -16,10 +16,8 @@
16#include <linux/badblocks.h> 16#include <linux/badblocks.h>
17#include <linux/fault-inject.h> 17#include <linux/fault-inject.h>
18 18
19#define SECTOR_SHIFT 9
20#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 19#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
21#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) 20#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
22#define SECTOR_SIZE (1 << SECTOR_SHIFT)
23#define SECTOR_MASK (PAGE_SECTORS - 1) 21#define SECTOR_MASK (PAGE_SECTORS - 1)
24 22
25#define FREE_BATCH 16 23#define FREE_BATCH 16
@@ -29,6 +27,7 @@
29 27
30#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION 28#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
31static DECLARE_FAULT_ATTR(null_timeout_attr); 29static DECLARE_FAULT_ATTR(null_timeout_attr);
30static DECLARE_FAULT_ATTR(null_requeue_attr);
32#endif 31#endif
33 32
34static inline u64 mb_per_tick(int mbps) 33static inline u64 mb_per_tick(int mbps)
@@ -53,6 +52,7 @@ struct nullb_queue {
53 wait_queue_head_t wait; 52 wait_queue_head_t wait;
54 unsigned int queue_depth; 53 unsigned int queue_depth;
55 struct nullb_device *dev; 54 struct nullb_device *dev;
55 unsigned int requeue_selection;
56 56
57 struct nullb_cmd *cmds; 57 struct nullb_cmd *cmds;
58}; 58};
@@ -72,6 +72,7 @@ enum nullb_device_flags {
72 NULLB_DEV_FL_CACHE = 3, 72 NULLB_DEV_FL_CACHE = 3,
73}; 73};
74 74
75#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
75/* 76/*
76 * nullb_page is a page in memory for nullb devices. 77 * nullb_page is a page in memory for nullb devices.
77 * 78 *
@@ -86,10 +87,10 @@ enum nullb_device_flags {
86 */ 87 */
87struct nullb_page { 88struct nullb_page {
88 struct page *page; 89 struct page *page;
89 unsigned long bitmap; 90 DECLARE_BITMAP(bitmap, MAP_SZ);
90}; 91};
91#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1) 92#define NULLB_PAGE_LOCK (MAP_SZ - 1)
92#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2) 93#define NULLB_PAGE_FREE (MAP_SZ - 2)
93 94
94struct nullb_device { 95struct nullb_device {
95 struct nullb *nullb; 96 struct nullb *nullb;
@@ -170,6 +171,9 @@ MODULE_PARM_DESC(home_node, "Home node for the device");
170#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION 171#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
171static char g_timeout_str[80]; 172static char g_timeout_str[80];
172module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO); 173module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
174
175static char g_requeue_str[80];
176module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO);
173#endif 177#endif
174 178
175static int g_queue_mode = NULL_Q_MQ; 179static int g_queue_mode = NULL_Q_MQ;
@@ -728,7 +732,7 @@ static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
728 if (!t_page->page) 732 if (!t_page->page)
729 goto out_freepage; 733 goto out_freepage;
730 734
731 t_page->bitmap = 0; 735 memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
732 return t_page; 736 return t_page;
733out_freepage: 737out_freepage:
734 kfree(t_page); 738 kfree(t_page);
@@ -738,13 +742,20 @@ out:
738 742
739static void null_free_page(struct nullb_page *t_page) 743static void null_free_page(struct nullb_page *t_page)
740{ 744{
741 __set_bit(NULLB_PAGE_FREE, &t_page->bitmap); 745 __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
742 if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap)) 746 if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
743 return; 747 return;
744 __free_page(t_page->page); 748 __free_page(t_page->page);
745 kfree(t_page); 749 kfree(t_page);
746} 750}
747 751
752static bool null_page_empty(struct nullb_page *page)
753{
754 int size = MAP_SZ - 2;
755
756 return find_first_bit(page->bitmap, size) == size;
757}
758
748static void null_free_sector(struct nullb *nullb, sector_t sector, 759static void null_free_sector(struct nullb *nullb, sector_t sector,
749 bool is_cache) 760 bool is_cache)
750{ 761{
@@ -759,9 +770,9 @@ static void null_free_sector(struct nullb *nullb, sector_t sector,
759 770
760 t_page = radix_tree_lookup(root, idx); 771 t_page = radix_tree_lookup(root, idx);
761 if (t_page) { 772 if (t_page) {
762 __clear_bit(sector_bit, &t_page->bitmap); 773 __clear_bit(sector_bit, t_page->bitmap);
763 774
764 if (!t_page->bitmap) { 775 if (null_page_empty(t_page)) {
765 ret = radix_tree_delete_item(root, idx, t_page); 776 ret = radix_tree_delete_item(root, idx, t_page);
766 WARN_ON(ret != t_page); 777 WARN_ON(ret != t_page);
767 null_free_page(ret); 778 null_free_page(ret);
@@ -832,7 +843,7 @@ static struct nullb_page *__null_lookup_page(struct nullb *nullb,
832 t_page = radix_tree_lookup(root, idx); 843 t_page = radix_tree_lookup(root, idx);
833 WARN_ON(t_page && t_page->page->index != idx); 844 WARN_ON(t_page && t_page->page->index != idx);
834 845
835 if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap))) 846 if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
836 return t_page; 847 return t_page;
837 848
838 return NULL; 849 return NULL;
@@ -895,10 +906,10 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
895 906
896 t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); 907 t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
897 908
898 __clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap); 909 __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
899 if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) { 910 if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
900 null_free_page(c_page); 911 null_free_page(c_page);
901 if (t_page && t_page->bitmap == 0) { 912 if (t_page && null_page_empty(t_page)) {
902 ret = radix_tree_delete_item(&nullb->dev->data, 913 ret = radix_tree_delete_item(&nullb->dev->data,
903 idx, t_page); 914 idx, t_page);
904 null_free_page(t_page); 915 null_free_page(t_page);
@@ -914,11 +925,11 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
914 925
915 for (i = 0; i < PAGE_SECTORS; 926 for (i = 0; i < PAGE_SECTORS;
916 i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { 927 i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
917 if (test_bit(i, &c_page->bitmap)) { 928 if (test_bit(i, c_page->bitmap)) {
918 offset = (i << SECTOR_SHIFT); 929 offset = (i << SECTOR_SHIFT);
919 memcpy(dst + offset, src + offset, 930 memcpy(dst + offset, src + offset,
920 nullb->dev->blocksize); 931 nullb->dev->blocksize);
921 __set_bit(i, &t_page->bitmap); 932 __set_bit(i, t_page->bitmap);
922 } 933 }
923 } 934 }
924 935
@@ -955,10 +966,10 @@ again:
955 * We found the page which is being flushed to disk by other 966 * We found the page which is being flushed to disk by other
956 * threads 967 * threads
957 */ 968 */
958 if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap)) 969 if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
959 c_pages[i] = NULL; 970 c_pages[i] = NULL;
960 else 971 else
961 __set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap); 972 __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
962 } 973 }
963 974
964 one_round = 0; 975 one_round = 0;
@@ -1011,7 +1022,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
1011 kunmap_atomic(dst); 1022 kunmap_atomic(dst);
1012 kunmap_atomic(src); 1023 kunmap_atomic(src);
1013 1024
1014 __set_bit(sector & SECTOR_MASK, &t_page->bitmap); 1025 __set_bit(sector & SECTOR_MASK, t_page->bitmap);
1015 1026
1016 if (is_fua) 1027 if (is_fua)
1017 null_free_sector(nullb, sector, true); 1028 null_free_sector(nullb, sector, true);
@@ -1380,7 +1391,15 @@ static bool should_timeout_request(struct request *rq)
1380 if (g_timeout_str[0]) 1391 if (g_timeout_str[0])
1381 return should_fail(&null_timeout_attr, 1); 1392 return should_fail(&null_timeout_attr, 1);
1382#endif 1393#endif
1394 return false;
1395}
1383 1396
1397static bool should_requeue_request(struct request *rq)
1398{
1399#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1400 if (g_requeue_str[0])
1401 return should_fail(&null_requeue_attr, 1);
1402#endif
1384 return false; 1403 return false;
1385} 1404}
1386 1405
@@ -1391,11 +1410,17 @@ static void null_request_fn(struct request_queue *q)
1391 while ((rq = blk_fetch_request(q)) != NULL) { 1410 while ((rq = blk_fetch_request(q)) != NULL) {
1392 struct nullb_cmd *cmd = rq->special; 1411 struct nullb_cmd *cmd = rq->special;
1393 1412
1394 if (!should_timeout_request(rq)) { 1413 /* just ignore the request */
1395 spin_unlock_irq(q->queue_lock); 1414 if (should_timeout_request(rq))
1396 null_handle_cmd(cmd); 1415 continue;
1397 spin_lock_irq(q->queue_lock); 1416 if (should_requeue_request(rq)) {
1417 blk_requeue_request(q, rq);
1418 continue;
1398 } 1419 }
1420
1421 spin_unlock_irq(q->queue_lock);
1422 null_handle_cmd(cmd);
1423 spin_lock_irq(q->queue_lock);
1399 } 1424 }
1400} 1425}
1401 1426
@@ -1422,10 +1447,23 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
1422 1447
1423 blk_mq_start_request(bd->rq); 1448 blk_mq_start_request(bd->rq);
1424 1449
1425 if (!should_timeout_request(bd->rq)) 1450 if (should_requeue_request(bd->rq)) {
1426 return null_handle_cmd(cmd); 1451 /*
1452 * Alternate between hitting the core BUSY path, and the
1453 * driver driven requeue path
1454 */
1455 nq->requeue_selection++;
1456 if (nq->requeue_selection & 1)
1457 return BLK_STS_RESOURCE;
1458 else {
1459 blk_mq_requeue_request(bd->rq, true);
1460 return BLK_STS_OK;
1461 }
1462 }
1463 if (should_timeout_request(bd->rq))
1464 return BLK_STS_OK;
1427 1465
1428 return BLK_STS_OK; 1466 return null_handle_cmd(cmd);
1429} 1467}
1430 1468
1431static const struct blk_mq_ops null_mq_ops = { 1469static const struct blk_mq_ops null_mq_ops = {
@@ -1485,7 +1523,7 @@ static void null_config_discard(struct nullb *nullb)
1485 nullb->q->limits.discard_granularity = nullb->dev->blocksize; 1523 nullb->q->limits.discard_granularity = nullb->dev->blocksize;
1486 nullb->q->limits.discard_alignment = nullb->dev->blocksize; 1524 nullb->q->limits.discard_alignment = nullb->dev->blocksize;
1487 blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); 1525 blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
1488 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q); 1526 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
1489} 1527}
1490 1528
1491static int null_open(struct block_device *bdev, fmode_t mode) 1529static int null_open(struct block_device *bdev, fmode_t mode)
@@ -1659,16 +1697,27 @@ static void null_validate_conf(struct nullb_device *dev)
1659 dev->mbps = 0; 1697 dev->mbps = 0;
1660} 1698}
1661 1699
1662static bool null_setup_fault(void)
1663{
1664#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION 1700#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1665 if (!g_timeout_str[0]) 1701static bool __null_setup_fault(struct fault_attr *attr, char *str)
1702{
1703 if (!str[0])
1666 return true; 1704 return true;
1667 1705
1668 if (!setup_fault_attr(&null_timeout_attr, g_timeout_str)) 1706 if (!setup_fault_attr(attr, str))
1669 return false; 1707 return false;
1670 1708
1671 null_timeout_attr.verbose = 0; 1709 attr->verbose = 0;
1710 return true;
1711}
1712#endif
1713
1714static bool null_setup_fault(void)
1715{
1716#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1717 if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
1718 return false;
1719 if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
1720 return false;
1672#endif 1721#endif
1673 return true; 1722 return true;
1674} 1723}
@@ -1717,7 +1766,8 @@ static int null_add_dev(struct nullb_device *dev)
1717 } 1766 }
1718 null_init_queues(nullb); 1767 null_init_queues(nullb);
1719 } else if (dev->queue_mode == NULL_Q_BIO) { 1768 } else if (dev->queue_mode == NULL_Q_BIO) {
1720 nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); 1769 nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node,
1770 NULL);
1721 if (!nullb->q) { 1771 if (!nullb->q) {
1722 rv = -ENOMEM; 1772 rv = -ENOMEM;
1723 goto out_cleanup_queues; 1773 goto out_cleanup_queues;
@@ -1758,8 +1808,8 @@ static int null_add_dev(struct nullb_device *dev)
1758 } 1808 }
1759 1809
1760 nullb->q->queuedata = nullb; 1810 nullb->q->queuedata = nullb;
1761 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); 1811 blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
1762 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); 1812 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
1763 1813
1764 mutex_lock(&lock); 1814 mutex_lock(&lock);
1765 nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); 1815 nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
@@ -1802,10 +1852,6 @@ static int __init null_init(void)
1802 struct nullb *nullb; 1852 struct nullb *nullb;
1803 struct nullb_device *dev; 1853 struct nullb_device *dev;
1804 1854
1805 /* check for nullb_page.bitmap */
1806 if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT))
1807 return -EINVAL;
1808
1809 if (g_bs > PAGE_SIZE) { 1855 if (g_bs > PAGE_SIZE) {
1810 pr_warn("null_blk: invalid block size\n"); 1856 pr_warn("null_blk: invalid block size\n");
1811 pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); 1857 pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 7b8c6368beb7..a026211afb51 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -230,6 +230,8 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode)
230 struct pcd_unit *cd = bdev->bd_disk->private_data; 230 struct pcd_unit *cd = bdev->bd_disk->private_data;
231 int ret; 231 int ret;
232 232
233 check_disk_change(bdev);
234
233 mutex_lock(&pcd_mutex); 235 mutex_lock(&pcd_mutex);
234 ret = cdrom_open(&cd->info, bdev, mode); 236 ret = cdrom_open(&cd->info, bdev, mode);
235 mutex_unlock(&pcd_mutex); 237 mutex_unlock(&pcd_mutex);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8e40da093766..1e03b04819c8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -51,15 +51,6 @@
51#define RBD_DEBUG /* Activate rbd_assert() calls */ 51#define RBD_DEBUG /* Activate rbd_assert() calls */
52 52
53/* 53/*
54 * The basic unit of block I/O is a sector. It is interpreted in a
55 * number of contexts in Linux (blk, bio, genhd), but the default is
56 * universally 512 bytes. These symbols are just slightly more
57 * meaningful than the bare numbers they represent.
58 */
59#define SECTOR_SHIFT 9
60#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
61
62/*
63 * Increment the given counter and return its updated value. 54 * Increment the given counter and return its updated value.
64 * If the counter is already 0 it will not be incremented. 55 * If the counter is already 0 it will not be incremented.
65 * If the counter is already at its maximum value returns 56 * If the counter is already at its maximum value returns
@@ -4370,7 +4361,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
4370 goto out_tag_set; 4361 goto out_tag_set;
4371 } 4362 }
4372 4363
4373 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 4364 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
4374 /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ 4365 /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
4375 4366
4376 /* set io sizes to object size */ 4367 /* set io sizes to object size */
@@ -4383,7 +4374,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
4383 blk_queue_io_opt(q, segment_size); 4374 blk_queue_io_opt(q, segment_size);
4384 4375
4385 /* enable the discard support */ 4376 /* enable the discard support */
4386 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 4377 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
4387 q->limits.discard_granularity = segment_size; 4378 q->limits.discard_granularity = segment_size;
4388 blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); 4379 blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
4389 blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); 4380 blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE);
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index e397d3ee7308..dddb3f2490b6 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -287,10 +287,10 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
287 blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); 287 blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
288 blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); 288 blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
289 289
290 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); 290 blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue);
291 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, card->queue); 291 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue);
292 if (rsxx_discard_supported(card)) { 292 if (rsxx_discard_supported(card)) {
293 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); 293 blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue);
294 blk_queue_max_discard_sectors(card->queue, 294 blk_queue_max_discard_sectors(card->queue,
295 RSXX_HW_BLK_SIZE >> 9); 295 RSXX_HW_BLK_SIZE >> 9);
296 card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; 296 card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index e41935ab41ef..bc7aea6d7b7c 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -2858,8 +2858,8 @@ static int skd_cons_disk(struct skd_device *skdev)
2858 /* set optimal I/O size to 8KB */ 2858 /* set optimal I/O size to 8KB */
2859 blk_queue_io_opt(q, 8192); 2859 blk_queue_io_opt(q, 8192);
2860 2860
2861 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 2861 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
2862 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); 2862 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
2863 2863
2864 blk_queue_rq_timeout(q, 8 * HZ); 2864 blk_queue_rq_timeout(q, 8 * HZ);
2865 2865
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 8077123678ad..5c7fb8cc4149 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
888 card->Active = -1; /* no page is active */ 888 card->Active = -1; /* no page is active */
889 card->bio = NULL; 889 card->bio = NULL;
890 card->biotail = &card->bio; 890 card->biotail = &card->bio;
891 spin_lock_init(&card->lock);
891 892
892 card->queue = blk_alloc_queue(GFP_KERNEL); 893 card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
894 &card->lock);
893 if (!card->queue) 895 if (!card->queue)
894 goto failed_alloc; 896 goto failed_alloc;
895 897
896 blk_queue_make_request(card->queue, mm_make_request); 898 blk_queue_make_request(card->queue, mm_make_request);
897 card->queue->queue_lock = &card->lock;
898 card->queue->queuedata = card; 899 card->queue->queuedata = card;
899 900
900 tasklet_init(&card->tasklet, process_page, (unsigned long)card); 901 tasklet_init(&card->tasklet, process_page, (unsigned long)card);
@@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
968 dev_printk(KERN_INFO, &card->dev->dev, 969 dev_printk(KERN_INFO, &card->dev->dev,
969 "Window size %d bytes, IRQ %d\n", data, dev->irq); 970 "Window size %d bytes, IRQ %d\n", data, dev->irq);
970 971
971 spin_lock_init(&card->lock);
972
973 pci_set_drvdata(dev, card); 972 pci_set_drvdata(dev, card);
974 973
975 if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */ 974 if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 92ec1bbece51..2a8e7813bd1a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -932,15 +932,15 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
932 unsigned int segments = info->max_indirect_segments ? : 932 unsigned int segments = info->max_indirect_segments ? :
933 BLKIF_MAX_SEGMENTS_PER_REQUEST; 933 BLKIF_MAX_SEGMENTS_PER_REQUEST;
934 934
935 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); 935 blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
936 936
937 if (info->feature_discard) { 937 if (info->feature_discard) {
938 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); 938 blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
939 blk_queue_max_discard_sectors(rq, get_capacity(gd)); 939 blk_queue_max_discard_sectors(rq, get_capacity(gd));
940 rq->limits.discard_granularity = info->discard_granularity; 940 rq->limits.discard_granularity = info->discard_granularity;
941 rq->limits.discard_alignment = info->discard_alignment; 941 rq->limits.discard_alignment = info->discard_alignment;
942 if (info->feature_secdiscard) 942 if (info->feature_secdiscard)
943 queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); 943 blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
944 } 944 }
945 945
946 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 946 /* Hard sector size and max sectors impersonate the equiv. hardware. */
@@ -1611,8 +1611,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
1611 blkif_req(req)->error = BLK_STS_NOTSUPP; 1611 blkif_req(req)->error = BLK_STS_NOTSUPP;
1612 info->feature_discard = 0; 1612 info->feature_discard = 0;
1613 info->feature_secdiscard = 0; 1613 info->feature_secdiscard = 0;
1614 queue_flag_clear(QUEUE_FLAG_DISCARD, rq); 1614 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
1615 queue_flag_clear(QUEUE_FLAG_SECERASE, rq); 1615 blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
1616 } 1616 }
1617 break; 1617 break;
1618 case BLKIF_OP_FLUSH_DISKCACHE: 1618 case BLKIF_OP_FLUSH_DISKCACHE:
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0afa6c8c3857..71b449613cfa 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1530,8 +1530,8 @@ static int zram_add(void)
1530 /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ 1530 /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
1531 set_capacity(zram->disk, 0); 1531 set_capacity(zram->disk, 0);
1532 /* zram devices sort of resembles non-rotational disks */ 1532 /* zram devices sort of resembles non-rotational disks */
1533 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); 1533 blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1534 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); 1534 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1535 1535
1536 /* 1536 /*
1537 * To ensure that we always get PAGE_SIZE aligned 1537 * To ensure that we always get PAGE_SIZE aligned
@@ -1544,7 +1544,7 @@ static int zram_add(void)
1544 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); 1544 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1545 zram->disk->queue->limits.discard_granularity = PAGE_SIZE; 1545 zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1546 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); 1546 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1547 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); 1547 blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
1548 1548
1549 /* 1549 /*
1550 * zram_bio_discard() will clear all logical blocks if logical block 1550 * zram_bio_discard() will clear all logical blocks if logical block
@@ -1620,8 +1620,8 @@ static int zram_remove(struct zram *zram)
1620 1620
1621 pr_info("Removed device: %s\n", zram->disk->disk_name); 1621 pr_info("Removed device: %s\n", zram->disk->disk_name);
1622 1622
1623 blk_cleanup_queue(zram->disk->queue);
1624 del_gendisk(zram->disk); 1623 del_gendisk(zram->disk);
1624 blk_cleanup_queue(zram->disk->queue);
1625 put_disk(zram->disk); 1625 put_disk(zram->disk);
1626 kfree(zram); 1626 kfree(zram);
1627 return 0; 1627 return 0;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 31762db861e3..1e9bf65c0bfb 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -37,7 +37,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
37 37
38/*-- End of configurable params */ 38/*-- End of configurable params */
39 39
40#define SECTOR_SHIFT 9
41#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 40#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
42#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) 41#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
43#define ZRAM_LOGICAL_BLOCK_SHIFT 12 42#define ZRAM_LOGICAL_BLOCK_SHIFT 12
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index e36d160c458f..8327478effd0 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1152,9 +1152,6 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
1152 1152
1153 cd_dbg(CD_OPEN, "entering cdrom_open\n"); 1153 cd_dbg(CD_OPEN, "entering cdrom_open\n");
1154 1154
1155 /* open is event synchronization point, check events first */
1156 check_disk_change(bdev);
1157
1158 /* if this was a O_NONBLOCK open and we should honor the flags, 1155 /* if this was a O_NONBLOCK open and we should honor the flags,
1159 * do a quick open without drive/disc integrity checks. */ 1156 * do a quick open without drive/disc integrity checks. */
1160 cdi->use_count++; 1157 cdi->use_count++;
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 6495b03f576c..ae3a7537cf0f 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -497,6 +497,9 @@ static const struct cdrom_device_ops gdrom_ops = {
497static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode) 497static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
498{ 498{
499 int ret; 499 int ret;
500
501 check_disk_change(bdev);
502
500 mutex_lock(&gdrom_mutex); 503 mutex_lock(&gdrom_mutex);
501 ret = cdrom_open(gd.cd_info, bdev, mode); 504 ret = cdrom_open(gd.cd_info, bdev, mode);
502 mutex_unlock(&gdrom_mutex); 505 mutex_unlock(&gdrom_mutex);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 7c3ed7c9af77..5a8e8e3c22cd 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -712,7 +712,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
712 struct request_queue *q = drive->queue; 712 struct request_queue *q = drive->queue;
713 int write = rq_data_dir(rq) == WRITE; 713 int write = rq_data_dir(rq) == WRITE;
714 unsigned short sectors_per_frame = 714 unsigned short sectors_per_frame =
715 queue_logical_block_size(q) >> SECTOR_BITS; 715 queue_logical_block_size(q) >> SECTOR_SHIFT;
716 716
717 ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, " 717 ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, "
718 "secs_per_frame: %u", 718 "secs_per_frame: %u",
@@ -919,7 +919,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
919 * end up being bogus. 919 * end up being bogus.
920 */ 920 */
921 blocklen = be32_to_cpu(capbuf.blocklen); 921 blocklen = be32_to_cpu(capbuf.blocklen);
922 blocklen = (blocklen >> SECTOR_BITS) << SECTOR_BITS; 922 blocklen = (blocklen >> SECTOR_SHIFT) << SECTOR_SHIFT;
923 switch (blocklen) { 923 switch (blocklen) {
924 case 512: 924 case 512:
925 case 1024: 925 case 1024:
@@ -935,7 +935,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
935 } 935 }
936 936
937 *capacity = 1 + be32_to_cpu(capbuf.lba); 937 *capacity = 1 + be32_to_cpu(capbuf.lba);
938 *sectors_per_frame = blocklen >> SECTOR_BITS; 938 *sectors_per_frame = blocklen >> SECTOR_SHIFT;
939 939
940 ide_debug_log(IDE_DBG_PROBE, "cap: %lu, sectors_per_frame: %lu", 940 ide_debug_log(IDE_DBG_PROBE, "cap: %lu, sectors_per_frame: %lu",
941 *capacity, *sectors_per_frame); 941 *capacity, *sectors_per_frame);
@@ -1012,7 +1012,7 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense)
1012 drive->probed_capacity = toc->capacity * sectors_per_frame; 1012 drive->probed_capacity = toc->capacity * sectors_per_frame;
1013 1013
1014 blk_queue_logical_block_size(drive->queue, 1014 blk_queue_logical_block_size(drive->queue,
1015 sectors_per_frame << SECTOR_BITS); 1015 sectors_per_frame << SECTOR_SHIFT);
1016 1016
1017 /* first read just the header, so we know how long the TOC is */ 1017 /* first read just the header, so we know how long the TOC is */
1018 stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr, 1018 stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
@@ -1613,6 +1613,8 @@ static int idecd_open(struct block_device *bdev, fmode_t mode)
1613 struct cdrom_info *info; 1613 struct cdrom_info *info;
1614 int rc = -ENXIO; 1614 int rc = -ENXIO;
1615 1615
1616 check_disk_change(bdev);
1617
1616 mutex_lock(&ide_cd_mutex); 1618 mutex_lock(&ide_cd_mutex);
1617 info = ide_cd_get(bdev->bd_disk); 1619 info = ide_cd_get(bdev->bd_disk);
1618 if (!info) 1620 if (!info)
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 264e822eba58..04f0f310a856 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -21,11 +21,7 @@
21 21
22/************************************************************************/ 22/************************************************************************/
23 23
24#define SECTOR_BITS 9 24#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_SHIFT)
25#ifndef SECTOR_SIZE
26#define SECTOR_SIZE (1 << SECTOR_BITS)
27#endif
28#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_BITS)
29#define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32) 25#define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32)
30 26
31/* Capabilities Page size including 8 bytes of Mode Page Header */ 27/* Capabilities Page size including 8 bytes of Mode Page Header */
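
null_blk, rbd, zram and ide-cd all drop their private SECTOR_SHIFT/SECTOR_SIZE/SECTOR_BITS definitions in the hunks above; the values are presumably provided once by the common block headers elsewhere in this series, and remain unchanged:

    #define SECTOR_SHIFT    9                   /* 512-byte sectors */
    #define SECTOR_SIZE     (1 << SECTOR_SHIFT)
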
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 67bc72d78fbf..f1a7c58fe418 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -687,8 +687,8 @@ static void ide_disk_setup(ide_drive_t *drive)
687 queue_max_sectors(q) / 2); 687 queue_max_sectors(q) / 2);
688 688
689 if (ata_id_is_ssd(id)) { 689 if (ata_id_is_ssd(id)) {
690 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 690 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
691 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); 691 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
692 } 692 }
693 693
694 /* calculate drive capacity, and select LBA if possible */ 694 /* calculate drive capacity, and select LBA if possible */
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index caa20eb5f26b..2019e66eada7 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -766,14 +766,14 @@ static int ide_init_queue(ide_drive_t *drive)
766 * limits and LBA48 we could raise it but as yet 766 * limits and LBA48 we could raise it but as yet
767 * do not. 767 * do not.
768 */ 768 */
769 q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif)); 769 q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL);
770 if (!q) 770 if (!q)
771 return 1; 771 return 1;
772 772
773 q->request_fn = do_ide_request; 773 q->request_fn = do_ide_request;
774 q->initialize_rq_fn = ide_initialize_rq; 774 q->initialize_rq_fn = ide_initialize_rq;
775 q->cmd_size = sizeof(struct ide_request); 775 q->cmd_size = sizeof(struct ide_request);
776 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); 776 blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
777 if (blk_init_allocated_queue(q) < 0) { 777 if (blk_init_allocated_queue(q) < 0) {
778 blk_cleanup_queue(q); 778 blk_cleanup_queue(q);
779 return 1; 779 return 1;
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index dcc9e621e651..63171cdce270 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -36,13 +36,13 @@ static DECLARE_RWSEM(nvm_lock);
36/* Map between virtual and physical channel and lun */ 36/* Map between virtual and physical channel and lun */
37struct nvm_ch_map { 37struct nvm_ch_map {
38 int ch_off; 38 int ch_off;
39 int nr_luns; 39 int num_lun;
40 int *lun_offs; 40 int *lun_offs;
41}; 41};
42 42
43struct nvm_dev_map { 43struct nvm_dev_map {
44 struct nvm_ch_map *chnls; 44 struct nvm_ch_map *chnls;
45 int nr_chnls; 45 int num_ch;
46}; 46};
47 47
48static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) 48static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
@@ -114,15 +114,15 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear)
114 struct nvm_dev_map *dev_map = tgt_dev->map; 114 struct nvm_dev_map *dev_map = tgt_dev->map;
115 int i, j; 115 int i, j;
116 116
117 for (i = 0; i < dev_map->nr_chnls; i++) { 117 for (i = 0; i < dev_map->num_ch; i++) {
118 struct nvm_ch_map *ch_map = &dev_map->chnls[i]; 118 struct nvm_ch_map *ch_map = &dev_map->chnls[i];
119 int *lun_offs = ch_map->lun_offs; 119 int *lun_offs = ch_map->lun_offs;
120 int ch = i + ch_map->ch_off; 120 int ch = i + ch_map->ch_off;
121 121
122 if (clear) { 122 if (clear) {
123 for (j = 0; j < ch_map->nr_luns; j++) { 123 for (j = 0; j < ch_map->num_lun; j++) {
124 int lun = j + lun_offs[j]; 124 int lun = j + lun_offs[j];
125 int lunid = (ch * dev->geo.nr_luns) + lun; 125 int lunid = (ch * dev->geo.num_lun) + lun;
126 126
127 WARN_ON(!test_and_clear_bit(lunid, 127 WARN_ON(!test_and_clear_bit(lunid,
128 dev->lun_map)); 128 dev->lun_map));
@@ -147,47 +147,46 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
147 struct nvm_dev_map *dev_rmap = dev->rmap; 147 struct nvm_dev_map *dev_rmap = dev->rmap;
148 struct nvm_dev_map *dev_map; 148 struct nvm_dev_map *dev_map;
149 struct ppa_addr *luns; 149 struct ppa_addr *luns;
150 int nr_luns = lun_end - lun_begin + 1; 150 int num_lun = lun_end - lun_begin + 1;
151 int luns_left = nr_luns; 151 int luns_left = num_lun;
152 int nr_chnls = nr_luns / dev->geo.nr_luns; 152 int num_ch = num_lun / dev->geo.num_lun;
153 int nr_chnls_mod = nr_luns % dev->geo.nr_luns; 153 int num_ch_mod = num_lun % dev->geo.num_lun;
154 int bch = lun_begin / dev->geo.nr_luns; 154 int bch = lun_begin / dev->geo.num_lun;
155 int blun = lun_begin % dev->geo.nr_luns; 155 int blun = lun_begin % dev->geo.num_lun;
156 int lunid = 0; 156 int lunid = 0;
157 int lun_balanced = 1; 157 int lun_balanced = 1;
158 int prev_nr_luns; 158 int sec_per_lun, prev_num_lun;
159 int i, j; 159 int i, j;
160 160
161 nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1; 161 num_ch = (num_ch_mod == 0) ? num_ch : num_ch + 1;
162 162
163 dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); 163 dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
164 if (!dev_map) 164 if (!dev_map)
165 goto err_dev; 165 goto err_dev;
166 166
167 dev_map->chnls = kcalloc(nr_chnls, sizeof(struct nvm_ch_map), 167 dev_map->chnls = kcalloc(num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL);
168 GFP_KERNEL);
169 if (!dev_map->chnls) 168 if (!dev_map->chnls)
170 goto err_chnls; 169 goto err_chnls;
171 170
172 luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL); 171 luns = kcalloc(num_lun, sizeof(struct ppa_addr), GFP_KERNEL);
173 if (!luns) 172 if (!luns)
174 goto err_luns; 173 goto err_luns;
175 174
176 prev_nr_luns = (luns_left > dev->geo.nr_luns) ? 175 prev_num_lun = (luns_left > dev->geo.num_lun) ?
177 dev->geo.nr_luns : luns_left; 176 dev->geo.num_lun : luns_left;
178 for (i = 0; i < nr_chnls; i++) { 177 for (i = 0; i < num_ch; i++) {
179 struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; 178 struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
180 int *lun_roffs = ch_rmap->lun_offs; 179 int *lun_roffs = ch_rmap->lun_offs;
181 struct nvm_ch_map *ch_map = &dev_map->chnls[i]; 180 struct nvm_ch_map *ch_map = &dev_map->chnls[i];
182 int *lun_offs; 181 int *lun_offs;
183 int luns_in_chnl = (luns_left > dev->geo.nr_luns) ? 182 int luns_in_chnl = (luns_left > dev->geo.num_lun) ?
184 dev->geo.nr_luns : luns_left; 183 dev->geo.num_lun : luns_left;
185 184
186 if (lun_balanced && prev_nr_luns != luns_in_chnl) 185 if (lun_balanced && prev_num_lun != luns_in_chnl)
187 lun_balanced = 0; 186 lun_balanced = 0;
188 187
189 ch_map->ch_off = ch_rmap->ch_off = bch; 188 ch_map->ch_off = ch_rmap->ch_off = bch;
190 ch_map->nr_luns = luns_in_chnl; 189 ch_map->num_lun = luns_in_chnl;
191 190
192 lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); 191 lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
193 if (!lun_offs) 192 if (!lun_offs)
@@ -195,8 +194,8 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
195 194
196 for (j = 0; j < luns_in_chnl; j++) { 195 for (j = 0; j < luns_in_chnl; j++) {
197 luns[lunid].ppa = 0; 196 luns[lunid].ppa = 0;
198 luns[lunid].g.ch = i; 197 luns[lunid].a.ch = i;
199 luns[lunid++].g.lun = j; 198 luns[lunid++].a.lun = j;
200 199
201 lun_offs[j] = blun; 200 lun_offs[j] = blun;
202 lun_roffs[j + blun] = blun; 201 lun_roffs[j + blun] = blun;
@@ -209,24 +208,29 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
209 luns_left -= luns_in_chnl; 208 luns_left -= luns_in_chnl;
210 } 209 }
211 210
212 dev_map->nr_chnls = nr_chnls; 211 dev_map->num_ch = num_ch;
213 212
214 tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL); 213 tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
215 if (!tgt_dev) 214 if (!tgt_dev)
216 goto err_ch; 215 goto err_ch;
217 216
217 /* Inherit device geometry from parent */
218 memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); 218 memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
219
219 /* Target device only owns a portion of the physical device */ 220 /* Target device only owns a portion of the physical device */
220 tgt_dev->geo.nr_chnls = nr_chnls; 221 tgt_dev->geo.num_ch = num_ch;
221 tgt_dev->geo.all_luns = nr_luns; 222 tgt_dev->geo.num_lun = (lun_balanced) ? prev_num_lun : -1;
222 tgt_dev->geo.nr_luns = (lun_balanced) ? prev_nr_luns : -1; 223 tgt_dev->geo.all_luns = num_lun;
224 tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk;
225
223 tgt_dev->geo.op = op; 226 tgt_dev->geo.op = op;
224 tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; 227
228 sec_per_lun = dev->geo.clba * dev->geo.num_chk;
229 tgt_dev->geo.total_secs = num_lun * sec_per_lun;
230
225 tgt_dev->q = dev->q; 231 tgt_dev->q = dev->q;
226 tgt_dev->map = dev_map; 232 tgt_dev->map = dev_map;
227 tgt_dev->luns = luns; 233 tgt_dev->luns = luns;
228 memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
229
230 tgt_dev->parent = dev; 234 tgt_dev->parent = dev;
231 235
232 return tgt_dev; 236 return tgt_dev;
@@ -296,24 +300,20 @@ static int __nvm_config_simple(struct nvm_dev *dev,
296static int __nvm_config_extended(struct nvm_dev *dev, 300static int __nvm_config_extended(struct nvm_dev *dev,
297 struct nvm_ioctl_create_extended *e) 301 struct nvm_ioctl_create_extended *e)
298{ 302{
299 struct nvm_geo *geo = &dev->geo;
300
301 if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) { 303 if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) {
302 e->lun_begin = 0; 304 e->lun_begin = 0;
303 e->lun_end = dev->geo.all_luns - 1; 305 e->lun_end = dev->geo.all_luns - 1;
304 } 306 }
305 307
306 /* op not set falls into target's default */ 308 /* op not set falls into target's default */
307 if (e->op == 0xFFFF) 309 if (e->op == 0xFFFF) {
308 e->op = NVM_TARGET_DEFAULT_OP; 310 e->op = NVM_TARGET_DEFAULT_OP;
309 311 } else if (e->op < NVM_TARGET_MIN_OP || e->op > NVM_TARGET_MAX_OP) {
310 if (e->op < NVM_TARGET_MIN_OP ||
311 e->op > NVM_TARGET_MAX_OP) {
312 pr_err("nvm: invalid over provisioning value\n"); 312 pr_err("nvm: invalid over provisioning value\n");
313 return -EINVAL; 313 return -EINVAL;
314 } 314 }
315 315
316 return nvm_config_check_luns(geo, e->lun_begin, e->lun_end); 316 return nvm_config_check_luns(&dev->geo, e->lun_begin, e->lun_end);
317} 317}
318 318
319static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) 319static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
@@ -384,7 +384,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
384 goto err_dev; 384 goto err_dev;
385 } 385 }
386 386
387 tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); 387 tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL);
388 if (!tqueue) { 388 if (!tqueue) {
389 ret = -ENOMEM; 389 ret = -ENOMEM;
390 goto err_disk; 390 goto err_disk;
@@ -407,7 +407,8 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
407 tdisk->private_data = targetdata; 407 tdisk->private_data = targetdata;
408 tqueue->queuedata = targetdata; 408 tqueue->queuedata = targetdata;
409 409
410 blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); 410 blk_queue_max_hw_sectors(tqueue,
411 (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
411 412
412 set_capacity(tdisk, tt->capacity(targetdata)); 413 set_capacity(tdisk, tt->capacity(targetdata));
413 add_disk(tdisk); 414 add_disk(tdisk);
@@ -503,20 +504,20 @@ static int nvm_register_map(struct nvm_dev *dev)
503 if (!rmap) 504 if (!rmap)
504 goto err_rmap; 505 goto err_rmap;
505 506
506 rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct nvm_ch_map), 507 rmap->chnls = kcalloc(dev->geo.num_ch, sizeof(struct nvm_ch_map),
507 GFP_KERNEL); 508 GFP_KERNEL);
508 if (!rmap->chnls) 509 if (!rmap->chnls)
509 goto err_chnls; 510 goto err_chnls;
510 511
511 for (i = 0; i < dev->geo.nr_chnls; i++) { 512 for (i = 0; i < dev->geo.num_ch; i++) {
512 struct nvm_ch_map *ch_rmap; 513 struct nvm_ch_map *ch_rmap;
513 int *lun_roffs; 514 int *lun_roffs;
514 int luns_in_chnl = dev->geo.nr_luns; 515 int luns_in_chnl = dev->geo.num_lun;
515 516
516 ch_rmap = &rmap->chnls[i]; 517 ch_rmap = &rmap->chnls[i];
517 518
518 ch_rmap->ch_off = -1; 519 ch_rmap->ch_off = -1;
519 ch_rmap->nr_luns = luns_in_chnl; 520 ch_rmap->num_lun = luns_in_chnl;
520 521
521 lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); 522 lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
522 if (!lun_roffs) 523 if (!lun_roffs)
@@ -545,7 +546,7 @@ static void nvm_unregister_map(struct nvm_dev *dev)
545 struct nvm_dev_map *rmap = dev->rmap; 546 struct nvm_dev_map *rmap = dev->rmap;
546 int i; 547 int i;
547 548
548 for (i = 0; i < dev->geo.nr_chnls; i++) 549 for (i = 0; i < dev->geo.num_ch; i++)
549 kfree(rmap->chnls[i].lun_offs); 550 kfree(rmap->chnls[i].lun_offs);
550 551
551 kfree(rmap->chnls); 552 kfree(rmap->chnls);
@@ -555,22 +556,22 @@ static void nvm_unregister_map(struct nvm_dev *dev)
555static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) 556static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
556{ 557{
557 struct nvm_dev_map *dev_map = tgt_dev->map; 558 struct nvm_dev_map *dev_map = tgt_dev->map;
558 struct nvm_ch_map *ch_map = &dev_map->chnls[p->g.ch]; 559 struct nvm_ch_map *ch_map = &dev_map->chnls[p->a.ch];
559 int lun_off = ch_map->lun_offs[p->g.lun]; 560 int lun_off = ch_map->lun_offs[p->a.lun];
560 561
561 p->g.ch += ch_map->ch_off; 562 p->a.ch += ch_map->ch_off;
562 p->g.lun += lun_off; 563 p->a.lun += lun_off;
563} 564}
564 565
565static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) 566static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
566{ 567{
567 struct nvm_dev *dev = tgt_dev->parent; 568 struct nvm_dev *dev = tgt_dev->parent;
568 struct nvm_dev_map *dev_rmap = dev->rmap; 569 struct nvm_dev_map *dev_rmap = dev->rmap;
569 struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch]; 570 struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->a.ch];
570 int lun_roff = ch_rmap->lun_offs[p->g.lun]; 571 int lun_roff = ch_rmap->lun_offs[p->a.lun];
571 572
572 p->g.ch -= ch_rmap->ch_off; 573 p->a.ch -= ch_rmap->ch_off;
573 p->g.lun -= lun_roff; 574 p->a.lun -= lun_roff;
574} 575}
575 576
576static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, 577static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
@@ -580,7 +581,7 @@ static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
580 581
581 for (i = 0; i < nr_ppas; i++) { 582 for (i = 0; i < nr_ppas; i++) {
582 nvm_map_to_dev(tgt_dev, &ppa_list[i]); 583 nvm_map_to_dev(tgt_dev, &ppa_list[i]);
583 ppa_list[i] = generic_to_dev_addr(tgt_dev, ppa_list[i]); 584 ppa_list[i] = generic_to_dev_addr(tgt_dev->parent, ppa_list[i]);
584 } 585 }
585} 586}
586 587
@@ -590,7 +591,7 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,
590 int i; 591 int i;
591 592
592 for (i = 0; i < nr_ppas; i++) { 593 for (i = 0; i < nr_ppas; i++) {
593 ppa_list[i] = dev_to_generic_addr(tgt_dev, ppa_list[i]); 594 ppa_list[i] = dev_to_generic_addr(tgt_dev->parent, ppa_list[i]);
594 nvm_map_to_tgt(tgt_dev, &ppa_list[i]); 595 nvm_map_to_tgt(tgt_dev, &ppa_list[i]);
595 } 596 }
596} 597}
@@ -674,7 +675,7 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
674 int i, plane_cnt, pl_idx; 675 int i, plane_cnt, pl_idx;
675 struct ppa_addr ppa; 676 struct ppa_addr ppa;
676 677
677 if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { 678 if (geo->pln_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
678 rqd->nr_ppas = nr_ppas; 679 rqd->nr_ppas = nr_ppas;
679 rqd->ppa_addr = ppas[0]; 680 rqd->ppa_addr = ppas[0];
680 681
@@ -688,7 +689,7 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
688 return -ENOMEM; 689 return -ENOMEM;
689 } 690 }
690 691
691 plane_cnt = geo->plane_mode; 692 plane_cnt = geo->pln_mode;
692 rqd->nr_ppas *= plane_cnt; 693 rqd->nr_ppas *= plane_cnt;
693 694
694 for (i = 0; i < nr_ppas; i++) { 695 for (i = 0; i < nr_ppas; i++) {
@@ -711,6 +712,17 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
711 nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); 712 nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
712} 713}
713 714
715int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta,
716 struct ppa_addr ppa, int nchks)
717{
718 struct nvm_dev *dev = tgt_dev->parent;
719
720 nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
721
722 return dev->ops->get_chk_meta(tgt_dev->parent, meta,
723 (sector_t)ppa.ppa, nchks);
724}
725EXPORT_SYMBOL(nvm_get_chunk_meta);
714 726
715int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, 727int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
716 int nr_ppas, int type) 728 int nr_ppas, int type)
@@ -719,7 +731,7 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
719 struct nvm_rq rqd; 731 struct nvm_rq rqd;
720 int ret; 732 int ret;
721 733
722 if (nr_ppas > dev->ops->max_phys_sect) { 734 if (nr_ppas > NVM_MAX_VLBA) {
723 pr_err("nvm: unable to update all blocks atomically\n"); 735 pr_err("nvm: unable to update all blocks atomically\n");
724 return -EINVAL; 736 return -EINVAL;
725 } 737 }
@@ -740,14 +752,6 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
740} 752}
741EXPORT_SYMBOL(nvm_set_tgt_bb_tbl); 753EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);
742 754
743int nvm_max_phys_sects(struct nvm_tgt_dev *tgt_dev)
744{
745 struct nvm_dev *dev = tgt_dev->parent;
746
747 return dev->ops->max_phys_sect;
748}
749EXPORT_SYMBOL(nvm_max_phys_sects);
750
751int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) 755int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
752{ 756{
753 struct nvm_dev *dev = tgt_dev->parent; 757 struct nvm_dev *dev = tgt_dev->parent;
@@ -814,15 +818,15 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
814 struct nvm_geo *geo = &dev->geo; 818 struct nvm_geo *geo = &dev->geo;
815 int blk, offset, pl, blktype; 819 int blk, offset, pl, blktype;
816 820
817 if (nr_blks != geo->nr_chks * geo->plane_mode) 821 if (nr_blks != geo->num_chk * geo->pln_mode)
818 return -EINVAL; 822 return -EINVAL;
819 823
820 for (blk = 0; blk < geo->nr_chks; blk++) { 824 for (blk = 0; blk < geo->num_chk; blk++) {
821 offset = blk * geo->plane_mode; 825 offset = blk * geo->pln_mode;
822 blktype = blks[offset]; 826 blktype = blks[offset];
823 827
824 /* Bad blocks on any planes take precedence over other types */ 828 /* Bad blocks on any planes take precedence over other types */
825 for (pl = 0; pl < geo->plane_mode; pl++) { 829 for (pl = 0; pl < geo->pln_mode; pl++) {
826 if (blks[offset + pl] & 830 if (blks[offset + pl] &
827 (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { 831 (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
828 blktype = blks[offset + pl]; 832 blktype = blks[offset + pl];
@@ -833,7 +837,7 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
833 blks[blk] = blktype; 837 blks[blk] = blktype;
834 } 838 }
835 839
836 return geo->nr_chks; 840 return geo->num_chk;
837} 841}
838EXPORT_SYMBOL(nvm_bb_tbl_fold); 842EXPORT_SYMBOL(nvm_bb_tbl_fold);
839 843
@@ -850,44 +854,9 @@ EXPORT_SYMBOL(nvm_get_tgt_bb_tbl);
850 854
851static int nvm_core_init(struct nvm_dev *dev) 855static int nvm_core_init(struct nvm_dev *dev)
852{ 856{
853 struct nvm_id *id = &dev->identity;
854 struct nvm_id_group *grp = &id->grp;
855 struct nvm_geo *geo = &dev->geo; 857 struct nvm_geo *geo = &dev->geo;
856 int ret; 858 int ret;
857 859
858 memcpy(&geo->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
859
860 if (grp->mtype != 0) {
861 pr_err("nvm: memory type not supported\n");
862 return -EINVAL;
863 }
864
865 /* Whole device values */
866 geo->nr_chnls = grp->num_ch;
867 geo->nr_luns = grp->num_lun;
868
869 /* Generic device geometry values */
870 geo->ws_min = grp->ws_min;
871 geo->ws_opt = grp->ws_opt;
872 geo->ws_seq = grp->ws_seq;
873 geo->ws_per_chk = grp->ws_per_chk;
874 geo->nr_chks = grp->num_chk;
875 geo->sec_size = grp->csecs;
876 geo->oob_size = grp->sos;
877 geo->mccap = grp->mccap;
878 geo->max_rq_size = dev->ops->max_phys_sect * geo->sec_size;
879
880 geo->sec_per_chk = grp->clba;
881 geo->sec_per_lun = geo->sec_per_chk * geo->nr_chks;
882 geo->all_luns = geo->nr_luns * geo->nr_chnls;
883
884 /* 1.2 spec device geometry values */
885 geo->plane_mode = 1 << geo->ws_seq;
886 geo->nr_planes = geo->ws_opt / geo->ws_min;
887 geo->sec_per_pg = geo->ws_min;
888 geo->sec_per_pl = geo->sec_per_pg * geo->nr_planes;
889
890 dev->total_secs = geo->all_luns * geo->sec_per_lun;
891 dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns), 860 dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns),
892 sizeof(unsigned long), GFP_KERNEL); 861 sizeof(unsigned long), GFP_KERNEL);
893 if (!dev->lun_map) 862 if (!dev->lun_map)
@@ -902,7 +871,6 @@ static int nvm_core_init(struct nvm_dev *dev)
902 if (ret) 871 if (ret)
903 goto err_fmtype; 872 goto err_fmtype;
904 873
905 blk_queue_logical_block_size(dev->q, geo->sec_size);
906 return 0; 874 return 0;
907err_fmtype: 875err_fmtype:
908 kfree(dev->lun_map); 876 kfree(dev->lun_map);
@@ -927,18 +895,14 @@ static int nvm_init(struct nvm_dev *dev)
927 struct nvm_geo *geo = &dev->geo; 895 struct nvm_geo *geo = &dev->geo;
928 int ret = -EINVAL; 896 int ret = -EINVAL;
929 897
930 if (dev->ops->identity(dev, &dev->identity)) { 898 if (dev->ops->identity(dev)) {
931 pr_err("nvm: device could not be identified\n"); 899 pr_err("nvm: device could not be identified\n");
932 goto err; 900 goto err;
933 } 901 }
934 902
935 pr_debug("nvm: ver:%x nvm_vendor:%x\n", 903 pr_debug("nvm: ver:%u.%u nvm_vendor:%x\n",
936 dev->identity.ver_id, dev->identity.vmnt); 904 geo->major_ver_id, geo->minor_ver_id,
937 905 geo->vmnt);
938 if (dev->identity.ver_id != 1) {
939 pr_err("nvm: device not supported by kernel.");
940 goto err;
941 }
942 906
943 ret = nvm_core_init(dev); 907 ret = nvm_core_init(dev);
944 if (ret) { 908 if (ret) {
@@ -946,10 +910,10 @@ static int nvm_init(struct nvm_dev *dev)
946 goto err; 910 goto err;
947 } 911 }
948 912
949 pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n", 913 pr_info("nvm: registered %s [%u/%u/%u/%u/%u]\n",
950 dev->name, geo->sec_per_pg, geo->nr_planes, 914 dev->name, dev->geo.ws_min, dev->geo.ws_opt,
951 geo->ws_per_chk, geo->nr_chks, 915 dev->geo.num_chk, dev->geo.all_luns,
952 geo->all_luns, geo->nr_chnls); 916 dev->geo.num_ch);
953 return 0; 917 return 0;
954err: 918err:
955 pr_err("nvm: failed to initialize nvm\n"); 919 pr_err("nvm: failed to initialize nvm\n");
@@ -969,17 +933,10 @@ int nvm_register(struct nvm_dev *dev)
969 if (!dev->q || !dev->ops) 933 if (!dev->q || !dev->ops)
970 return -EINVAL; 934 return -EINVAL;
971 935
972 if (dev->ops->max_phys_sect > 256) { 936 dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
973 pr_info("nvm: max sectors supported is 256.\n"); 937 if (!dev->dma_pool) {
974 return -EINVAL; 938 pr_err("nvm: could not create dma pool\n");
975 } 939 return -ENOMEM;
976
977 if (dev->ops->max_phys_sect > 1) {
978 dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
979 if (!dev->dma_pool) {
980 pr_err("nvm: could not create dma pool\n");
981 return -ENOMEM;
982 }
983 } 940 }
984 941
985 ret = nvm_init(dev); 942 ret = nvm_init(dev);
@@ -1040,9 +997,6 @@ static long nvm_ioctl_info(struct file *file, void __user *arg)
1040 struct nvm_tgt_type *tt; 997 struct nvm_tgt_type *tt;
1041 int tgt_iter = 0; 998 int tgt_iter = 0;
1042 999
1043 if (!capable(CAP_SYS_ADMIN))
1044 return -EPERM;
1045
1046 info = memdup_user(arg, sizeof(struct nvm_ioctl_info)); 1000 info = memdup_user(arg, sizeof(struct nvm_ioctl_info));
1047 if (IS_ERR(info)) 1001 if (IS_ERR(info))
1048 return -EFAULT; 1002 return -EFAULT;
@@ -1081,9 +1035,6 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
1081 struct nvm_dev *dev; 1035 struct nvm_dev *dev;
1082 int i = 0; 1036 int i = 0;
1083 1037
1084 if (!capable(CAP_SYS_ADMIN))
1085 return -EPERM;
1086
1087 devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL); 1038 devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL);
1088 if (!devices) 1039 if (!devices)
1089 return -ENOMEM; 1040 return -ENOMEM;
@@ -1124,9 +1075,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
1124{ 1075{
1125 struct nvm_ioctl_create create; 1076 struct nvm_ioctl_create create;
1126 1077
1127 if (!capable(CAP_SYS_ADMIN))
1128 return -EPERM;
1129
1130 if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create))) 1078 if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create)))
1131 return -EFAULT; 1079 return -EFAULT;
1132 1080
@@ -1162,9 +1110,6 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
1162 struct nvm_dev *dev; 1110 struct nvm_dev *dev;
1163 int ret = 0; 1111 int ret = 0;
1164 1112
1165 if (!capable(CAP_SYS_ADMIN))
1166 return -EPERM;
1167
1168 if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) 1113 if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
1169 return -EFAULT; 1114 return -EFAULT;
1170 1115
@@ -1189,9 +1134,6 @@ static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
1189{ 1134{
1190 struct nvm_ioctl_dev_init init; 1135 struct nvm_ioctl_dev_init init;
1191 1136
1192 if (!capable(CAP_SYS_ADMIN))
1193 return -EPERM;
1194
1195 if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init))) 1137 if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init)))
1196 return -EFAULT; 1138 return -EFAULT;
1197 1139
@@ -1208,9 +1150,6 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
1208{ 1150{
1209 struct nvm_ioctl_dev_factory fact; 1151 struct nvm_ioctl_dev_factory fact;
1210 1152
1211 if (!capable(CAP_SYS_ADMIN))
1212 return -EPERM;
1213
1214 if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory))) 1153 if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory)))
1215 return -EFAULT; 1154 return -EFAULT;
1216 1155
@@ -1226,6 +1165,9 @@ static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
1226{ 1165{
1227 void __user *argp = (void __user *)arg; 1166 void __user *argp = (void __user *)arg;
1228 1167
1168 if (!capable(CAP_SYS_ADMIN))
1169 return -EPERM;
1170
1229 switch (cmd) { 1171 switch (cmd) {
1230 case NVM_INFO: 1172 case NVM_INFO:
1231 return nvm_ioctl_info(file, argp); 1173 return nvm_ioctl_info(file, argp);
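
The core.c changes above fold the 1.2-specific geometry into the generic one: nr_chnls becomes num_ch, nr_luns becomes num_lun, nr_chks becomes num_chk, plane_mode becomes pln_mode, and sec_size becomes csecs. The target capacity computed in nvm_create_tgt_dev() is therefore derived from the generic fields; restated here for readability, not part of the applied patch:

    /* clba = sectors per chunk, num_chk = chunks per LUN */
    sec_per_lun = dev->geo.clba * dev->geo.num_chk;
    tgt_dev->geo.total_secs = num_lun * sec_per_lun;
    tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk;
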
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 000fcad38136..29a23111b31c 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -63,6 +63,8 @@ retry:
63 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); 63 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
64 } 64 }
65 65
66 atomic64_add(nr_entries, &pblk->user_wa);
67
66#ifdef CONFIG_NVM_DEBUG 68#ifdef CONFIG_NVM_DEBUG
67 atomic_long_add(nr_entries, &pblk->inflight_writes); 69 atomic_long_add(nr_entries, &pblk->inflight_writes);
68 atomic_long_add(nr_entries, &pblk->req_writes); 70 atomic_long_add(nr_entries, &pblk->req_writes);
@@ -117,6 +119,8 @@ retry:
117 WARN_ONCE(gc_rq->secs_to_gc != valid_entries, 119 WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
118 "pblk: inconsistent GC write\n"); 120 "pblk: inconsistent GC write\n");
119 121
122 atomic64_add(valid_entries, &pblk->gc_wa);
123
120#ifdef CONFIG_NVM_DEBUG 124#ifdef CONFIG_NVM_DEBUG
121 atomic_long_add(valid_entries, &pblk->inflight_writes); 125 atomic_long_add(valid_entries, &pblk->inflight_writes);
122 atomic_long_add(valid_entries, &pblk->recov_gc_writes); 126 atomic_long_add(valid_entries, &pblk->recov_gc_writes);
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 0487b9340c1d..94d5d97c9d8a 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -44,11 +44,12 @@ static void pblk_line_mark_bb(struct work_struct *work)
44} 44}
45 45
46static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, 46static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
47 struct ppa_addr *ppa) 47 struct ppa_addr ppa_addr)
48{ 48{
49 struct nvm_tgt_dev *dev = pblk->dev; 49 struct nvm_tgt_dev *dev = pblk->dev;
50 struct nvm_geo *geo = &dev->geo; 50 struct nvm_geo *geo = &dev->geo;
51 int pos = pblk_ppa_to_pos(geo, *ppa); 51 struct ppa_addr *ppa;
52 int pos = pblk_ppa_to_pos(geo, ppa_addr);
52 53
53 pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos); 54 pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
54 atomic_long_inc(&pblk->erase_failed); 55 atomic_long_inc(&pblk->erase_failed);
@@ -58,26 +59,38 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
58 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n", 59 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
59 line->id, pos); 60 line->id, pos);
60 61
62 /* Not necessary to mark bad blocks on 2.0 spec. */
63 if (geo->version == NVM_OCSSD_SPEC_20)
64 return;
65
66 ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
67 if (!ppa)
68 return;
69
70 *ppa = ppa_addr;
61 pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, 71 pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb,
62 GFP_ATOMIC, pblk->bb_wq); 72 GFP_ATOMIC, pblk->bb_wq);
63} 73}
64 74
65static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) 75static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
66{ 76{
77 struct nvm_tgt_dev *dev = pblk->dev;
78 struct nvm_geo *geo = &dev->geo;
79 struct nvm_chk_meta *chunk;
67 struct pblk_line *line; 80 struct pblk_line *line;
81 int pos;
68 82
69 line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)]; 83 line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)];
84 pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
85 chunk = &line->chks[pos];
86
70 atomic_dec(&line->left_seblks); 87 atomic_dec(&line->left_seblks);
71 88
72 if (rqd->error) { 89 if (rqd->error) {
73 struct ppa_addr *ppa; 90 chunk->state = NVM_CHK_ST_OFFLINE;
74 91 pblk_mark_bb(pblk, line, rqd->ppa_addr);
75 ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC); 92 } else {
76 if (!ppa) 93 chunk->state = NVM_CHK_ST_FREE;
77 return;
78
79 *ppa = rqd->ppa_addr;
80 pblk_mark_bb(pblk, line, ppa);
81 } 94 }
82 95
83 atomic_dec(&pblk->inflight_io); 96 atomic_dec(&pblk->inflight_io);
@@ -92,6 +105,49 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
92 mempool_free(rqd, pblk->e_rq_pool); 105 mempool_free(rqd, pblk->e_rq_pool);
93} 106}
94 107
108/*
109 * Get information for all chunks from the device.
110 *
111 * The caller is responsible for freeing the returned structure
112 */
113struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk)
114{
115 struct nvm_tgt_dev *dev = pblk->dev;
116 struct nvm_geo *geo = &dev->geo;
117 struct nvm_chk_meta *meta;
118 struct ppa_addr ppa;
119 unsigned long len;
120 int ret;
121
122 ppa.ppa = 0;
123
124 len = geo->all_chunks * sizeof(*meta);
125 meta = kzalloc(len, GFP_KERNEL);
126 if (!meta)
127 return ERR_PTR(-ENOMEM);
128
129 ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks);
130 if (ret) {
131 kfree(meta);
132 return ERR_PTR(-EIO);
133 }
134
135 return meta;
136}
137
138struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
139 struct nvm_chk_meta *meta,
140 struct ppa_addr ppa)
141{
142 struct nvm_tgt_dev *dev = pblk->dev;
143 struct nvm_geo *geo = &dev->geo;
144 int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun;
145 int lun_off = ppa.m.pu * geo->num_chk;
146 int chk_off = ppa.m.chk;
147
148 return meta + ch_off + lun_off + chk_off;
149}
150
95void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, 151void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
96 u64 paddr) 152 u64 paddr)
97{ 153{
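
pblk_chunk_get_off() above resolves a 2.0 physical address into the flat chunk-metadata array returned by pblk_chunk_get_info(): the array is laid out group-major, then parallel unit, then chunk. A self-contained sketch of the same indexing; the geometry values and the sample address are assumptions for illustration only.

#include <stdio.h>

/* Invented geometry for the example. */
#define NUM_LUN	 4	/* parallel units per group */
#define NUM_CHK	60	/* chunks per parallel unit */

/* Same layout as pblk_chunk_get_off(): group-major, then PU, then chunk. */
static int chunk_meta_index(int grp, int pu, int chk)
{
	int ch_off  = grp * NUM_CHK * NUM_LUN;
	int lun_off = pu * NUM_CHK;

	return ch_off + lun_off + chk;
}

int main(void)
{
	/* chunk 7 of parallel unit 2 in group 1: 1*240 + 2*60 + 7 = 367 */
	printf("index = %d\n", chunk_meta_index(1, 2, 7));
	return 0;
}
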
@@ -613,7 +669,7 @@ next_rq:
613 memset(&rqd, 0, sizeof(struct nvm_rq)); 669 memset(&rqd, 0, sizeof(struct nvm_rq));
614 670
615 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 671 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
616 rq_len = rq_ppas * geo->sec_size; 672 rq_len = rq_ppas * geo->csecs;
617 673
618 bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, 674 bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
619 l_mg->emeta_alloc_type, GFP_KERNEL); 675 l_mg->emeta_alloc_type, GFP_KERNEL);
@@ -722,7 +778,7 @@ u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
722 if (bit >= lm->blk_per_line) 778 if (bit >= lm->blk_per_line)
723 return -1; 779 return -1;
724 780
725 return bit * geo->sec_per_pl; 781 return bit * geo->ws_opt;
726} 782}
727 783
728static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, 784static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
@@ -885,7 +941,7 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
885 } 941 }
886 942
887 ppa = pblk->luns[bit].bppa; /* set ch and lun */ 943 ppa = pblk->luns[bit].bppa; /* set ch and lun */
888 ppa.g.blk = line->id; 944 ppa.a.blk = line->id;
889 945
890 atomic_dec(&line->left_eblks); 946 atomic_dec(&line->left_eblks);
891 WARN_ON(test_and_set_bit(bit, line->erase_bitmap)); 947 WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
@@ -975,7 +1031,8 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
975 memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16); 1031 memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
976 smeta_buf->header.id = cpu_to_le32(line->id); 1032 smeta_buf->header.id = cpu_to_le32(line->id);
977 smeta_buf->header.type = cpu_to_le16(line->type); 1033 smeta_buf->header.type = cpu_to_le16(line->type);
978 smeta_buf->header.version = SMETA_VERSION; 1034 smeta_buf->header.version_major = SMETA_VERSION_MAJOR;
1035 smeta_buf->header.version_minor = SMETA_VERSION_MINOR;
979 1036
980 /* Start metadata */ 1037 /* Start metadata */
981 smeta_buf->seq_nr = cpu_to_le64(line->seq_nr); 1038 smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
@@ -998,6 +1055,12 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
998 /* End metadata */ 1055 /* End metadata */
999 memcpy(&emeta_buf->header, &smeta_buf->header, 1056 memcpy(&emeta_buf->header, &smeta_buf->header,
1000 sizeof(struct line_header)); 1057 sizeof(struct line_header));
1058
1059 emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
1060 emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
1061 emeta_buf->header.crc = cpu_to_le32(
1062 pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
1063
1001 emeta_buf->seq_nr = cpu_to_le64(line->seq_nr); 1064 emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
1002 emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line); 1065 emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
1003 emeta_buf->nr_valid_lbas = cpu_to_le64(0); 1066 emeta_buf->nr_valid_lbas = cpu_to_le64(0);
@@ -1018,28 +1081,26 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
1018 struct nvm_geo *geo = &dev->geo; 1081 struct nvm_geo *geo = &dev->geo;
1019 struct pblk_line_meta *lm = &pblk->lm; 1082 struct pblk_line_meta *lm = &pblk->lm;
1020 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1083 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1021 int nr_bb = 0;
1022 u64 off; 1084 u64 off;
1023 int bit = -1; 1085 int bit = -1;
1086 int emeta_secs;
1024 1087
1025 line->sec_in_line = lm->sec_per_line; 1088 line->sec_in_line = lm->sec_per_line;
1026 1089
1027 /* Capture bad block information on line mapping bitmaps */ 1090 /* Capture bad block information on line mapping bitmaps */
1028 while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line, 1091 while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
1029 bit + 1)) < lm->blk_per_line) { 1092 bit + 1)) < lm->blk_per_line) {
1030 off = bit * geo->sec_per_pl; 1093 off = bit * geo->ws_opt;
1031 bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off, 1094 bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
1032 lm->sec_per_line); 1095 lm->sec_per_line);
1033 bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux, 1096 bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
1034 lm->sec_per_line); 1097 lm->sec_per_line);
1035 line->sec_in_line -= geo->sec_per_chk; 1098 line->sec_in_line -= geo->clba;
1036 if (bit >= lm->emeta_bb)
1037 nr_bb++;
1038 } 1099 }
1039 1100
1040 /* Mark smeta metadata sectors as bad sectors */ 1101 /* Mark smeta metadata sectors as bad sectors */
1041 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); 1102 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
1042 off = bit * geo->sec_per_pl; 1103 off = bit * geo->ws_opt;
1043 bitmap_set(line->map_bitmap, off, lm->smeta_sec); 1104 bitmap_set(line->map_bitmap, off, lm->smeta_sec);
1044 line->sec_in_line -= lm->smeta_sec; 1105 line->sec_in_line -= lm->smeta_sec;
1045 line->smeta_ssec = off; 1106 line->smeta_ssec = off;
@@ -1055,18 +1116,18 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
1055 /* Mark emeta metadata sectors as bad sectors. We need to consider bad 1116 /* Mark emeta metadata sectors as bad sectors. We need to consider bad
1056 * blocks to make sure that there are enough sectors to store emeta 1117 * blocks to make sure that there are enough sectors to store emeta
1057 */ 1118 */
1058 off = lm->sec_per_line - lm->emeta_sec[0]; 1119 emeta_secs = lm->emeta_sec[0];
1059 bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]); 1120 off = lm->sec_per_line;
1060 while (nr_bb) { 1121 while (emeta_secs) {
1061 off -= geo->sec_per_pl; 1122 off -= geo->ws_opt;
1062 if (!test_bit(off, line->invalid_bitmap)) { 1123 if (!test_bit(off, line->invalid_bitmap)) {
1063 bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl); 1124 bitmap_set(line->invalid_bitmap, off, geo->ws_opt);
1064 nr_bb--; 1125 emeta_secs -= geo->ws_opt;
1065 } 1126 }
1066 } 1127 }
1067 1128
1068 line->sec_in_line -= lm->emeta_sec[0];
1069 line->emeta_ssec = off; 1129 line->emeta_ssec = off;
1130 line->sec_in_line -= lm->emeta_sec[0];
1070 line->nr_valid_lbas = 0; 1131 line->nr_valid_lbas = 0;
1071 line->left_msecs = line->sec_in_line; 1132 line->left_msecs = line->sec_in_line;
1072 *line->vsc = cpu_to_le32(line->sec_in_line); 1133 *line->vsc = cpu_to_le32(line->sec_in_line);
@@ -1086,10 +1147,34 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
1086 return 1; 1147 return 1;
1087} 1148}
1088 1149
1150static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
1151{
1152 struct pblk_line_meta *lm = &pblk->lm;
1153 struct nvm_tgt_dev *dev = pblk->dev;
1154 struct nvm_geo *geo = &dev->geo;
1155 int blk_to_erase = atomic_read(&line->blk_in_line);
1156 int i;
1157
1158 for (i = 0; i < lm->blk_per_line; i++) {
1159 struct pblk_lun *rlun = &pblk->luns[i];
1160 int pos = pblk_ppa_to_pos(geo, rlun->bppa);
1161 int state = line->chks[pos].state;
1162
1163 /* Free chunks should not be erased */
1164 if (state & NVM_CHK_ST_FREE) {
1165 set_bit(pblk_ppa_to_pos(geo, rlun->bppa),
1166 line->erase_bitmap);
1167 blk_to_erase--;
1168 }
1169 }
1170
1171 return blk_to_erase;
1172}
1173
1089static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) 1174static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1090{ 1175{
1091 struct pblk_line_meta *lm = &pblk->lm; 1176 struct pblk_line_meta *lm = &pblk->lm;
1092 int blk_in_line = atomic_read(&line->blk_in_line); 1177 int blk_to_erase;
1093 1178
1094 line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC); 1179 line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC);
1095 if (!line->map_bitmap) 1180 if (!line->map_bitmap)
@@ -1102,7 +1187,21 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1102 return -ENOMEM; 1187 return -ENOMEM;
1103 } 1188 }
1104 1189
1190 /* Bad blocks do not need to be erased */
1191 bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
1192
1105 spin_lock(&line->lock); 1193 spin_lock(&line->lock);
1194
1195 /* If we have not written to this line, we need to mark up free chunks
1196 * as already erased
1197 */
1198 if (line->state == PBLK_LINESTATE_NEW) {
1199 blk_to_erase = pblk_prepare_new_line(pblk, line);
1200 line->state = PBLK_LINESTATE_FREE;
1201 } else {
1202 blk_to_erase = atomic_read(&line->blk_in_line);
1203 }
1204
1106 if (line->state != PBLK_LINESTATE_FREE) { 1205 if (line->state != PBLK_LINESTATE_FREE) {
1107 kfree(line->map_bitmap); 1206 kfree(line->map_bitmap);
1108 kfree(line->invalid_bitmap); 1207 kfree(line->invalid_bitmap);
@@ -1114,15 +1213,12 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1114 1213
1115 line->state = PBLK_LINESTATE_OPEN; 1214 line->state = PBLK_LINESTATE_OPEN;
1116 1215
1117 atomic_set(&line->left_eblks, blk_in_line); 1216 atomic_set(&line->left_eblks, blk_to_erase);
1118 atomic_set(&line->left_seblks, blk_in_line); 1217 atomic_set(&line->left_seblks, blk_to_erase);
1119 1218
1120 line->meta_distance = lm->meta_distance; 1219 line->meta_distance = lm->meta_distance;
1121 spin_unlock(&line->lock); 1220 spin_unlock(&line->lock);
1122 1221
1123 /* Bad blocks do not need to be erased */
1124 bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
1125
1126 kref_init(&line->ref); 1222 kref_init(&line->ref);
1127 1223
1128 return 0; 1224 return 0;
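
The two hunks above make line preparation aware of lines that were never written: for a line still in PBLK_LINESTATE_NEW, pblk_prepare_new_line() skips every chunk the device already reports as free, marking it in erase_bitmap and shrinking the erase count before left_eblks/left_seblks are set. A stripped-down model of that bookkeeping; the chunk states and the line width are invented for the example.

#include <stdio.h>
#include <stdint.h>

#define CHK_ST_FREE	(1 << 0)	/* stands in for NVM_CHK_ST_FREE */
#define BLK_PER_LINE	8		/* invented line width */

int main(void)
{
	/* Per-position chunk state as a brand-new line would observe it. */
	int state[BLK_PER_LINE] = {
		CHK_ST_FREE, 0, CHK_ST_FREE, 0, 0, CHK_ST_FREE, 0, 0
	};
	uint64_t erase_bitmap = 0;
	int blk_to_erase = BLK_PER_LINE;
	int pos;

	for (pos = 0; pos < BLK_PER_LINE; pos++) {
		if (state[pos] & CHK_ST_FREE) {
			erase_bitmap |= 1ULL << pos;	/* already erased, skip it */
			blk_to_erase--;
		}
	}

	/* prints erase_bitmap=0x25 blk_to_erase=5 */
	printf("erase_bitmap=0x%llx blk_to_erase=%d\n",
	       (unsigned long long)erase_bitmap, blk_to_erase);
	return 0;
}
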
@@ -1399,13 +1495,6 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
1399 l_mg->data_line = new; 1495 l_mg->data_line = new;
1400 1496
1401 spin_lock(&l_mg->free_lock); 1497 spin_lock(&l_mg->free_lock);
1402 if (pblk->state != PBLK_STATE_RUNNING) {
1403 l_mg->data_line = NULL;
1404 l_mg->data_next = NULL;
1405 spin_unlock(&l_mg->free_lock);
1406 goto out;
1407 }
1408
1409 pblk_line_setup_metadata(new, l_mg, &pblk->lm); 1498 pblk_line_setup_metadata(new, l_mg, &pblk->lm);
1410 spin_unlock(&l_mg->free_lock); 1499 spin_unlock(&l_mg->free_lock);
1411 1500
@@ -1585,12 +1674,14 @@ static void pblk_line_should_sync_meta(struct pblk *pblk)
1585 1674
1586void pblk_line_close(struct pblk *pblk, struct pblk_line *line) 1675void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1587{ 1676{
1677 struct nvm_tgt_dev *dev = pblk->dev;
1678 struct nvm_geo *geo = &dev->geo;
1679 struct pblk_line_meta *lm = &pblk->lm;
1588 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1680 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1589 struct list_head *move_list; 1681 struct list_head *move_list;
1682 int i;
1590 1683
1591#ifdef CONFIG_NVM_DEBUG 1684#ifdef CONFIG_NVM_DEBUG
1592 struct pblk_line_meta *lm = &pblk->lm;
1593
1594 WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line), 1685 WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
1595 "pblk: corrupt closed line %d\n", line->id); 1686 "pblk: corrupt closed line %d\n", line->id);
1596#endif 1687#endif
@@ -1612,6 +1703,15 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1612 line->smeta = NULL; 1703 line->smeta = NULL;
1613 line->emeta = NULL; 1704 line->emeta = NULL;
1614 1705
1706 for (i = 0; i < lm->blk_per_line; i++) {
1707 struct pblk_lun *rlun = &pblk->luns[i];
1708 int pos = pblk_ppa_to_pos(geo, rlun->bppa);
1709 int state = line->chks[pos].state;
1710
1711 if (!(state & NVM_CHK_ST_OFFLINE))
1712 state = NVM_CHK_ST_CLOSED;
1713 }
1714
1615 spin_unlock(&line->lock); 1715 spin_unlock(&line->lock);
1616 spin_unlock(&l_mg->gc_lock); 1716 spin_unlock(&l_mg->gc_lock);
1617} 1717}
@@ -1622,11 +1722,16 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
1622 struct pblk_line_meta *lm = &pblk->lm; 1722 struct pblk_line_meta *lm = &pblk->lm;
1623 struct pblk_emeta *emeta = line->emeta; 1723 struct pblk_emeta *emeta = line->emeta;
1624 struct line_emeta *emeta_buf = emeta->buf; 1724 struct line_emeta *emeta_buf = emeta->buf;
1725 struct wa_counters *wa = emeta_to_wa(lm, emeta_buf);
1625 1726
1626 /* No need for exact vsc value; avoid a big line lock and take aprox. */ 1727 /* No need for exact vsc value; avoid a big line lock and take aprox. */
1627 memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len); 1728 memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
1628 memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len); 1729 memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
1629 1730
1731 wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa));
1732 wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa));
1733 wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa));
1734
1630 emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas); 1735 emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
1631 emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf)); 1736 emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
1632 1737
@@ -1680,8 +1785,8 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
1680 int i; 1785 int i;
1681 1786
1682 for (i = 1; i < nr_ppas; i++) 1787 for (i = 1; i < nr_ppas; i++)
1683 WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun || 1788 WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun ||
1684 ppa_list[0].g.ch != ppa_list[i].g.ch); 1789 ppa_list[0].a.ch != ppa_list[i].a.ch);
1685#endif 1790#endif
1686 1791
1687 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); 1792 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
@@ -1725,8 +1830,8 @@ void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
1725 int i; 1830 int i;
1726 1831
1727 for (i = 1; i < nr_ppas; i++) 1832 for (i = 1; i < nr_ppas; i++)
1728 WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun || 1833 WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun ||
1729 ppa_list[0].g.ch != ppa_list[i].g.ch); 1834 ppa_list[0].a.ch != ppa_list[i].a.ch);
1730#endif 1835#endif
1731 1836
1732 rlun = &pblk->luns[pos]; 1837 rlun = &pblk->luns[pos];
@@ -1739,10 +1844,10 @@ void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1739 struct nvm_tgt_dev *dev = pblk->dev; 1844 struct nvm_tgt_dev *dev = pblk->dev;
1740 struct nvm_geo *geo = &dev->geo; 1845 struct nvm_geo *geo = &dev->geo;
1741 struct pblk_lun *rlun; 1846 struct pblk_lun *rlun;
1742 int nr_luns = geo->all_luns; 1847 int num_lun = geo->all_luns;
1743 int bit = -1; 1848 int bit = -1;
1744 1849
1745 while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) { 1850 while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) {
1746 rlun = &pblk->luns[bit]; 1851 rlun = &pblk->luns[bit];
1747 up(&rlun->wr_sem); 1852 up(&rlun->wr_sem);
1748 } 1853 }
@@ -1829,6 +1934,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
1829#endif 1934#endif
1830 /* Invalidate and discard padded entries */ 1935 /* Invalidate and discard padded entries */
1831 if (lba == ADDR_EMPTY) { 1936 if (lba == ADDR_EMPTY) {
1937 atomic64_inc(&pblk->pad_wa);
1832#ifdef CONFIG_NVM_DEBUG 1938#ifdef CONFIG_NVM_DEBUG
1833 atomic_long_inc(&pblk->padded_wb); 1939 atomic_long_inc(&pblk->padded_wb);
1834#endif 1940#endif
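
pblk_line_close_meta() above also snapshots the three write-amplification counters into the line's end metadata as little-endian 64-bit values, so the totals survive a target being torn down and re-created. A portable sketch of that round-trip; the byte layout below is illustrative (the kernel uses cpu_to_le64 on a struct wa_counters rather than open-coded shifts).

#include <stdio.h>
#include <stdint.h>

/* Store a 64-bit value as little-endian bytes, regardless of host order. */
static void put_le64(uint8_t *dst, uint64_t v)
{
	int i;

	for (i = 0; i < 8; i++)
		dst[i] = (uint8_t)(v >> (8 * i));
}

static uint64_t get_le64(const uint8_t *src)
{
	uint64_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v |= (uint64_t)src[i] << (8 * i);
	return v;
}

int main(void)
{
	uint8_t emeta_wa[3 * 8];	/* three counters packed back to back (layout illustrative) */
	uint64_t user = 123456789, pad = 1024, gc = 98765;

	put_le64(emeta_wa + 0,  user);
	put_le64(emeta_wa + 8,  pad);
	put_le64(emeta_wa + 16, gc);

	printf("user=%llu pad=%llu gc=%llu\n",
	       (unsigned long long)get_le64(emeta_wa + 0),
	       (unsigned long long)get_le64(emeta_wa + 8),
	       (unsigned long long)get_le64(emeta_wa + 16));
	return 0;
}
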
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index 3d899383666e..6851a5c67189 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -88,7 +88,7 @@ static void pblk_gc_line_ws(struct work_struct *work)
88 88
89 up(&gc->gc_sem); 89 up(&gc->gc_sem);
90 90
91 gc_rq->data = vmalloc(gc_rq->nr_secs * geo->sec_size); 91 gc_rq->data = vmalloc(gc_rq->nr_secs * geo->csecs);
92 if (!gc_rq->data) { 92 if (!gc_rq->data) {
93 pr_err("pblk: could not GC line:%d (%d/%d)\n", 93 pr_err("pblk: could not GC line:%d (%d/%d)\n",
94 line->id, *line->vsc, gc_rq->nr_secs); 94 line->id, *line->vsc, gc_rq->nr_secs);
@@ -147,10 +147,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
147 int ret; 147 int ret;
148 148
149 invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); 149 invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
150 if (!invalid_bitmap) { 150 if (!invalid_bitmap)
151 pr_err("pblk: could not allocate GC invalid bitmap\n");
152 goto fail_free_ws; 151 goto fail_free_ws;
153 }
154 152
155 emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type, 153 emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
156 GFP_KERNEL); 154 GFP_KERNEL);
@@ -666,12 +664,10 @@ void pblk_gc_exit(struct pblk *pblk)
666 kthread_stop(gc->gc_reader_ts); 664 kthread_stop(gc->gc_reader_ts);
667 665
668 flush_workqueue(gc->gc_reader_wq); 666 flush_workqueue(gc->gc_reader_wq);
669 if (gc->gc_reader_wq) 667 destroy_workqueue(gc->gc_reader_wq);
670 destroy_workqueue(gc->gc_reader_wq);
671 668
672 flush_workqueue(gc->gc_line_reader_wq); 669 flush_workqueue(gc->gc_line_reader_wq);
673 if (gc->gc_line_reader_wq) 670 destroy_workqueue(gc->gc_line_reader_wq);
674 destroy_workqueue(gc->gc_line_reader_wq);
675 671
676 if (gc->gc_writer_ts) 672 if (gc->gc_writer_ts)
677 kthread_stop(gc->gc_writer_ts); 673 kthread_stop(gc->gc_writer_ts);
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 93d671ca518e..91a5bc2556a3 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -80,7 +80,7 @@ static size_t pblk_trans_map_size(struct pblk *pblk)
80{ 80{
81 int entry_size = 8; 81 int entry_size = 8;
82 82
83 if (pblk->ppaf_bitsize < 32) 83 if (pblk->addrf_len < 32)
84 entry_size = 4; 84 entry_size = 4;
85 85
86 return entry_size * pblk->rl.nr_secs; 86 return entry_size * pblk->rl.nr_secs;
@@ -103,7 +103,40 @@ static void pblk_l2p_free(struct pblk *pblk)
103 vfree(pblk->trans_map); 103 vfree(pblk->trans_map);
104} 104}
105 105
106static int pblk_l2p_init(struct pblk *pblk) 106static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
107{
108 struct pblk_line *line = NULL;
109
110 if (factory_init) {
111 pblk_setup_uuid(pblk);
112 } else {
113 line = pblk_recov_l2p(pblk);
114 if (IS_ERR(line)) {
115 pr_err("pblk: could not recover l2p table\n");
116 return -EFAULT;
117 }
118 }
119
120#ifdef CONFIG_NVM_DEBUG
121 pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
122#endif
123
124 /* Free full lines directly as GC has not been started yet */
125 pblk_gc_free_full_lines(pblk);
126
127 if (!line) {
128 /* Configure next line for user data */
129 line = pblk_line_get_first_data(pblk);
130 if (!line) {
131 pr_err("pblk: line list corrupted\n");
132 return -EFAULT;
133 }
134 }
135
136 return 0;
137}
138
139static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
107{ 140{
108 sector_t i; 141 sector_t i;
109 struct ppa_addr ppa; 142 struct ppa_addr ppa;
@@ -119,7 +152,7 @@ static int pblk_l2p_init(struct pblk *pblk)
119 for (i = 0; i < pblk->rl.nr_secs; i++) 152 for (i = 0; i < pblk->rl.nr_secs; i++)
120 pblk_trans_map_set(pblk, i, ppa); 153 pblk_trans_map_set(pblk, i, ppa);
121 154
122 return 0; 155 return pblk_l2p_recover(pblk, factory_init);
123} 156}
124 157
125static void pblk_rwb_free(struct pblk *pblk) 158static void pblk_rwb_free(struct pblk *pblk)
@@ -146,7 +179,7 @@ static int pblk_rwb_init(struct pblk *pblk)
146 return -ENOMEM; 179 return -ENOMEM;
147 180
148 power_size = get_count_order(nr_entries); 181 power_size = get_count_order(nr_entries);
149 power_seg_sz = get_count_order(geo->sec_size); 182 power_seg_sz = get_count_order(geo->csecs);
150 183
151 return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); 184 return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
152} 185}
@@ -154,47 +187,103 @@ static int pblk_rwb_init(struct pblk *pblk)
154/* Minimum pages needed within a lun */ 187/* Minimum pages needed within a lun */
155#define ADDR_POOL_SIZE 64 188#define ADDR_POOL_SIZE 64
156 189
157static int pblk_set_ppaf(struct pblk *pblk) 190static int pblk_set_addrf_12(struct nvm_geo *geo, struct nvm_addrf_12 *dst)
158{ 191{
159 struct nvm_tgt_dev *dev = pblk->dev; 192 struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf;
160 struct nvm_geo *geo = &dev->geo;
161 struct nvm_addr_format ppaf = geo->ppaf;
162 int power_len; 193 int power_len;
163 194
164 /* Re-calculate channel and lun format to adapt to configuration */ 195 /* Re-calculate channel and lun format to adapt to configuration */
165 power_len = get_count_order(geo->nr_chnls); 196 power_len = get_count_order(geo->num_ch);
166 if (1 << power_len != geo->nr_chnls) { 197 if (1 << power_len != geo->num_ch) {
167 pr_err("pblk: supports only power-of-two channel config.\n"); 198 pr_err("pblk: supports only power-of-two channel config.\n");
168 return -EINVAL; 199 return -EINVAL;
169 } 200 }
170 ppaf.ch_len = power_len; 201 dst->ch_len = power_len;
171 202
172 power_len = get_count_order(geo->nr_luns); 203 power_len = get_count_order(geo->num_lun);
173 if (1 << power_len != geo->nr_luns) { 204 if (1 << power_len != geo->num_lun) {
174 pr_err("pblk: supports only power-of-two LUN config.\n"); 205 pr_err("pblk: supports only power-of-two LUN config.\n");
175 return -EINVAL; 206 return -EINVAL;
176 } 207 }
177 ppaf.lun_len = power_len; 208 dst->lun_len = power_len;
178 209
179 pblk->ppaf.sec_offset = 0; 210 dst->blk_len = src->blk_len;
180 pblk->ppaf.pln_offset = ppaf.sect_len; 211 dst->pg_len = src->pg_len;
181 pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len; 212 dst->pln_len = src->pln_len;
182 pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len; 213 dst->sec_len = src->sec_len;
183 pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len; 214
184 pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len; 215 dst->sec_offset = 0;
185 pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1; 216 dst->pln_offset = dst->sec_len;
186 pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) << 217 dst->ch_offset = dst->pln_offset + dst->pln_len;
187 pblk->ppaf.pln_offset; 218 dst->lun_offset = dst->ch_offset + dst->ch_len;
188 pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) << 219 dst->pg_offset = dst->lun_offset + dst->lun_len;
189 pblk->ppaf.ch_offset; 220 dst->blk_offset = dst->pg_offset + dst->pg_len;
190 pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) << 221
191 pblk->ppaf.lun_offset; 222 dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
192 pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) << 223 dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
193 pblk->ppaf.pg_offset; 224 dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
194 pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) << 225 dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
195 pblk->ppaf.blk_offset; 226 dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
196 227 dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
197 pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len; 228
229 return dst->blk_offset + src->blk_len;
230}
231
232static int pblk_set_addrf_20(struct nvm_geo *geo, struct nvm_addrf *adst,
233 struct pblk_addrf *udst)
234{
235 struct nvm_addrf *src = &geo->addrf;
236
237 adst->ch_len = get_count_order(geo->num_ch);
238 adst->lun_len = get_count_order(geo->num_lun);
239 adst->chk_len = src->chk_len;
240 adst->sec_len = src->sec_len;
241
242 adst->sec_offset = 0;
243 adst->ch_offset = adst->sec_len;
244 adst->lun_offset = adst->ch_offset + adst->ch_len;
245 adst->chk_offset = adst->lun_offset + adst->lun_len;
246
247 adst->sec_mask = ((1ULL << adst->sec_len) - 1) << adst->sec_offset;
248 adst->chk_mask = ((1ULL << adst->chk_len) - 1) << adst->chk_offset;
249 adst->lun_mask = ((1ULL << adst->lun_len) - 1) << adst->lun_offset;
250 adst->ch_mask = ((1ULL << adst->ch_len) - 1) << adst->ch_offset;
251
252 udst->sec_stripe = geo->ws_opt;
253 udst->ch_stripe = geo->num_ch;
254 udst->lun_stripe = geo->num_lun;
255
256 udst->sec_lun_stripe = udst->sec_stripe * udst->ch_stripe;
257 udst->sec_ws_stripe = udst->sec_lun_stripe * udst->lun_stripe;
258
259 return adst->chk_offset + adst->chk_len;
260}
261
262static int pblk_set_addrf(struct pblk *pblk)
263{
264 struct nvm_tgt_dev *dev = pblk->dev;
265 struct nvm_geo *geo = &dev->geo;
266 int mod;
267
268 switch (geo->version) {
269 case NVM_OCSSD_SPEC_12:
270 div_u64_rem(geo->clba, pblk->min_write_pgs, &mod);
271 if (mod) {
272 pr_err("pblk: bad configuration of sectors/pages\n");
273 return -EINVAL;
274 }
275
276 pblk->addrf_len = pblk_set_addrf_12(geo, (void *)&pblk->addrf);
277 break;
278 case NVM_OCSSD_SPEC_20:
279 pblk->addrf_len = pblk_set_addrf_20(geo, (void *)&pblk->addrf,
280 &pblk->uaddrf);
281 break;
282 default:
283 pr_err("pblk: OCSSD revision not supported (%d)\n",
284 geo->version);
285 return -EINVAL;
286 }
198 287
199 return 0; 288 return 0;
200} 289}
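
pblk_set_addrf_20() above lays the 2.0 address format out as four contiguous bit fields, sector bits at the bottom, then channel, LUN and chunk, and derives a mask for each field from its length and offset; the returned value (chk_offset + chk_len) becomes pblk->addrf_len. A self-contained sketch of the same packing with an assumed geometry; the field widths stand in for get_count_order(num_ch) and friends and are not taken from any real device.

#include <stdio.h>
#include <stdint.h>

/* Assumed widths: 12 sector bits per chunk, 8 channels, 8 LUNs, 1024 chunks. */
#define SEC_LEN	12
#define CH_LEN	 3
#define LUN_LEN	 3
#define CHK_LEN	10

#define SEC_OFF	0
#define CH_OFF	(SEC_OFF + SEC_LEN)
#define LUN_OFF	(CH_OFF + CH_LEN)
#define CHK_OFF	(LUN_OFF + LUN_LEN)

#define MASK(len, off)	(((1ULL << (len)) - 1) << (off))

int main(void)
{
	uint64_t sec = 37, ch = 5, lun = 2, chk = 700;
	uint64_t ppa;

	/* Pack, using the same offsets the kernel computes in pblk_set_addrf_20(). */
	ppa = (sec << SEC_OFF) | (ch << CH_OFF) | (lun << LUN_OFF) | (chk << CHK_OFF);

	/* Total significant bits: the equivalent of pblk->addrf_len (28 here). */
	printf("addrf_len = %d\n", CHK_OFF + CHK_LEN);

	/* Unpack again through the masks. */
	printf("sec=%llu ch=%llu lun=%llu chk=%llu\n",
	       (unsigned long long)((ppa & MASK(SEC_LEN, SEC_OFF)) >> SEC_OFF),
	       (unsigned long long)((ppa & MASK(CH_LEN,  CH_OFF))  >> CH_OFF),
	       (unsigned long long)((ppa & MASK(LUN_LEN, LUN_OFF)) >> LUN_OFF),
	       (unsigned long long)((ppa & MASK(CHK_LEN, CHK_OFF)) >> CHK_OFF));
	return 0;
}
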
@@ -252,16 +341,41 @@ static int pblk_core_init(struct pblk *pblk)
252{ 341{
253 struct nvm_tgt_dev *dev = pblk->dev; 342 struct nvm_tgt_dev *dev = pblk->dev;
254 struct nvm_geo *geo = &dev->geo; 343 struct nvm_geo *geo = &dev->geo;
344 int max_write_ppas;
255 345
256 pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * 346 atomic64_set(&pblk->user_wa, 0);
257 geo->nr_planes * geo->all_luns; 347 atomic64_set(&pblk->pad_wa, 0);
348 atomic64_set(&pblk->gc_wa, 0);
349 pblk->user_rst_wa = 0;
350 pblk->pad_rst_wa = 0;
351 pblk->gc_rst_wa = 0;
258 352
259 if (pblk_init_global_caches(pblk)) 353 atomic64_set(&pblk->nr_flush, 0);
354 pblk->nr_flush_rst = 0;
355
356 pblk->pgs_in_buffer = geo->mw_cunits * geo->all_luns;
357
358 pblk->min_write_pgs = geo->ws_opt * (geo->csecs / PAGE_SIZE);
359 max_write_ppas = pblk->min_write_pgs * geo->all_luns;
360 pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
361 pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
362
363 if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
364 pr_err("pblk: vector list too big(%u > %u)\n",
365 pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS);
366 return -EINVAL;
367 }
368
369 pblk->pad_dist = kzalloc((pblk->min_write_pgs - 1) * sizeof(atomic64_t),
370 GFP_KERNEL);
371 if (!pblk->pad_dist)
260 return -ENOMEM; 372 return -ENOMEM;
261 373
374 if (pblk_init_global_caches(pblk))
375 goto fail_free_pad_dist;
376
262 /* Internal bios can be at most the sectors signaled by the device. */ 377 /* Internal bios can be at most the sectors signaled by the device. */
263 pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), 378 pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0);
264 0);
265 if (!pblk->page_bio_pool) 379 if (!pblk->page_bio_pool)
266 goto free_global_caches; 380 goto free_global_caches;
267 381
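
The pblk_core_init() hunk above recomputes write sizing from the generic geometry: min_write_pgs is the optimal write size ws_opt scaled by how many 4 KB pages fit in one device sector, and max_write_pgs is that striped across all LUNs, capped at NVM_MAX_VLBA. A small worked example; the geometry numbers and the 64-entry cap are assumptions for illustration.

#include <stdio.h>

#define PAGE_SIZE	4096
#define NVM_MAX_VLBA	64	/* assumed vector cap, mirroring the lightnvm limit */

static int min_int(int a, int b) { return a < b ? a : b; }

int main(void)
{
	/* Assumed geometry: 4 KB sectors, optimal write of 8 sectors, 16 LUNs. */
	int csecs = 4096, ws_opt = 8, all_luns = 16;

	int min_write_pgs = ws_opt * (csecs / PAGE_SIZE);		/* 8   */
	int max_write_ppas = min_write_pgs * all_luns;			/* 128 */
	int max_write_pgs = min_int(max_write_ppas, NVM_MAX_VLBA);	/* 64  */

	printf("min_write_pgs=%d max_write_pgs=%d\n", min_write_pgs, max_write_pgs);
	return 0;
}
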
@@ -305,13 +419,11 @@ static int pblk_core_init(struct pblk *pblk)
305 if (!pblk->r_end_wq) 419 if (!pblk->r_end_wq)
306 goto free_bb_wq; 420 goto free_bb_wq;
307 421
308 if (pblk_set_ppaf(pblk)) 422 if (pblk_set_addrf(pblk))
309 goto free_r_end_wq;
310
311 if (pblk_rwb_init(pblk))
312 goto free_r_end_wq; 423 goto free_r_end_wq;
313 424
314 INIT_LIST_HEAD(&pblk->compl_list); 425 INIT_LIST_HEAD(&pblk->compl_list);
426
315 return 0; 427 return 0;
316 428
317free_r_end_wq: 429free_r_end_wq:
@@ -334,6 +446,8 @@ free_page_bio_pool:
334 mempool_destroy(pblk->page_bio_pool); 446 mempool_destroy(pblk->page_bio_pool);
335free_global_caches: 447free_global_caches:
336 pblk_free_global_caches(pblk); 448 pblk_free_global_caches(pblk);
449fail_free_pad_dist:
450 kfree(pblk->pad_dist);
337 return -ENOMEM; 451 return -ENOMEM;
338} 452}
339 453
@@ -355,20 +469,31 @@ static void pblk_core_free(struct pblk *pblk)
355 mempool_destroy(pblk->e_rq_pool); 469 mempool_destroy(pblk->e_rq_pool);
356 mempool_destroy(pblk->w_rq_pool); 470 mempool_destroy(pblk->w_rq_pool);
357 471
358 pblk_rwb_free(pblk);
359
360 pblk_free_global_caches(pblk); 472 pblk_free_global_caches(pblk);
473 kfree(pblk->pad_dist);
361} 474}
362 475
363static void pblk_luns_free(struct pblk *pblk) 476static void pblk_line_mg_free(struct pblk *pblk)
364{ 477{
365 kfree(pblk->luns); 478 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
479 int i;
480
481 kfree(l_mg->bb_template);
482 kfree(l_mg->bb_aux);
483 kfree(l_mg->vsc_list);
484
485 for (i = 0; i < PBLK_DATA_LINES; i++) {
486 kfree(l_mg->sline_meta[i]);
487 pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
488 kfree(l_mg->eline_meta[i]);
489 }
366} 490}
367 491
368static void pblk_free_line_bitmaps(struct pblk_line *line) 492static void pblk_line_meta_free(struct pblk_line *line)
369{ 493{
370 kfree(line->blk_bitmap); 494 kfree(line->blk_bitmap);
371 kfree(line->erase_bitmap); 495 kfree(line->erase_bitmap);
496 kfree(line->chks);
372} 497}
373 498
374static void pblk_lines_free(struct pblk *pblk) 499static void pblk_lines_free(struct pblk *pblk)
@@ -382,40 +507,21 @@ static void pblk_lines_free(struct pblk *pblk)
382 line = &pblk->lines[i]; 507 line = &pblk->lines[i];
383 508
384 pblk_line_free(pblk, line); 509 pblk_line_free(pblk, line);
385 pblk_free_line_bitmaps(line); 510 pblk_line_meta_free(line);
386 } 511 }
387 spin_unlock(&l_mg->free_lock); 512 spin_unlock(&l_mg->free_lock);
388}
389
390static void pblk_line_meta_free(struct pblk *pblk)
391{
392 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
393 int i;
394
395 kfree(l_mg->bb_template);
396 kfree(l_mg->bb_aux);
397 kfree(l_mg->vsc_list);
398 513
399 for (i = 0; i < PBLK_DATA_LINES; i++) { 514 pblk_line_mg_free(pblk);
400 kfree(l_mg->sline_meta[i]);
401 pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
402 kfree(l_mg->eline_meta[i]);
403 }
404 515
516 kfree(pblk->luns);
405 kfree(pblk->lines); 517 kfree(pblk->lines);
406} 518}
407 519
408static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun) 520static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun,
521 u8 *blks, int nr_blks)
409{ 522{
410 struct nvm_geo *geo = &dev->geo;
411 struct ppa_addr ppa; 523 struct ppa_addr ppa;
412 u8 *blks; 524 int ret;
413 int nr_blks, ret;
414
415 nr_blks = geo->nr_chks * geo->plane_mode;
416 blks = kmalloc(nr_blks, GFP_KERNEL);
417 if (!blks)
418 return -ENOMEM;
419 525
420 ppa.ppa = 0; 526 ppa.ppa = 0;
421 ppa.g.ch = rlun->bppa.g.ch; 527 ppa.g.ch = rlun->bppa.g.ch;
@@ -423,69 +529,64 @@ static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
423 529
424 ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); 530 ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
425 if (ret) 531 if (ret)
426 goto out; 532 return ret;
427 533
428 nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); 534 nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
429 if (nr_blks < 0) { 535 if (nr_blks < 0)
430 ret = nr_blks; 536 return -EIO;
431 goto out;
432 }
433
434 rlun->bb_list = blks;
435 537
436 return 0; 538 return 0;
437out:
438 kfree(blks);
439 return ret;
440} 539}
441 540
442static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line, 541static void *pblk_bb_get_meta(struct pblk *pblk)
443 int blk_per_line)
444{ 542{
445 struct nvm_tgt_dev *dev = pblk->dev; 543 struct nvm_tgt_dev *dev = pblk->dev;
446 struct nvm_geo *geo = &dev->geo; 544 struct nvm_geo *geo = &dev->geo;
447 struct pblk_lun *rlun; 545 u8 *meta;
448 int bb_cnt = 0; 546 int i, nr_blks, blk_per_lun;
449 int i; 547 int ret;
450 548
451 for (i = 0; i < blk_per_line; i++) { 549 blk_per_lun = geo->num_chk * geo->pln_mode;
452 rlun = &pblk->luns[i]; 550 nr_blks = blk_per_lun * geo->all_luns;
453 if (rlun->bb_list[line->id] == NVM_BLK_T_FREE) 551
454 continue; 552 meta = kmalloc(nr_blks, GFP_KERNEL);
553 if (!meta)
554 return ERR_PTR(-ENOMEM);
555
556 for (i = 0; i < geo->all_luns; i++) {
557 struct pblk_lun *rlun = &pblk->luns[i];
558 u8 *meta_pos = meta + i * blk_per_lun;
455 559
456 set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap); 560 ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun);
457 bb_cnt++; 561 if (ret) {
562 kfree(meta);
563 return ERR_PTR(-EIO);
564 }
458 } 565 }
459 566
460 return bb_cnt; 567 return meta;
461} 568}
462 569
463static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line) 570static void *pblk_chunk_get_meta(struct pblk *pblk)
464{ 571{
465 struct pblk_line_meta *lm = &pblk->lm; 572 struct nvm_tgt_dev *dev = pblk->dev;
466 573 struct nvm_geo *geo = &dev->geo;
467 line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
468 if (!line->blk_bitmap)
469 return -ENOMEM;
470
471 line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
472 if (!line->erase_bitmap) {
473 kfree(line->blk_bitmap);
474 return -ENOMEM;
475 }
476 574
477 return 0; 575 if (geo->version == NVM_OCSSD_SPEC_12)
576 return pblk_bb_get_meta(pblk);
577 else
578 return pblk_chunk_get_info(pblk);
478} 579}
479 580
480static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) 581static int pblk_luns_init(struct pblk *pblk)
481{ 582{
482 struct nvm_tgt_dev *dev = pblk->dev; 583 struct nvm_tgt_dev *dev = pblk->dev;
483 struct nvm_geo *geo = &dev->geo; 584 struct nvm_geo *geo = &dev->geo;
484 struct pblk_lun *rlun; 585 struct pblk_lun *rlun;
485 int i, ret; 586 int i;
486 587
487 /* TODO: Implement unbalanced LUN support */ 588 /* TODO: Implement unbalanced LUN support */
488 if (geo->nr_luns < 0) { 589 if (geo->num_lun < 0) {
489 pr_err("pblk: unbalanced LUN config.\n"); 590 pr_err("pblk: unbalanced LUN config.\n");
490 return -EINVAL; 591 return -EINVAL;
491 } 592 }
@@ -497,58 +598,19 @@ static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
497 598
498 for (i = 0; i < geo->all_luns; i++) { 599 for (i = 0; i < geo->all_luns; i++) {
499 /* Stripe across channels */ 600 /* Stripe across channels */
500 int ch = i % geo->nr_chnls; 601 int ch = i % geo->num_ch;
501 int lun_raw = i / geo->nr_chnls; 602 int lun_raw = i / geo->num_ch;
502 int lunid = lun_raw + ch * geo->nr_luns; 603 int lunid = lun_raw + ch * geo->num_lun;
503 604
504 rlun = &pblk->luns[i]; 605 rlun = &pblk->luns[i];
505 rlun->bppa = luns[lunid]; 606 rlun->bppa = dev->luns[lunid];
506 607
507 sema_init(&rlun->wr_sem, 1); 608 sema_init(&rlun->wr_sem, 1);
508
509 ret = pblk_bb_discovery(dev, rlun);
510 if (ret) {
511 while (--i >= 0)
512 kfree(pblk->luns[i].bb_list);
513 return ret;
514 }
515 } 609 }
516 610
517 return 0; 611 return 0;
518} 612}
519 613
520static int pblk_lines_configure(struct pblk *pblk, int flags)
521{
522 struct pblk_line *line = NULL;
523 int ret = 0;
524
525 if (!(flags & NVM_TARGET_FACTORY)) {
526 line = pblk_recov_l2p(pblk);
527 if (IS_ERR(line)) {
528 pr_err("pblk: could not recover l2p table\n");
529 ret = -EFAULT;
530 }
531 }
532
533#ifdef CONFIG_NVM_DEBUG
534 pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
535#endif
536
537 /* Free full lines directly as GC has not been started yet */
538 pblk_gc_free_full_lines(pblk);
539
540 if (!line) {
541 /* Configure next line for user data */
542 line = pblk_line_get_first_data(pblk);
543 if (!line) {
544 pr_err("pblk: line list corrupted\n");
545 ret = -EFAULT;
546 }
547 }
548
549 return ret;
550}
551
552/* See comment over struct line_emeta definition */ 614/* See comment over struct line_emeta definition */
553static unsigned int calc_emeta_len(struct pblk *pblk) 615static unsigned int calc_emeta_len(struct pblk *pblk)
554{ 616{
@@ -559,19 +621,19 @@ static unsigned int calc_emeta_len(struct pblk *pblk)
559 621
560 /* Round to sector size so that lba_list starts on its own sector */ 622 /* Round to sector size so that lba_list starts on its own sector */
561 lm->emeta_sec[1] = DIV_ROUND_UP( 623 lm->emeta_sec[1] = DIV_ROUND_UP(
562 sizeof(struct line_emeta) + lm->blk_bitmap_len, 624 sizeof(struct line_emeta) + lm->blk_bitmap_len +
563 geo->sec_size); 625 sizeof(struct wa_counters), geo->csecs);
564 lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size; 626 lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs;
565 627
566 /* Round to sector size so that vsc_list starts on its own sector */ 628 /* Round to sector size so that vsc_list starts on its own sector */
567 lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0]; 629 lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
568 lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64), 630 lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
569 geo->sec_size); 631 geo->csecs);
570 lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size; 632 lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs;
571 633
572 lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32), 634 lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
573 geo->sec_size); 635 geo->csecs);
574 lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size; 636 lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs;
575 637
576 lm->vsc_list_len = l_mg->nr_lines * sizeof(u32); 638 lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
577 639
@@ -602,23 +664,211 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
602 * on user capacity consider only provisioned blocks 664 * on user capacity consider only provisioned blocks
603 */ 665 */
604 pblk->rl.total_blocks = nr_free_blks; 666 pblk->rl.total_blocks = nr_free_blks;
605 pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk; 667 pblk->rl.nr_secs = nr_free_blks * geo->clba;
606 668
607 /* Consider sectors used for metadata */ 669 /* Consider sectors used for metadata */
608 sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; 670 sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
609 blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk); 671 blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
610 672
611 pblk->capacity = (provisioned - blk_meta) * geo->sec_per_chk; 673 pblk->capacity = (provisioned - blk_meta) * geo->clba;
612 674
613 atomic_set(&pblk->rl.free_blocks, nr_free_blks); 675 atomic_set(&pblk->rl.free_blocks, nr_free_blks);
614 atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); 676 atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
615} 677}
616 678
617static int pblk_lines_alloc_metadata(struct pblk *pblk) 679static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line,
680 void *chunk_meta)
681{
682 struct nvm_tgt_dev *dev = pblk->dev;
683 struct nvm_geo *geo = &dev->geo;
684 struct pblk_line_meta *lm = &pblk->lm;
685 int i, chk_per_lun, nr_bad_chks = 0;
686
687 chk_per_lun = geo->num_chk * geo->pln_mode;
688
689 for (i = 0; i < lm->blk_per_line; i++) {
690 struct pblk_lun *rlun = &pblk->luns[i];
691 struct nvm_chk_meta *chunk;
692 int pos = pblk_ppa_to_pos(geo, rlun->bppa);
693 u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun;
694
695 chunk = &line->chks[pos];
696
697 /*
698 * In 1.2 spec. chunk state is not persisted by the device. Thus
699 * some of the values are reset each time pblk is instantiated.
700 */
701 if (lun_bb_meta[line->id] == NVM_BLK_T_FREE)
702 chunk->state = NVM_CHK_ST_FREE;
703 else
704 chunk->state = NVM_CHK_ST_OFFLINE;
705
706 chunk->type = NVM_CHK_TP_W_SEQ;
707 chunk->wi = 0;
708 chunk->slba = -1;
709 chunk->cnlb = geo->clba;
710 chunk->wp = 0;
711
712 if (!(chunk->state & NVM_CHK_ST_OFFLINE))
713 continue;
714
715 set_bit(pos, line->blk_bitmap);
716 nr_bad_chks++;
717 }
718
719 return nr_bad_chks;
720}
721
722static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
723 struct nvm_chk_meta *meta)
724{
725 struct nvm_tgt_dev *dev = pblk->dev;
726 struct nvm_geo *geo = &dev->geo;
727 struct pblk_line_meta *lm = &pblk->lm;
728 int i, nr_bad_chks = 0;
729
730 for (i = 0; i < lm->blk_per_line; i++) {
731 struct pblk_lun *rlun = &pblk->luns[i];
732 struct nvm_chk_meta *chunk;
733 struct nvm_chk_meta *chunk_meta;
734 struct ppa_addr ppa;
735 int pos;
736
737 ppa = rlun->bppa;
738 pos = pblk_ppa_to_pos(geo, ppa);
739 chunk = &line->chks[pos];
740
741 ppa.m.chk = line->id;
742 chunk_meta = pblk_chunk_get_off(pblk, meta, ppa);
743
744 chunk->state = chunk_meta->state;
745 chunk->type = chunk_meta->type;
746 chunk->wi = chunk_meta->wi;
747 chunk->slba = chunk_meta->slba;
748 chunk->cnlb = chunk_meta->cnlb;
749 chunk->wp = chunk_meta->wp;
750
751 if (!(chunk->state & NVM_CHK_ST_OFFLINE))
752 continue;
753
754 if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
755 WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
756 continue;
757 }
758
759 set_bit(pos, line->blk_bitmap);
760 nr_bad_chks++;
761 }
762
763 return nr_bad_chks;
764}
765
766static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
767 void *chunk_meta, int line_id)
618{ 768{
769 struct nvm_tgt_dev *dev = pblk->dev;
770 struct nvm_geo *geo = &dev->geo;
619 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 771 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
620 struct pblk_line_meta *lm = &pblk->lm; 772 struct pblk_line_meta *lm = &pblk->lm;
621 int i; 773 long nr_bad_chks, chk_in_line;
774
775 line->pblk = pblk;
776 line->id = line_id;
777 line->type = PBLK_LINETYPE_FREE;
778 line->state = PBLK_LINESTATE_NEW;
779 line->gc_group = PBLK_LINEGC_NONE;
780 line->vsc = &l_mg->vsc_list[line_id];
781 spin_lock_init(&line->lock);
782
783 if (geo->version == NVM_OCSSD_SPEC_12)
784 nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta);
785 else
786 nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta);
787
788 chk_in_line = lm->blk_per_line - nr_bad_chks;
789 if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line ||
790 chk_in_line < lm->min_blk_line) {
791 line->state = PBLK_LINESTATE_BAD;
792 list_add_tail(&line->list, &l_mg->bad_list);
793 return 0;
794 }
795
796 atomic_set(&line->blk_in_line, chk_in_line);
797 list_add_tail(&line->list, &l_mg->free_list);
798 l_mg->nr_free_lines++;
799
800 return chk_in_line;
801}
802
803static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
804{
805 struct pblk_line_meta *lm = &pblk->lm;
806
807 line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
808 if (!line->blk_bitmap)
809 return -ENOMEM;
810
811 line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
812 if (!line->erase_bitmap) {
813 kfree(line->blk_bitmap);
814 return -ENOMEM;
815 }
816
817 line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta),
818 GFP_KERNEL);
819 if (!line->chks) {
820 kfree(line->erase_bitmap);
821 kfree(line->blk_bitmap);
822 return -ENOMEM;
823 }
824
825 return 0;
826}
827
828static int pblk_line_mg_init(struct pblk *pblk)
829{
830 struct nvm_tgt_dev *dev = pblk->dev;
831 struct nvm_geo *geo = &dev->geo;
832 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
833 struct pblk_line_meta *lm = &pblk->lm;
834 int i, bb_distance;
835
836 l_mg->nr_lines = geo->num_chk;
837 l_mg->log_line = l_mg->data_line = NULL;
838 l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
839 l_mg->nr_free_lines = 0;
840 bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
841
842 INIT_LIST_HEAD(&l_mg->free_list);
843 INIT_LIST_HEAD(&l_mg->corrupt_list);
844 INIT_LIST_HEAD(&l_mg->bad_list);
845 INIT_LIST_HEAD(&l_mg->gc_full_list);
846 INIT_LIST_HEAD(&l_mg->gc_high_list);
847 INIT_LIST_HEAD(&l_mg->gc_mid_list);
848 INIT_LIST_HEAD(&l_mg->gc_low_list);
849 INIT_LIST_HEAD(&l_mg->gc_empty_list);
850
851 INIT_LIST_HEAD(&l_mg->emeta_list);
852
853 l_mg->gc_lists[0] = &l_mg->gc_high_list;
854 l_mg->gc_lists[1] = &l_mg->gc_mid_list;
855 l_mg->gc_lists[2] = &l_mg->gc_low_list;
856
857 spin_lock_init(&l_mg->free_lock);
858 spin_lock_init(&l_mg->close_lock);
859 spin_lock_init(&l_mg->gc_lock);
860
861 l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
862 if (!l_mg->vsc_list)
863 goto fail;
864
865 l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
866 if (!l_mg->bb_template)
867 goto fail_free_vsc_list;
868
869 l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
870 if (!l_mg->bb_aux)
871 goto fail_free_bb_template;
622 872
623 /* smeta is always small enough to fit on a kmalloc memory allocation, 873 /* smeta is always small enough to fit on a kmalloc memory allocation,
624 * emeta depends on the number of LUNs allocated to the pblk instance 874 * emeta depends on the number of LUNs allocated to the pblk instance
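
pblk_setup_line_meta() above folds per-chunk state into per-line accounting: every offline chunk sets its position in blk_bitmap, and a line whose remaining healthy chunks fall below min_blk_line is parked on the bad list instead of being counted as free capacity. A reduced model of that decision; the counts are invented for the example.

#include <stdio.h>

/* Invented line geometry: 16 chunks per line, at least 2 usable required. */
#define BLK_PER_LINE	16
#define MIN_BLK_LINE	 2

/* Returns 0 when the line must be marked bad, otherwise the usable chunks. */
static int line_usable_chunks(int nr_bad_chks)
{
	int chk_in_line = BLK_PER_LINE - nr_bad_chks;

	if (nr_bad_chks < 0 || nr_bad_chks > BLK_PER_LINE ||
	    chk_in_line < MIN_BLK_LINE)
		return 0;

	return chk_in_line;
}

int main(void)
{
	printf("3 bad chunks  -> %d usable\n", line_usable_chunks(3));	/* 13 */
	printf("15 bad chunks -> %d usable\n", line_usable_chunks(15));	/* 0: bad_list */
	return 0;
}
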
@@ -664,13 +914,13 @@ static int pblk_lines_alloc_metadata(struct pblk *pblk)
664 } 914 }
665 } 915 }
666 916
667 l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
668 if (!l_mg->vsc_list)
669 goto fail_free_emeta;
670
671 for (i = 0; i < l_mg->nr_lines; i++) 917 for (i = 0; i < l_mg->nr_lines; i++)
672 l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY); 918 l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
673 919
920 bb_distance = (geo->all_luns) * geo->ws_opt;
921 for (i = 0; i < lm->sec_per_line; i += bb_distance)
922 bitmap_set(l_mg->bb_template, i, geo->ws_opt);
923
674 return 0; 924 return 0;
675 925
676fail_free_emeta: 926fail_free_emeta:
@@ -681,50 +931,27 @@ fail_free_emeta:
681 kfree(l_mg->eline_meta[i]->buf); 931 kfree(l_mg->eline_meta[i]->buf);
682 kfree(l_mg->eline_meta[i]); 932 kfree(l_mg->eline_meta[i]);
683 } 933 }
684
685fail_free_smeta: 934fail_free_smeta:
686 for (i = 0; i < PBLK_DATA_LINES; i++) 935 for (i = 0; i < PBLK_DATA_LINES; i++)
687 kfree(l_mg->sline_meta[i]); 936 kfree(l_mg->sline_meta[i]);
688 937 kfree(l_mg->bb_aux);
938fail_free_bb_template:
939 kfree(l_mg->bb_template);
940fail_free_vsc_list:
941 kfree(l_mg->vsc_list);
942fail:
689 return -ENOMEM; 943 return -ENOMEM;
690} 944}
691 945
692static int pblk_lines_init(struct pblk *pblk) 946static int pblk_line_meta_init(struct pblk *pblk)
693{ 947{
694 struct nvm_tgt_dev *dev = pblk->dev; 948 struct nvm_tgt_dev *dev = pblk->dev;
695 struct nvm_geo *geo = &dev->geo; 949 struct nvm_geo *geo = &dev->geo;
696 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
697 struct pblk_line_meta *lm = &pblk->lm; 950 struct pblk_line_meta *lm = &pblk->lm;
698 struct pblk_line *line;
699 unsigned int smeta_len, emeta_len; 951 unsigned int smeta_len, emeta_len;
700 long nr_bad_blks, nr_free_blks; 952 int i;
701 int bb_distance, max_write_ppas, mod;
702 int i, ret;
703
704 pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
705 max_write_ppas = pblk->min_write_pgs * geo->all_luns;
706 pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
707 max_write_ppas : nvm_max_phys_sects(dev);
708 pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
709
710 if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
711 pr_err("pblk: cannot support device max_phys_sect\n");
712 return -EINVAL;
713 }
714
715 div_u64_rem(geo->sec_per_chk, pblk->min_write_pgs, &mod);
716 if (mod) {
717 pr_err("pblk: bad configuration of sectors/pages\n");
718 return -EINVAL;
719 }
720
721 l_mg->nr_lines = geo->nr_chks;
722 l_mg->log_line = l_mg->data_line = NULL;
723 l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
724 l_mg->nr_free_lines = 0;
725 bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
726 953
727 lm->sec_per_line = geo->sec_per_chk * geo->all_luns; 954 lm->sec_per_line = geo->clba * geo->all_luns;
728 lm->blk_per_line = geo->all_luns; 955 lm->blk_per_line = geo->all_luns;
729 lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long); 956 lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
730 lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); 957 lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
@@ -738,8 +965,8 @@ static int pblk_lines_init(struct pblk *pblk)
738 */ 965 */
739 i = 1; 966 i = 1;
740add_smeta_page: 967add_smeta_page:
741 lm->smeta_sec = i * geo->sec_per_pl; 968 lm->smeta_sec = i * geo->ws_opt;
742 lm->smeta_len = lm->smeta_sec * geo->sec_size; 969 lm->smeta_len = lm->smeta_sec * geo->csecs;
743 970
744 smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len; 971 smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
745 if (smeta_len > lm->smeta_len) { 972 if (smeta_len > lm->smeta_len) {
@@ -752,8 +979,8 @@ add_smeta_page:
752 */ 979 */
753 i = 1; 980 i = 1;
754add_emeta_page: 981add_emeta_page:
755 lm->emeta_sec[0] = i * geo->sec_per_pl; 982 lm->emeta_sec[0] = i * geo->ws_opt;
756 lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size; 983 lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs;
757 984
758 emeta_len = calc_emeta_len(pblk); 985 emeta_len = calc_emeta_len(pblk);
759 if (emeta_len > lm->emeta_len[0]) { 986 if (emeta_len > lm->emeta_len[0]) {
@@ -766,119 +993,75 @@ add_emeta_page:
766 lm->min_blk_line = 1; 993 lm->min_blk_line = 1;
767 if (geo->all_luns > 1) 994 if (geo->all_luns > 1)
768 lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + 995 lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
769 lm->emeta_sec[0], geo->sec_per_chk); 996 lm->emeta_sec[0], geo->clba);
770 997
771 if (lm->min_blk_line > lm->blk_per_line) { 998 if (lm->min_blk_line > lm->blk_per_line) {
772 pr_err("pblk: config. not supported. Min. LUN in line:%d\n", 999 pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
773 lm->blk_per_line); 1000 lm->blk_per_line);
774 ret = -EINVAL; 1001 return -EINVAL;
775 goto fail;
776 }
777
778 ret = pblk_lines_alloc_metadata(pblk);
779 if (ret)
780 goto fail;
781
782 l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
783 if (!l_mg->bb_template) {
784 ret = -ENOMEM;
785 goto fail_free_meta;
786 } 1002 }
787 1003
788 l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); 1004 return 0;
789 if (!l_mg->bb_aux) { 1005}
790 ret = -ENOMEM;
791 goto fail_free_bb_template;
792 }
793 1006
794 bb_distance = (geo->all_luns) * geo->sec_per_pl; 1007static int pblk_lines_init(struct pblk *pblk)
795 for (i = 0; i < lm->sec_per_line; i += bb_distance) 1008{
796 bitmap_set(l_mg->bb_template, i, geo->sec_per_pl); 1009 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1010 struct pblk_line *line;
1011 void *chunk_meta;
1012 long nr_free_chks = 0;
1013 int i, ret;
797 1014
798 INIT_LIST_HEAD(&l_mg->free_list); 1015 ret = pblk_line_meta_init(pblk);
799 INIT_LIST_HEAD(&l_mg->corrupt_list); 1016 if (ret)
800 INIT_LIST_HEAD(&l_mg->bad_list); 1017 return ret;
801 INIT_LIST_HEAD(&l_mg->gc_full_list);
802 INIT_LIST_HEAD(&l_mg->gc_high_list);
803 INIT_LIST_HEAD(&l_mg->gc_mid_list);
804 INIT_LIST_HEAD(&l_mg->gc_low_list);
805 INIT_LIST_HEAD(&l_mg->gc_empty_list);
806 1018
807 INIT_LIST_HEAD(&l_mg->emeta_list); 1019 ret = pblk_line_mg_init(pblk);
1020 if (ret)
1021 return ret;
808 1022
809 l_mg->gc_lists[0] = &l_mg->gc_high_list; 1023 ret = pblk_luns_init(pblk);
810 l_mg->gc_lists[1] = &l_mg->gc_mid_list; 1024 if (ret)
811 l_mg->gc_lists[2] = &l_mg->gc_low_list; 1025 goto fail_free_meta;
812 1026
813 spin_lock_init(&l_mg->free_lock); 1027 chunk_meta = pblk_chunk_get_meta(pblk);
814 spin_lock_init(&l_mg->close_lock); 1028 if (IS_ERR(chunk_meta)) {
815 spin_lock_init(&l_mg->gc_lock); 1029 ret = PTR_ERR(chunk_meta);
1030 goto fail_free_luns;
1031 }
816 1032
817 pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), 1033 pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
818 GFP_KERNEL); 1034 GFP_KERNEL);
819 if (!pblk->lines) { 1035 if (!pblk->lines) {
820 ret = -ENOMEM; 1036 ret = -ENOMEM;
821 goto fail_free_bb_aux; 1037 goto fail_free_chunk_meta;
822 } 1038 }
823 1039
824 nr_free_blks = 0;
825 for (i = 0; i < l_mg->nr_lines; i++) { 1040 for (i = 0; i < l_mg->nr_lines; i++) {
826 int blk_in_line;
827
828 line = &pblk->lines[i]; 1041 line = &pblk->lines[i];
829 1042
830 line->pblk = pblk; 1043 ret = pblk_alloc_line_meta(pblk, line);
831 line->id = i;
832 line->type = PBLK_LINETYPE_FREE;
833 line->state = PBLK_LINESTATE_FREE;
834 line->gc_group = PBLK_LINEGC_NONE;
835 line->vsc = &l_mg->vsc_list[i];
836 spin_lock_init(&line->lock);
837
838 ret = pblk_alloc_line_bitmaps(pblk, line);
839 if (ret) 1044 if (ret)
840 goto fail_free_lines; 1045 goto fail_free_lines;
841 1046
842 nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line); 1047 nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
843 if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
844 pblk_free_line_bitmaps(line);
845 ret = -EINVAL;
846 goto fail_free_lines;
847 }
848
849 blk_in_line = lm->blk_per_line - nr_bad_blks;
850 if (blk_in_line < lm->min_blk_line) {
851 line->state = PBLK_LINESTATE_BAD;
852 list_add_tail(&line->list, &l_mg->bad_list);
853 continue;
854 }
855
856 nr_free_blks += blk_in_line;
857 atomic_set(&line->blk_in_line, blk_in_line);
858
859 l_mg->nr_free_lines++;
860 list_add_tail(&line->list, &l_mg->free_list);
861 } 1048 }
862 1049
863 pblk_set_provision(pblk, nr_free_blks); 1050 pblk_set_provision(pblk, nr_free_chks);
864
865 /* Cleanup per-LUN bad block lists - managed within lines on run-time */
866 for (i = 0; i < geo->all_luns; i++)
867 kfree(pblk->luns[i].bb_list);
868 1051
1052 kfree(chunk_meta);
869 return 0; 1053 return 0;
1054
870fail_free_lines: 1055fail_free_lines:
871 while (--i >= 0) 1056 while (--i >= 0)
872 pblk_free_line_bitmaps(&pblk->lines[i]); 1057 pblk_line_meta_free(&pblk->lines[i]);
873fail_free_bb_aux: 1058 kfree(pblk->lines);
874 kfree(l_mg->bb_aux); 1059fail_free_chunk_meta:
875fail_free_bb_template: 1060 kfree(chunk_meta);
876 kfree(l_mg->bb_template); 1061fail_free_luns:
1062 kfree(pblk->luns);
877fail_free_meta: 1063fail_free_meta:
878 pblk_line_meta_free(pblk); 1064 pblk_line_mg_free(pblk);
879fail:
880 for (i = 0; i < geo->all_luns; i++)
881 kfree(pblk->luns[i].bb_list);
882 1065
883 return ret; 1066 return ret;
884} 1067}
@@ -912,18 +1095,17 @@ static void pblk_writer_stop(struct pblk *pblk)
912 WARN(pblk_rb_sync_count(&pblk->rwb), 1095 WARN(pblk_rb_sync_count(&pblk->rwb),
913 "Stopping not fully synced write buffer\n"); 1096 "Stopping not fully synced write buffer\n");
914 1097
1098 del_timer_sync(&pblk->wtimer);
915 if (pblk->writer_ts) 1099 if (pblk->writer_ts)
916 kthread_stop(pblk->writer_ts); 1100 kthread_stop(pblk->writer_ts);
917 del_timer(&pblk->wtimer);
918} 1101}
919 1102
920static void pblk_free(struct pblk *pblk) 1103static void pblk_free(struct pblk *pblk)
921{ 1104{
922 pblk_luns_free(pblk);
923 pblk_lines_free(pblk); 1105 pblk_lines_free(pblk);
924 pblk_line_meta_free(pblk);
925 pblk_core_free(pblk);
926 pblk_l2p_free(pblk); 1106 pblk_l2p_free(pblk);
1107 pblk_rwb_free(pblk);
1108 pblk_core_free(pblk);
927 1109
928 kfree(pblk); 1110 kfree(pblk);
929} 1111}
@@ -970,9 +1152,17 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
970 struct pblk *pblk; 1152 struct pblk *pblk;
971 int ret; 1153 int ret;
972 1154
973 if (dev->identity.dom & NVM_RSP_L2P) { 1155 /* pblk supports 1.2 and 2.0 versions */
1156 if (!(geo->version == NVM_OCSSD_SPEC_12 ||
1157 geo->version == NVM_OCSSD_SPEC_20)) {
1158 pr_err("pblk: OCSSD version not supported (%u)\n",
1159 geo->version);
1160 return ERR_PTR(-EINVAL);
1161 }
1162
1163 if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) {
974 pr_err("pblk: host-side L2P table not supported. (%x)\n", 1164 pr_err("pblk: host-side L2P table not supported. (%x)\n",
975 dev->identity.dom); 1165 geo->dom);
976 return ERR_PTR(-EINVAL); 1166 return ERR_PTR(-EINVAL);
977 } 1167 }
978 1168
@@ -988,14 +1178,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
988 spin_lock_init(&pblk->trans_lock); 1178 spin_lock_init(&pblk->trans_lock);
989 spin_lock_init(&pblk->lock); 1179 spin_lock_init(&pblk->lock);
990 1180
991 if (flags & NVM_TARGET_FACTORY)
992 pblk_setup_uuid(pblk);
993
994#ifdef CONFIG_NVM_DEBUG 1181#ifdef CONFIG_NVM_DEBUG
995 atomic_long_set(&pblk->inflight_writes, 0); 1182 atomic_long_set(&pblk->inflight_writes, 0);
996 atomic_long_set(&pblk->padded_writes, 0); 1183 atomic_long_set(&pblk->padded_writes, 0);
997 atomic_long_set(&pblk->padded_wb, 0); 1184 atomic_long_set(&pblk->padded_wb, 0);
998 atomic_long_set(&pblk->nr_flush, 0);
999 atomic_long_set(&pblk->req_writes, 0); 1185 atomic_long_set(&pblk->req_writes, 0);
1000 atomic_long_set(&pblk->sub_writes, 0); 1186 atomic_long_set(&pblk->sub_writes, 0);
1001 atomic_long_set(&pblk->sync_writes, 0); 1187 atomic_long_set(&pblk->sync_writes, 0);
@@ -1015,41 +1201,35 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
1015 atomic_long_set(&pblk->write_failed, 0); 1201 atomic_long_set(&pblk->write_failed, 0);
1016 atomic_long_set(&pblk->erase_failed, 0); 1202 atomic_long_set(&pblk->erase_failed, 0);
1017 1203
1018 ret = pblk_luns_init(pblk, dev->luns); 1204 ret = pblk_core_init(pblk);
1019 if (ret) { 1205 if (ret) {
1020 pr_err("pblk: could not initialize luns\n"); 1206 pr_err("pblk: could not initialize core\n");
1021 goto fail; 1207 goto fail;
1022 } 1208 }
1023 1209
1024 ret = pblk_lines_init(pblk); 1210 ret = pblk_lines_init(pblk);
1025 if (ret) { 1211 if (ret) {
1026 pr_err("pblk: could not initialize lines\n"); 1212 pr_err("pblk: could not initialize lines\n");
1027 goto fail_free_luns; 1213 goto fail_free_core;
1028 } 1214 }
1029 1215
1030 ret = pblk_core_init(pblk); 1216 ret = pblk_rwb_init(pblk);
1031 if (ret) { 1217 if (ret) {
1032 pr_err("pblk: could not initialize core\n"); 1218 pr_err("pblk: could not initialize write buffer\n");
1033 goto fail_free_line_meta; 1219 goto fail_free_lines;
1034 } 1220 }
1035 1221
1036 ret = pblk_l2p_init(pblk); 1222 ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY);
1037 if (ret) { 1223 if (ret) {
1038 pr_err("pblk: could not initialize maps\n"); 1224 pr_err("pblk: could not initialize maps\n");
1039 goto fail_free_core; 1225 goto fail_free_rwb;
1040 }
1041
1042 ret = pblk_lines_configure(pblk, flags);
1043 if (ret) {
1044 pr_err("pblk: could not configure lines\n");
1045 goto fail_free_l2p;
1046 } 1226 }
1047 1227
1048 ret = pblk_writer_init(pblk); 1228 ret = pblk_writer_init(pblk);
1049 if (ret) { 1229 if (ret) {
1050 if (ret != -EINTR) 1230 if (ret != -EINTR)
1051 pr_err("pblk: could not initialize write thread\n"); 1231 pr_err("pblk: could not initialize write thread\n");
1052 goto fail_free_lines; 1232 goto fail_free_l2p;
1053 } 1233 }
1054 1234
1055 ret = pblk_gc_init(pblk); 1235 ret = pblk_gc_init(pblk);
@@ -1064,10 +1244,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
1064 1244
1065 blk_queue_write_cache(tqueue, true, false); 1245 blk_queue_write_cache(tqueue, true, false);
1066 1246
1067 tqueue->limits.discard_granularity = geo->sec_per_chk * geo->sec_size; 1247 tqueue->limits.discard_granularity = geo->clba * geo->csecs;
1068 tqueue->limits.discard_alignment = 0; 1248 tqueue->limits.discard_alignment = 0;
1069 blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); 1249 blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
1070 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); 1250 blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue);
1071 1251
1072 pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n", 1252 pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
1073 tdisk->disk_name, 1253 tdisk->disk_name,
@@ -1084,16 +1264,14 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
1084 1264
1085fail_stop_writer: 1265fail_stop_writer:
1086 pblk_writer_stop(pblk); 1266 pblk_writer_stop(pblk);
1087fail_free_lines:
1088 pblk_lines_free(pblk);
1089fail_free_l2p: 1267fail_free_l2p:
1090 pblk_l2p_free(pblk); 1268 pblk_l2p_free(pblk);
1269fail_free_rwb:
1270 pblk_rwb_free(pblk);
1271fail_free_lines:
1272 pblk_lines_free(pblk);
1091fail_free_core: 1273fail_free_core:
1092 pblk_core_free(pblk); 1274 pblk_core_free(pblk);
1093fail_free_line_meta:
1094 pblk_line_meta_free(pblk);
1095fail_free_luns:
1096 pblk_luns_free(pblk);
1097fail: 1275fail:
1098 kfree(pblk); 1276 kfree(pblk);
1099 return ERR_PTR(ret); 1277 return ERR_PTR(ret);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 7445e6430c52..20dbaa89c9df 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -65,6 +65,8 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
65 lba_list[paddr] = cpu_to_le64(w_ctx->lba); 65 lba_list[paddr] = cpu_to_le64(w_ctx->lba);
66 if (lba_list[paddr] != addr_empty) 66 if (lba_list[paddr] != addr_empty)
67 line->nr_valid_lbas++; 67 line->nr_valid_lbas++;
68 else
69 atomic64_inc(&pblk->pad_wa);
68 } else { 70 } else {
69 lba_list[paddr] = meta_list[i].lba = addr_empty; 71 lba_list[paddr] = meta_list[i].lba = addr_empty;
70 __pblk_map_invalidate(pblk, line, paddr); 72 __pblk_map_invalidate(pblk, line, paddr);
@@ -125,7 +127,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
125 atomic_dec(&e_line->left_eblks); 127 atomic_dec(&e_line->left_eblks);
126 128
127 *erase_ppa = rqd->ppa_list[i]; 129 *erase_ppa = rqd->ppa_list[i];
128 erase_ppa->g.blk = e_line->id; 130 erase_ppa->a.blk = e_line->id;
129 131
130 spin_unlock(&e_line->lock); 132 spin_unlock(&e_line->lock);
131 133
@@ -166,6 +168,6 @@ retry:
166 set_bit(bit, e_line->erase_bitmap); 168 set_bit(bit, e_line->erase_bitmap);
167 atomic_dec(&e_line->left_eblks); 169 atomic_dec(&e_line->left_eblks);
168 *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */ 170 *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
169 erase_ppa->g.blk = e_line->id; 171 erase_ppa->a.blk = e_line->id;
170 } 172 }
171} 173}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index ec8fc314646b..52fdd85dbc97 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -355,10 +355,13 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
355 struct pblk_rb_entry *entry; 355 struct pblk_rb_entry *entry;
356 unsigned int sync, flush_point; 356 unsigned int sync, flush_point;
357 357
358 pblk_rb_sync_init(rb, NULL);
358 sync = READ_ONCE(rb->sync); 359 sync = READ_ONCE(rb->sync);
359 360
360 if (pos == sync) 361 if (pos == sync) {
362 pblk_rb_sync_end(rb, NULL);
361 return 0; 363 return 0;
364 }
362 365
363#ifdef CONFIG_NVM_DEBUG 366#ifdef CONFIG_NVM_DEBUG
364 atomic_inc(&rb->inflight_flush_point); 367 atomic_inc(&rb->inflight_flush_point);
@@ -367,8 +370,6 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
367 flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); 370 flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
368 entry = &rb->entries[flush_point]; 371 entry = &rb->entries[flush_point];
369 372
370 pblk_rb_sync_init(rb, NULL);
371
372 /* Protect flush points */ 373 /* Protect flush points */
373 smp_store_release(&rb->flush_point, flush_point); 374 smp_store_release(&rb->flush_point, flush_point);
374 375
@@ -437,9 +438,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
437 if (bio->bi_opf & REQ_PREFLUSH) { 438 if (bio->bi_opf & REQ_PREFLUSH) {
438 struct pblk *pblk = container_of(rb, struct pblk, rwb); 439 struct pblk *pblk = container_of(rb, struct pblk, rwb);
439 440
440#ifdef CONFIG_NVM_DEBUG 441 atomic64_inc(&pblk->nr_flush);
441 atomic_long_inc(&pblk->nr_flush);
442#endif
443 if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem)) 442 if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
444 *io_ret = NVM_IO_OK; 443 *io_ret = NVM_IO_OK;
445 } 444 }
@@ -620,11 +619,17 @@ try:
620 pr_err("pblk: could not pad page in write bio\n"); 619 pr_err("pblk: could not pad page in write bio\n");
621 return NVM_IO_ERR; 620 return NVM_IO_ERR;
622 } 621 }
622
623 if (pad < pblk->min_write_pgs)
624 atomic64_inc(&pblk->pad_dist[pad - 1]);
625 else
626 pr_warn("pblk: padding more than min. sectors\n");
627
628 atomic64_add(pad, &pblk->pad_wa);
623 } 629 }
624 630
625#ifdef CONFIG_NVM_DEBUG 631#ifdef CONFIG_NVM_DEBUG
626 atomic_long_add(pad, &((struct pblk *) 632 atomic_long_add(pad, &pblk->padded_writes);
627 (container_of(rb, struct pblk, rwb)))->padded_writes);
628#endif 633#endif
629 634
630 return NVM_IO_OK; 635 return NVM_IO_OK;
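The two hunks above move flush accounting to the new atomic64 nr_flush counter and add per-request padding statistics: a request padded with pad sectors (pad < min_write_pgs) bumps bucket pad - 1 of pad_dist, and the padded sectors are also added to the persistent pad_wa counter. As a rough userspace sketch of that bookkeeping, not part of the patch (plain integers stand in for the kernel atomics, and the MIN_WRITE_PGS value is invented purely for illustration):

#include <stdio.h>
#include <stdint.h>

#define MIN_WRITE_PGS 8 /* assumed value, for illustration only */

static uint64_t pad_dist[MIN_WRITE_PGS - 1]; /* buckets for 1..min_write_pgs-1 padded sectors */
static uint64_t pad_wa;                      /* running total of padded sectors */

/* Same rule as the write-buffer read-out path above: a pad of 0 is not
 * counted, 1..min_write_pgs-1 padded sectors land in pad_dist[pad - 1],
 * and anything larger is unexpected.
 */
static void account_padding(unsigned int pad)
{
	if (!pad)
		return;

	if (pad < MIN_WRITE_PGS)
		pad_dist[pad - 1]++;
	else
		fprintf(stderr, "padding more than min. sectors\n");

	pad_wa += pad;
}

int main(void)
{
	account_padding(3);
	account_padding(7);
	printf("pad_wa=%llu bucket[2]=%llu bucket[6]=%llu\n",
	       (unsigned long long)pad_wa,
	       (unsigned long long)pad_dist[2],
	       (unsigned long long)pad_dist[6]);
	return 0;
}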
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 2f761283f43e..9eee10f69df0 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -563,7 +563,7 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
563 if (!(gc_rq->secs_to_gc)) 563 if (!(gc_rq->secs_to_gc))
564 goto out; 564 goto out;
565 565
566 data_len = (gc_rq->secs_to_gc) * geo->sec_size; 566 data_len = (gc_rq->secs_to_gc) * geo->csecs;
567 bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len, 567 bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len,
568 PBLK_VMALLOC_META, GFP_KERNEL); 568 PBLK_VMALLOC_META, GFP_KERNEL);
569 if (IS_ERR(bio)) { 569 if (IS_ERR(bio)) {
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 1d5e961bf5e0..3e079c2afa6e 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -21,17 +21,15 @@ void pblk_submit_rec(struct work_struct *work)
21 struct pblk_rec_ctx *recovery = 21 struct pblk_rec_ctx *recovery =
22 container_of(work, struct pblk_rec_ctx, ws_rec); 22 container_of(work, struct pblk_rec_ctx, ws_rec);
23 struct pblk *pblk = recovery->pblk; 23 struct pblk *pblk = recovery->pblk;
24 struct nvm_tgt_dev *dev = pblk->dev;
25 struct nvm_rq *rqd = recovery->rqd; 24 struct nvm_rq *rqd = recovery->rqd;
26 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); 25 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
27 int max_secs = nvm_max_phys_sects(dev);
28 struct bio *bio; 26 struct bio *bio;
29 unsigned int nr_rec_secs; 27 unsigned int nr_rec_secs;
30 unsigned int pgs_read; 28 unsigned int pgs_read;
31 int ret; 29 int ret;
32 30
33 nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status, 31 nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
34 max_secs); 32 NVM_MAX_VLBA);
35 33
36 bio = bio_alloc(GFP_KERNEL, nr_rec_secs); 34 bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
37 35
@@ -74,8 +72,6 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
74 struct pblk_rec_ctx *recovery, u64 *comp_bits, 72 struct pblk_rec_ctx *recovery, u64 *comp_bits,
75 unsigned int comp) 73 unsigned int comp)
76{ 74{
77 struct nvm_tgt_dev *dev = pblk->dev;
78 int max_secs = nvm_max_phys_sects(dev);
79 struct nvm_rq *rec_rqd; 75 struct nvm_rq *rec_rqd;
80 struct pblk_c_ctx *rec_ctx; 76 struct pblk_c_ctx *rec_ctx;
81 int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; 77 int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
@@ -86,7 +82,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
86 /* Copy completion bitmap, but exclude the first X completed entries */ 82 /* Copy completion bitmap, but exclude the first X completed entries */
87 bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status, 83 bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
88 (unsigned long int *)comp_bits, 84 (unsigned long int *)comp_bits,
89 comp, max_secs); 85 comp, NVM_MAX_VLBA);
90 86
91 /* Save the context for the entries that need to be re-written and 87 /* Save the context for the entries that need to be re-written and
92 * update current context with the completed entries. 88 * update current context with the completed entries.
@@ -188,7 +184,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
188 int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); 184 int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
189 185
190 return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] - 186 return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
191 nr_bb * geo->sec_per_chk; 187 nr_bb * geo->clba;
192} 188}
193 189
194struct pblk_recov_alloc { 190struct pblk_recov_alloc {
@@ -236,7 +232,7 @@ next_read_rq:
236 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 232 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
237 if (!rq_ppas) 233 if (!rq_ppas)
238 rq_ppas = pblk->min_write_pgs; 234 rq_ppas = pblk->min_write_pgs;
239 rq_len = rq_ppas * geo->sec_size; 235 rq_len = rq_ppas * geo->csecs;
240 236
241 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); 237 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
242 if (IS_ERR(bio)) 238 if (IS_ERR(bio))
@@ -355,7 +351,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
355 if (!pad_rq) 351 if (!pad_rq)
356 return -ENOMEM; 352 return -ENOMEM;
357 353
358 data = vzalloc(pblk->max_write_pgs * geo->sec_size); 354 data = vzalloc(pblk->max_write_pgs * geo->csecs);
359 if (!data) { 355 if (!data) {
360 ret = -ENOMEM; 356 ret = -ENOMEM;
361 goto free_rq; 357 goto free_rq;
@@ -372,7 +368,7 @@ next_pad_rq:
372 goto fail_free_pad; 368 goto fail_free_pad;
373 } 369 }
374 370
375 rq_len = rq_ppas * geo->sec_size; 371 rq_len = rq_ppas * geo->csecs;
376 372
377 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); 373 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
378 if (!meta_list) { 374 if (!meta_list) {
@@ -513,7 +509,7 @@ next_rq:
513 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 509 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
514 if (!rq_ppas) 510 if (!rq_ppas)
515 rq_ppas = pblk->min_write_pgs; 511 rq_ppas = pblk->min_write_pgs;
516 rq_len = rq_ppas * geo->sec_size; 512 rq_len = rq_ppas * geo->csecs;
517 513
518 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); 514 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
519 if (IS_ERR(bio)) 515 if (IS_ERR(bio))
@@ -644,7 +640,7 @@ next_rq:
644 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 640 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
645 if (!rq_ppas) 641 if (!rq_ppas)
646 rq_ppas = pblk->min_write_pgs; 642 rq_ppas = pblk->min_write_pgs;
647 rq_len = rq_ppas * geo->sec_size; 643 rq_len = rq_ppas * geo->csecs;
648 644
649 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); 645 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
650 if (IS_ERR(bio)) 646 if (IS_ERR(bio))
@@ -749,7 +745,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
749 ppa_list = (void *)(meta_list) + pblk_dma_meta_size; 745 ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
750 dma_ppa_list = dma_meta_list + pblk_dma_meta_size; 746 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
751 747
752 data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); 748 data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
753 if (!data) { 749 if (!data) {
754 ret = -ENOMEM; 750 ret = -ENOMEM;
755 goto free_meta_list; 751 goto free_meta_list;
@@ -826,6 +822,63 @@ static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
826 return emeta_start; 822 return emeta_start;
827} 823}
828 824
825static int pblk_recov_check_line_version(struct pblk *pblk,
826 struct line_emeta *emeta)
827{
828 struct line_header *header = &emeta->header;
829
830 if (header->version_major != EMETA_VERSION_MAJOR) {
831 pr_err("pblk: line major version mismatch: %d, expected: %d\n",
832 header->version_major, EMETA_VERSION_MAJOR);
833 return 1;
834 }
835
 836#ifdef CONFIG_NVM_DEBUG
 837	if (header->version_minor > EMETA_VERSION_MINOR)
 838		pr_info("pblk: newer line minor version found: %d\n", header->version_minor);
839#endif
840
841 return 0;
842}
843
844static void pblk_recov_wa_counters(struct pblk *pblk,
845 struct line_emeta *emeta)
846{
847 struct pblk_line_meta *lm = &pblk->lm;
848 struct line_header *header = &emeta->header;
849 struct wa_counters *wa = emeta_to_wa(lm, emeta);
850
851 /* WA counters were introduced in emeta version 0.2 */
852 if (header->version_major > 0 || header->version_minor >= 2) {
853 u64 user = le64_to_cpu(wa->user);
854 u64 pad = le64_to_cpu(wa->pad);
855 u64 gc = le64_to_cpu(wa->gc);
856
857 atomic64_set(&pblk->user_wa, user);
858 atomic64_set(&pblk->pad_wa, pad);
859 atomic64_set(&pblk->gc_wa, gc);
860
861 pblk->user_rst_wa = user;
862 pblk->pad_rst_wa = pad;
863 pblk->gc_rst_wa = gc;
864 }
865}
866
867static int pblk_line_was_written(struct pblk_line *line,
868 struct pblk_line_meta *lm)
869{
870
871 int i;
872 int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;
873
874 for (i = 0; i < lm->blk_per_line; i++) {
875 if (!(line->chks[i].state & state_mask))
876 return 1;
877 }
878
879 return 0;
880}
881
829struct pblk_line *pblk_recov_l2p(struct pblk *pblk) 882struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
830{ 883{
831 struct pblk_line_meta *lm = &pblk->lm; 884 struct pblk_line_meta *lm = &pblk->lm;
@@ -862,6 +915,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
862 line->lun_bitmap = ((void *)(smeta_buf)) + 915 line->lun_bitmap = ((void *)(smeta_buf)) +
863 sizeof(struct line_smeta); 916 sizeof(struct line_smeta);
864 917
918 if (!pblk_line_was_written(line, lm))
919 continue;
920
865 /* Lines that cannot be read are assumed as not written here */ 921 /* Lines that cannot be read are assumed as not written here */
866 if (pblk_line_read_smeta(pblk, line)) 922 if (pblk_line_read_smeta(pblk, line))
867 continue; 923 continue;
@@ -873,9 +929,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
873 if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC) 929 if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
874 continue; 930 continue;
875 931
876 if (smeta_buf->header.version != SMETA_VERSION) { 932 if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
877 pr_err("pblk: found incompatible line version %u\n", 933 pr_err("pblk: found incompatible line version %u\n",
878 le16_to_cpu(smeta_buf->header.version)); 934 smeta_buf->header.version_major);
879 return ERR_PTR(-EINVAL); 935 return ERR_PTR(-EINVAL);
880 } 936 }
881 937
@@ -943,6 +999,11 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
943 goto next; 999 goto next;
944 } 1000 }
945 1001
1002 if (pblk_recov_check_line_version(pblk, line->emeta->buf))
1003 return ERR_PTR(-EINVAL);
1004
1005 pblk_recov_wa_counters(pblk, line->emeta->buf);
1006
946 if (pblk_recov_l2p_from_emeta(pblk, line)) 1007 if (pblk_recov_l2p_from_emeta(pblk, line))
947 pblk_recov_l2p_from_oob(pblk, line); 1008 pblk_recov_l2p_from_oob(pblk, line);
948 1009
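The recovery changes above gate on the new on-media header versions and only trust the write-amplification counters from emeta 0.2 onwards: a major-version mismatch aborts recovery, while a newer minor version is accepted, which is what lets additions such as the wa_counters block be handled transparently. A compact userspace sketch of that policy, reusing the patch's constants but nothing else from the kernel, could read:

#include <stdio.h>
#include <stdbool.h>

#define EMETA_VERSION_MAJOR 0
#define EMETA_VERSION_MINOR 2

struct line_header { unsigned char version_major, version_minor; };

/* Mirrors the policy in the recovery hunks: an unknown major version is
 * incompatible and recovery must fail; a newer minor version is tolerated;
 * the wa_counters block is only present from emeta 0.2 onwards.
 */
static bool line_version_ok(const struct line_header *h)
{
	if (h->version_major != EMETA_VERSION_MAJOR) {
		fprintf(stderr, "line major version mismatch: %d, expected: %d\n",
			h->version_major, EMETA_VERSION_MAJOR);
		return false;
	}
	return true;
}

static bool line_has_wa_counters(const struct line_header *h)
{
	return h->version_major > 0 || h->version_minor >= 2;
}

int main(void)
{
	struct line_header old = { 0, 1 }, cur = { 0, 2 };

	printf("0.1: compatible=%d has_wa=%d\n",
	       line_version_ok(&old), line_has_wa_counters(&old));
	printf("0.2: compatible=%d has_wa=%d\n",
	       line_version_ok(&cur), line_has_wa_counters(&cur));
	return 0;
}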
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index 0d457b162f23..883a7113b19d 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -200,7 +200,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
200 200
201 /* Consider sectors used for metadata */ 201 /* Consider sectors used for metadata */
202 sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; 202 sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
203 blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk); 203 blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
204 204
205 rl->high = pblk->op_blks - blk_meta - lm->blk_per_line; 205 rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
206 rl->high_pw = get_count_order(rl->high); 206 rl->high_pw = get_count_order(rl->high);
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index 620bab853579..e61909af23a5 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -39,8 +39,8 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
39 sz += snprintf(page + sz, PAGE_SIZE - sz, 39 sz += snprintf(page + sz, PAGE_SIZE - sz,
40 "pblk: pos:%d, ch:%d, lun:%d - %d\n", 40 "pblk: pos:%d, ch:%d, lun:%d - %d\n",
41 i, 41 i,
42 rlun->bppa.g.ch, 42 rlun->bppa.a.ch,
43 rlun->bppa.g.lun, 43 rlun->bppa.a.lun,
44 active); 44 active);
45 } 45 }
46 46
@@ -115,24 +115,47 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
115 struct nvm_geo *geo = &dev->geo; 115 struct nvm_geo *geo = &dev->geo;
116 ssize_t sz = 0; 116 ssize_t sz = 0;
117 117
118 sz = snprintf(page, PAGE_SIZE - sz, 118 if (geo->version == NVM_OCSSD_SPEC_12) {
119 "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", 119 struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
120 pblk->ppaf_bitsize, 120 struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
121 pblk->ppaf.blk_offset, geo->ppaf.blk_len,
122 pblk->ppaf.pg_offset, geo->ppaf.pg_len,
123 pblk->ppaf.lun_offset, geo->ppaf.lun_len,
124 pblk->ppaf.ch_offset, geo->ppaf.ch_len,
125 pblk->ppaf.pln_offset, geo->ppaf.pln_len,
126 pblk->ppaf.sec_offset, geo->ppaf.sect_len);
127 121
128 sz += snprintf(page + sz, PAGE_SIZE - sz, 122 sz = snprintf(page, PAGE_SIZE,
129 "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", 123 "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
130 geo->ppaf.blk_offset, geo->ppaf.blk_len, 124 pblk->addrf_len,
131 geo->ppaf.pg_offset, geo->ppaf.pg_len, 125 ppaf->blk_offset, ppaf->blk_len,
132 geo->ppaf.lun_offset, geo->ppaf.lun_len, 126 ppaf->pg_offset, ppaf->pg_len,
133 geo->ppaf.ch_offset, geo->ppaf.ch_len, 127 ppaf->lun_offset, ppaf->lun_len,
134 geo->ppaf.pln_offset, geo->ppaf.pln_len, 128 ppaf->ch_offset, ppaf->ch_len,
135 geo->ppaf.sect_offset, geo->ppaf.sect_len); 129 ppaf->pln_offset, ppaf->pln_len,
130 ppaf->sec_offset, ppaf->sec_len);
131
132 sz += snprintf(page + sz, PAGE_SIZE - sz,
133 "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
134 gppaf->blk_offset, gppaf->blk_len,
135 gppaf->pg_offset, gppaf->pg_len,
136 gppaf->lun_offset, gppaf->lun_len,
137 gppaf->ch_offset, gppaf->ch_len,
138 gppaf->pln_offset, gppaf->pln_len,
139 gppaf->sec_offset, gppaf->sec_len);
140 } else {
141 struct nvm_addrf *ppaf = &pblk->addrf;
142 struct nvm_addrf *gppaf = &geo->addrf;
143
144 sz = snprintf(page, PAGE_SIZE,
145 "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
146 pblk->addrf_len,
147 ppaf->ch_offset, ppaf->ch_len,
148 ppaf->lun_offset, ppaf->lun_len,
149 ppaf->chk_offset, ppaf->chk_len,
150 ppaf->sec_offset, ppaf->sec_len);
151
152 sz += snprintf(page + sz, PAGE_SIZE - sz,
153 "device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
154 gppaf->ch_offset, gppaf->ch_len,
155 gppaf->lun_offset, gppaf->lun_len,
156 gppaf->chk_offset, gppaf->chk_len,
157 gppaf->sec_offset, gppaf->sec_len);
158 }
136 159
137 return sz; 160 return sz;
138} 161}
@@ -288,7 +311,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
288 "blk_line:%d, sec_line:%d, sec_blk:%d\n", 311 "blk_line:%d, sec_line:%d, sec_blk:%d\n",
289 lm->blk_per_line, 312 lm->blk_per_line,
290 lm->sec_per_line, 313 lm->sec_per_line,
291 geo->sec_per_chk); 314 geo->clba);
292 315
293 return sz; 316 return sz;
294} 317}
@@ -298,15 +321,104 @@ static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
298 return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write); 321 return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
299} 322}
300 323
324static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
325 char *page)
326{
327 int sz;
328
329
330 sz = snprintf(page, PAGE_SIZE,
331 "user:%lld gc:%lld pad:%lld WA:",
332 user, gc, pad);
333
334 if (!user) {
335 sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
336 } else {
337 u64 wa_int;
338 u32 wa_frac;
339
340 wa_int = (user + gc + pad) * 100000;
341 wa_int = div_u64(wa_int, user);
342 wa_int = div_u64_rem(wa_int, 100000, &wa_frac);
343
344 sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
345 wa_int, wa_frac);
346 }
347
348 return sz;
349}
350
351static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page)
352{
353 return pblk_get_write_amp(atomic64_read(&pblk->user_wa),
354 atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa),
355 page);
356}
357
358static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
359{
360 return pblk_get_write_amp(
361 atomic64_read(&pblk->user_wa) - pblk->user_rst_wa,
362 atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa,
363 atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
364}
365
366static long long bucket_percentage(unsigned long long bucket,
367 unsigned long long total)
368{
369 int p = bucket * 100;
370
371 p = div_u64(p, total);
372
373 return p;
374}
375
376static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
377{
378 int sz = 0;
379 unsigned long long total;
380 unsigned long long total_buckets = 0;
381 int buckets = pblk->min_write_pgs - 1;
382 int i;
383
384 total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
385 if (!total) {
386 for (i = 0; i < (buckets + 1); i++)
387 sz += snprintf(page + sz, PAGE_SIZE - sz,
388 "%d:0 ", i);
389 sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
390
391 return sz;
392 }
393
394 for (i = 0; i < buckets; i++)
395 total_buckets += atomic64_read(&pblk->pad_dist[i]);
396
397 sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
398 bucket_percentage(total - total_buckets, total));
399
400 for (i = 0; i < buckets; i++) {
401 unsigned long long p;
402
403 p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
404 total);
405 sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
406 i + 1, p);
407 }
408 sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
409
410 return sz;
411}
412
301#ifdef CONFIG_NVM_DEBUG 413#ifdef CONFIG_NVM_DEBUG
302static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page) 414static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
303{ 415{
304 return snprintf(page, PAGE_SIZE, 416 return snprintf(page, PAGE_SIZE,
305 "%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n", 417 "%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
306 atomic_long_read(&pblk->inflight_writes), 418 atomic_long_read(&pblk->inflight_writes),
307 atomic_long_read(&pblk->inflight_reads), 419 atomic_long_read(&pblk->inflight_reads),
308 atomic_long_read(&pblk->req_writes), 420 atomic_long_read(&pblk->req_writes),
309 atomic_long_read(&pblk->nr_flush), 421 (u64)atomic64_read(&pblk->nr_flush),
310 atomic_long_read(&pblk->padded_writes), 422 atomic_long_read(&pblk->padded_writes),
311 atomic_long_read(&pblk->padded_wb), 423 atomic_long_read(&pblk->padded_wb),
312 atomic_long_read(&pblk->sub_writes), 424 atomic_long_read(&pblk->sub_writes),
@@ -360,6 +472,56 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
360 return len; 472 return len;
361} 473}
362 474
475static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
476 const char *page, size_t len)
477{
478 size_t c_len;
479 int reset_value;
480
481 c_len = strcspn(page, "\n");
482 if (c_len >= len)
483 return -EINVAL;
484
485 if (kstrtouint(page, 0, &reset_value))
486 return -EINVAL;
487
488 if (reset_value != 0)
489 return -EINVAL;
490
491 pblk->user_rst_wa = atomic64_read(&pblk->user_wa);
492 pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa);
493 pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa);
494
495 return len;
496}
497
498
499static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
500 const char *page, size_t len)
501{
502 size_t c_len;
503 int reset_value;
504 int buckets = pblk->min_write_pgs - 1;
505 int i;
506
507 c_len = strcspn(page, "\n");
508 if (c_len >= len)
509 return -EINVAL;
510
511 if (kstrtouint(page, 0, &reset_value))
512 return -EINVAL;
513
514 if (reset_value != 0)
515 return -EINVAL;
516
517 for (i = 0; i < buckets; i++)
518 atomic64_set(&pblk->pad_dist[i], 0);
519
520 pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);
521
522 return len;
523}
524
363static struct attribute sys_write_luns = { 525static struct attribute sys_write_luns = {
364 .name = "write_luns", 526 .name = "write_luns",
365 .mode = 0444, 527 .mode = 0444,
@@ -410,6 +572,21 @@ static struct attribute sys_max_sec_per_write = {
410 .mode = 0644, 572 .mode = 0644,
411}; 573};
412 574
575static struct attribute sys_write_amp_mileage = {
576 .name = "write_amp_mileage",
577 .mode = 0444,
578};
579
580static struct attribute sys_write_amp_trip = {
581 .name = "write_amp_trip",
582 .mode = 0644,
583};
584
585static struct attribute sys_padding_dist = {
586 .name = "padding_dist",
587 .mode = 0644,
588};
589
413#ifdef CONFIG_NVM_DEBUG 590#ifdef CONFIG_NVM_DEBUG
414static struct attribute sys_stats_debug_attr = { 591static struct attribute sys_stats_debug_attr = {
415 .name = "stats", 592 .name = "stats",
@@ -428,6 +605,9 @@ static struct attribute *pblk_attrs[] = {
428 &sys_stats_ppaf_attr, 605 &sys_stats_ppaf_attr,
429 &sys_lines_attr, 606 &sys_lines_attr,
430 &sys_lines_info_attr, 607 &sys_lines_info_attr,
608 &sys_write_amp_mileage,
609 &sys_write_amp_trip,
610 &sys_padding_dist,
431#ifdef CONFIG_NVM_DEBUG 611#ifdef CONFIG_NVM_DEBUG
432 &sys_stats_debug_attr, 612 &sys_stats_debug_attr,
433#endif 613#endif
@@ -457,6 +637,12 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
457 return pblk_sysfs_lines_info(pblk, buf); 637 return pblk_sysfs_lines_info(pblk, buf);
458 else if (strcmp(attr->name, "max_sec_per_write") == 0) 638 else if (strcmp(attr->name, "max_sec_per_write") == 0)
459 return pblk_sysfs_get_sec_per_write(pblk, buf); 639 return pblk_sysfs_get_sec_per_write(pblk, buf);
640 else if (strcmp(attr->name, "write_amp_mileage") == 0)
641 return pblk_sysfs_get_write_amp_mileage(pblk, buf);
642 else if (strcmp(attr->name, "write_amp_trip") == 0)
643 return pblk_sysfs_get_write_amp_trip(pblk, buf);
644 else if (strcmp(attr->name, "padding_dist") == 0)
645 return pblk_sysfs_get_padding_dist(pblk, buf);
460#ifdef CONFIG_NVM_DEBUG 646#ifdef CONFIG_NVM_DEBUG
461 else if (strcmp(attr->name, "stats") == 0) 647 else if (strcmp(attr->name, "stats") == 0)
462 return pblk_sysfs_stats_debug(pblk, buf); 648 return pblk_sysfs_stats_debug(pblk, buf);
@@ -473,7 +659,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
473 return pblk_sysfs_gc_force(pblk, buf, len); 659 return pblk_sysfs_gc_force(pblk, buf, len);
474 else if (strcmp(attr->name, "max_sec_per_write") == 0) 660 else if (strcmp(attr->name, "max_sec_per_write") == 0)
475 return pblk_sysfs_set_sec_per_write(pblk, buf, len); 661 return pblk_sysfs_set_sec_per_write(pblk, buf, len);
476 662 else if (strcmp(attr->name, "write_amp_trip") == 0)
663 return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
664 else if (strcmp(attr->name, "padding_dist") == 0)
665 return pblk_sysfs_set_padding_dist(pblk, buf, len);
477 return 0; 666 return 0;
478} 667}
479 668
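The new write_amp_mileage and write_amp_trip attributes above report WA = (user + gc + pad) / user as a fixed-point value with five fractional digits. A minimal userspace sketch of the same arithmetic, not part of the patch (ordinary 64-bit division stands in for div_u64()/div_u64_rem(), and the sample numbers are invented):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* WA = (user + gc + pad) / user, printed as <int>.<5 fractional digits>. */
static void print_write_amp(uint64_t user, uint64_t gc, uint64_t pad)
{
	printf("user:%" PRIu64 " gc:%" PRIu64 " pad:%" PRIu64 " WA:",
	       user, gc, pad);

	if (!user) {
		printf("NaN\n");
		return;
	}

	uint64_t scaled = (user + gc + pad) * 100000ULL / user;

	printf("%" PRIu64 ".%05" PRIu64 "\n", scaled / 100000, scaled % 100000);
}

int main(void)
{
	/* 1000 user sectors, 150 rewritten by GC, 50 padded -> WA of 1.20000 */
	print_write_amp(1000, 150, 50);
	return 0;
}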
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aae86ed60b98..3e6f1ebd743a 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -333,7 +333,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
333 m_ctx = nvm_rq_to_pdu(rqd); 333 m_ctx = nvm_rq_to_pdu(rqd);
334 m_ctx->private = meta_line; 334 m_ctx->private = meta_line;
335 335
336 rq_len = rq_ppas * geo->sec_size; 336 rq_len = rq_ppas * geo->csecs;
337 data = ((void *)emeta->buf) + emeta->mem; 337 data = ((void *)emeta->buf) + emeta->mem;
338 338
339 bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, 339 bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 8c357fb6538e..9c682acfc5d1 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -201,12 +201,6 @@ struct pblk_rb {
201 201
202struct pblk_lun { 202struct pblk_lun {
203 struct ppa_addr bppa; 203 struct ppa_addr bppa;
204
205 u8 *bb_list; /* Bad block list for LUN. Only used on
206 * bring up. Bad blocks are managed
207 * within lines on run-time.
208 */
209
210 struct semaphore wr_sem; 204 struct semaphore wr_sem;
211}; 205};
212 206
@@ -303,6 +297,7 @@ enum {
303 PBLK_LINETYPE_DATA = 2, 297 PBLK_LINETYPE_DATA = 2,
304 298
305 /* Line state */ 299 /* Line state */
300 PBLK_LINESTATE_NEW = 9,
306 PBLK_LINESTATE_FREE = 10, 301 PBLK_LINESTATE_FREE = 10,
307 PBLK_LINESTATE_OPEN = 11, 302 PBLK_LINESTATE_OPEN = 11,
308 PBLK_LINESTATE_CLOSED = 12, 303 PBLK_LINESTATE_CLOSED = 12,
@@ -320,14 +315,26 @@ enum {
320}; 315};
321 316
322#define PBLK_MAGIC 0x70626c6b /*pblk*/ 317#define PBLK_MAGIC 0x70626c6b /*pblk*/
323#define SMETA_VERSION cpu_to_le16(1) 318
319/* emeta/smeta persistent storage format versions:
 320 * Changes in major version require offline migration.
321 * Changes in minor version are handled automatically during
322 * recovery.
323 */
324
325#define SMETA_VERSION_MAJOR (0)
326#define SMETA_VERSION_MINOR (1)
327
328#define EMETA_VERSION_MAJOR (0)
329#define EMETA_VERSION_MINOR (2)
324 330
325struct line_header { 331struct line_header {
326 __le32 crc; 332 __le32 crc;
327 __le32 identifier; /* pblk identifier */ 333 __le32 identifier; /* pblk identifier */
328 __u8 uuid[16]; /* instance uuid */ 334 __u8 uuid[16]; /* instance uuid */
329 __le16 type; /* line type */ 335 __le16 type; /* line type */
330 __le16 version; /* type version */ 336 __u8 version_major; /* version major */
337 __u8 version_minor; /* version minor */
331 __le32 id; /* line id for current line */ 338 __le32 id; /* line id for current line */
332}; 339};
333 340
@@ -349,11 +356,13 @@ struct line_smeta {
349 __le64 lun_bitmap[]; 356 __le64 lun_bitmap[];
350}; 357};
351 358
359
352/* 360/*
353 * Metadata layout in media: 361 * Metadata layout in media:
354 * First sector: 362 * First sector:
355 * 1. struct line_emeta 363 * 1. struct line_emeta
356 * 2. bad block bitmap (u64 * window_wr_lun) 364 * 2. bad block bitmap (u64 * window_wr_lun)
365 * 3. write amplification counters
357 * Mid sectors (start at lbas_sector): 366 * Mid sectors (start at lbas_sector):
358 * 3. nr_lbas (u64) forming lba list 367 * 3. nr_lbas (u64) forming lba list
359 * Last sectors (start at vsc_sector): 368 * Last sectors (start at vsc_sector):
@@ -377,7 +386,15 @@ struct line_emeta {
377 __le32 next_id; /* Line id for next line */ 386 __le32 next_id; /* Line id for next line */
378 __le64 nr_lbas; /* Number of lbas mapped in line */ 387 __le64 nr_lbas; /* Number of lbas mapped in line */
379 __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */ 388 __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
380 __le64 bb_bitmap[]; /* Updated bad block bitmap for line */ 389 __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
390};
391
392
393/* Write amplification counters stored on media */
394struct wa_counters {
395 __le64 user; /* Number of user written sectors */
 396	__le64 gc;		/* Number of sectors written by GC */
397 __le64 pad; /* Number of padded sectors */
381}; 398};
382 399
383struct pblk_emeta { 400struct pblk_emeta {
@@ -410,6 +427,8 @@ struct pblk_line {
410 427
411 unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */ 428 unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
412 429
430 struct nvm_chk_meta *chks; /* Chunks forming line */
431
413 struct pblk_smeta *smeta; /* Start metadata */ 432 struct pblk_smeta *smeta; /* Start metadata */
 414	struct pblk_emeta *emeta;	/* End metadata */		 433	struct pblk_emeta *emeta;	/* End metadata */
415 434
@@ -507,10 +526,11 @@ struct pblk_line_meta {
507 unsigned int smeta_sec; /* Sectors needed for smeta */ 526 unsigned int smeta_sec; /* Sectors needed for smeta */
508 527
509 unsigned int emeta_len[4]; /* Lengths for emeta: 528 unsigned int emeta_len[4]; /* Lengths for emeta:
510 * [0]: Total length 529 * [0]: Total
511 * [1]: struct line_emeta length 530 * [1]: struct line_emeta +
512 * [2]: L2P portion length 531 * bb_bitmap + struct wa_counters
513 * [3]: vsc list length 532 * [2]: L2P portion
533 * [3]: vsc
514 */ 534 */
515 unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout 535 unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
516 * as emeta_len 536 * as emeta_len
@@ -534,21 +554,6 @@ struct pblk_line_meta {
534 unsigned int meta_distance; /* Distance between data and metadata */ 554 unsigned int meta_distance; /* Distance between data and metadata */
535}; 555};
536 556
537struct pblk_addr_format {
538 u64 ch_mask;
539 u64 lun_mask;
540 u64 pln_mask;
541 u64 blk_mask;
542 u64 pg_mask;
543 u64 sec_mask;
544 u8 ch_offset;
545 u8 lun_offset;
546 u8 pln_offset;
547 u8 blk_offset;
548 u8 pg_offset;
549 u8 sec_offset;
550};
551
552enum { 557enum {
553 PBLK_STATE_RUNNING = 0, 558 PBLK_STATE_RUNNING = 0,
554 PBLK_STATE_STOPPING = 1, 559 PBLK_STATE_STOPPING = 1,
@@ -556,6 +561,18 @@ enum {
556 PBLK_STATE_STOPPED = 3, 561 PBLK_STATE_STOPPED = 3,
557}; 562};
558 563
564/* Internal format to support not power-of-2 device formats */
565struct pblk_addrf {
566 /* gen to dev */
567 int sec_stripe;
568 int ch_stripe;
569 int lun_stripe;
570
571 /* dev to gen */
572 int sec_lun_stripe;
573 int sec_ws_stripe;
574};
575
559struct pblk { 576struct pblk {
560 struct nvm_tgt_dev *dev; 577 struct nvm_tgt_dev *dev;
561 struct gendisk *disk; 578 struct gendisk *disk;
@@ -568,8 +585,9 @@ struct pblk {
568 struct pblk_line_mgmt l_mg; /* Line management */ 585 struct pblk_line_mgmt l_mg; /* Line management */
569 struct pblk_line_meta lm; /* Line metadata */ 586 struct pblk_line_meta lm; /* Line metadata */
570 587
571 int ppaf_bitsize; 588 struct nvm_addrf addrf; /* Aligned address format */
572 struct pblk_addr_format ppaf; 589 struct pblk_addrf uaddrf; /* Unaligned address format */
590 int addrf_len;
573 591
574 struct pblk_rb rwb; 592 struct pblk_rb rwb;
575 593
@@ -592,12 +610,27 @@ struct pblk {
592 int sec_per_write; 610 int sec_per_write;
593 611
594 unsigned char instance_uuid[16]; 612 unsigned char instance_uuid[16];
613
614 /* Persistent write amplification counters, 4kb sector I/Os */
615 atomic64_t user_wa; /* Sectors written by user */
616 atomic64_t gc_wa; /* Sectors written by GC */
617 atomic64_t pad_wa; /* Padded sectors written */
618
619 /* Reset values for delta write amplification measurements */
620 u64 user_rst_wa;
621 u64 gc_rst_wa;
622 u64 pad_rst_wa;
623
624 /* Counters used for calculating padding distribution */
625 atomic64_t *pad_dist; /* Padding distribution buckets */
 626	u64 nr_flush_rst;		/* Flushes reset value for pad dist. */
627 atomic64_t nr_flush; /* Number of flush/fua I/O */
628
595#ifdef CONFIG_NVM_DEBUG 629#ifdef CONFIG_NVM_DEBUG
596 /* All debug counters apply to 4kb sector I/Os */ 630 /* Non-persistent debug counters, 4kb sector I/Os */
597 atomic_long_t inflight_writes; /* Inflight writes (user and gc) */ 631 atomic_long_t inflight_writes; /* Inflight writes (user and gc) */
598 atomic_long_t padded_writes; /* Sectors padded due to flush/fua */ 632 atomic_long_t padded_writes; /* Sectors padded due to flush/fua */
599 atomic_long_t padded_wb; /* Sectors padded in write buffer */ 633 atomic_long_t padded_wb; /* Sectors padded in write buffer */
600 atomic_long_t nr_flush; /* Number of flush/fua I/O */
601 atomic_long_t req_writes; /* Sectors stored on write buffer */ 634 atomic_long_t req_writes; /* Sectors stored on write buffer */
602 atomic_long_t sub_writes; /* Sectors submitted from buffer */ 635 atomic_long_t sub_writes; /* Sectors submitted from buffer */
603 atomic_long_t sync_writes; /* Sectors synced to media */ 636 atomic_long_t sync_writes; /* Sectors synced to media */
@@ -712,6 +745,10 @@ void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
712int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, 745int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
713 struct pblk_c_ctx *c_ctx); 746 struct pblk_c_ctx *c_ctx);
714void pblk_discard(struct pblk *pblk, struct bio *bio); 747void pblk_discard(struct pblk *pblk, struct bio *bio);
748struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk);
749struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
750 struct nvm_chk_meta *lp,
751 struct ppa_addr ppa);
715void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); 752void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
716void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); 753void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
717int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); 754int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
@@ -888,6 +925,12 @@ static inline void *emeta_to_bb(struct line_emeta *emeta)
888 return emeta->bb_bitmap; 925 return emeta->bb_bitmap;
889} 926}
890 927
928static inline void *emeta_to_wa(struct pblk_line_meta *lm,
929 struct line_emeta *emeta)
930{
931 return emeta->bb_bitmap + lm->blk_bitmap_len;
932}
933
891static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta) 934static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
892{ 935{
893 return ((void *)emeta + pblk->lm.emeta_len[1]); 936 return ((void *)emeta + pblk->lm.emeta_len[1]);
@@ -903,38 +946,60 @@ static inline int pblk_line_vsc(struct pblk_line *line)
903 return le32_to_cpu(*line->vsc); 946 return le32_to_cpu(*line->vsc);
904} 947}
905 948
906#define NVM_MEM_PAGE_WRITE (8)
907
908static inline int pblk_pad_distance(struct pblk *pblk) 949static inline int pblk_pad_distance(struct pblk *pblk)
909{ 950{
910 struct nvm_tgt_dev *dev = pblk->dev; 951 struct nvm_tgt_dev *dev = pblk->dev;
911 struct nvm_geo *geo = &dev->geo; 952 struct nvm_geo *geo = &dev->geo;
912 953
913 return NVM_MEM_PAGE_WRITE * geo->all_luns * geo->sec_per_pl; 954 return geo->mw_cunits * geo->all_luns * geo->ws_opt;
914} 955}
915 956
916static inline int pblk_ppa_to_line(struct ppa_addr p) 957static inline int pblk_ppa_to_line(struct ppa_addr p)
917{ 958{
918 return p.g.blk; 959 return p.a.blk;
919} 960}
920 961
921static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) 962static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
922{ 963{
923 return p.g.lun * geo->nr_chnls + p.g.ch; 964 return p.a.lun * geo->num_ch + p.a.ch;
924} 965}
925 966
926static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, 967static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
927 u64 line_id) 968 u64 line_id)
928{ 969{
970 struct nvm_tgt_dev *dev = pblk->dev;
971 struct nvm_geo *geo = &dev->geo;
929 struct ppa_addr ppa; 972 struct ppa_addr ppa;
930 973
931 ppa.ppa = 0; 974 if (geo->version == NVM_OCSSD_SPEC_12) {
932 ppa.g.blk = line_id; 975 struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
933 ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset; 976
934 ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset; 977 ppa.ppa = 0;
935 ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset; 978 ppa.g.blk = line_id;
936 ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset; 979 ppa.g.pg = (paddr & ppaf->pg_mask) >> ppaf->pg_offset;
937 ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset; 980 ppa.g.lun = (paddr & ppaf->lun_mask) >> ppaf->lun_offset;
981 ppa.g.ch = (paddr & ppaf->ch_mask) >> ppaf->ch_offset;
982 ppa.g.pl = (paddr & ppaf->pln_mask) >> ppaf->pln_offset;
983 ppa.g.sec = (paddr & ppaf->sec_mask) >> ppaf->sec_offset;
984 } else {
985 struct pblk_addrf *uaddrf = &pblk->uaddrf;
986 int secs, chnls, luns;
987
988 ppa.ppa = 0;
989
990 ppa.m.chk = line_id;
991
992 paddr = div_u64_rem(paddr, uaddrf->sec_stripe, &secs);
993 ppa.m.sec = secs;
994
995 paddr = div_u64_rem(paddr, uaddrf->ch_stripe, &chnls);
996 ppa.m.grp = chnls;
997
998 paddr = div_u64_rem(paddr, uaddrf->lun_stripe, &luns);
999 ppa.m.pu = luns;
1000
1001 ppa.m.sec += uaddrf->sec_stripe * paddr;
1002 }
938 1003
939 return ppa; 1004 return ppa;
940} 1005}
@@ -942,13 +1007,30 @@ static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
942static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, 1007static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
943 struct ppa_addr p) 1008 struct ppa_addr p)
944{ 1009{
1010 struct nvm_tgt_dev *dev = pblk->dev;
1011 struct nvm_geo *geo = &dev->geo;
945 u64 paddr; 1012 u64 paddr;
946 1013
947 paddr = (u64)p.g.pg << pblk->ppaf.pg_offset; 1014 if (geo->version == NVM_OCSSD_SPEC_12) {
948 paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset; 1015 struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
949 paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset; 1016
950 paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset; 1017 paddr = (u64)p.g.ch << ppaf->ch_offset;
951 paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset; 1018 paddr |= (u64)p.g.lun << ppaf->lun_offset;
1019 paddr |= (u64)p.g.pg << ppaf->pg_offset;
1020 paddr |= (u64)p.g.pl << ppaf->pln_offset;
1021 paddr |= (u64)p.g.sec << ppaf->sec_offset;
1022 } else {
1023 struct pblk_addrf *uaddrf = &pblk->uaddrf;
1024 u64 secs = p.m.sec;
1025 int sec_stripe;
1026
1027 paddr = (u64)p.m.grp * uaddrf->sec_stripe;
1028 paddr += (u64)p.m.pu * uaddrf->sec_lun_stripe;
1029
1030 secs = div_u64_rem(secs, uaddrf->sec_stripe, &sec_stripe);
1031 paddr += secs * uaddrf->sec_ws_stripe;
1032 paddr += sec_stripe;
1033 }
952 1034
953 return paddr; 1035 return paddr;
954} 1036}
@@ -965,18 +1047,37 @@ static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
965 ppa64.c.line = ppa32 & ((~0U) >> 1); 1047 ppa64.c.line = ppa32 & ((~0U) >> 1);
966 ppa64.c.is_cached = 1; 1048 ppa64.c.is_cached = 1;
967 } else { 1049 } else {
968 ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >> 1050 struct nvm_tgt_dev *dev = pblk->dev;
969 pblk->ppaf.blk_offset; 1051 struct nvm_geo *geo = &dev->geo;
970 ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >> 1052
971 pblk->ppaf.pg_offset; 1053 if (geo->version == NVM_OCSSD_SPEC_12) {
972 ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >> 1054 struct nvm_addrf_12 *ppaf =
973 pblk->ppaf.lun_offset; 1055 (struct nvm_addrf_12 *)&pblk->addrf;
974 ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >> 1056
975 pblk->ppaf.ch_offset; 1057 ppa64.g.ch = (ppa32 & ppaf->ch_mask) >>
976 ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >> 1058 ppaf->ch_offset;
977 pblk->ppaf.pln_offset; 1059 ppa64.g.lun = (ppa32 & ppaf->lun_mask) >>
978 ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >> 1060 ppaf->lun_offset;
979 pblk->ppaf.sec_offset; 1061 ppa64.g.blk = (ppa32 & ppaf->blk_mask) >>
1062 ppaf->blk_offset;
1063 ppa64.g.pg = (ppa32 & ppaf->pg_mask) >>
1064 ppaf->pg_offset;
1065 ppa64.g.pl = (ppa32 & ppaf->pln_mask) >>
1066 ppaf->pln_offset;
1067 ppa64.g.sec = (ppa32 & ppaf->sec_mask) >>
1068 ppaf->sec_offset;
1069 } else {
1070 struct nvm_addrf *lbaf = &pblk->addrf;
1071
1072 ppa64.m.grp = (ppa32 & lbaf->ch_mask) >>
1073 lbaf->ch_offset;
1074 ppa64.m.pu = (ppa32 & lbaf->lun_mask) >>
1075 lbaf->lun_offset;
1076 ppa64.m.chk = (ppa32 & lbaf->chk_mask) >>
1077 lbaf->chk_offset;
1078 ppa64.m.sec = (ppa32 & lbaf->sec_mask) >>
1079 lbaf->sec_offset;
1080 }
980 } 1081 }
981 1082
982 return ppa64; 1083 return ppa64;
@@ -992,12 +1093,27 @@ static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
992 ppa32 |= ppa64.c.line; 1093 ppa32 |= ppa64.c.line;
993 ppa32 |= 1U << 31; 1094 ppa32 |= 1U << 31;
994 } else { 1095 } else {
995 ppa32 |= ppa64.g.blk << pblk->ppaf.blk_offset; 1096 struct nvm_tgt_dev *dev = pblk->dev;
996 ppa32 |= ppa64.g.pg << pblk->ppaf.pg_offset; 1097 struct nvm_geo *geo = &dev->geo;
997 ppa32 |= ppa64.g.lun << pblk->ppaf.lun_offset; 1098
998 ppa32 |= ppa64.g.ch << pblk->ppaf.ch_offset; 1099 if (geo->version == NVM_OCSSD_SPEC_12) {
999 ppa32 |= ppa64.g.pl << pblk->ppaf.pln_offset; 1100 struct nvm_addrf_12 *ppaf =
1000 ppa32 |= ppa64.g.sec << pblk->ppaf.sec_offset; 1101 (struct nvm_addrf_12 *)&pblk->addrf;
1102
1103 ppa32 |= ppa64.g.ch << ppaf->ch_offset;
1104 ppa32 |= ppa64.g.lun << ppaf->lun_offset;
1105 ppa32 |= ppa64.g.blk << ppaf->blk_offset;
1106 ppa32 |= ppa64.g.pg << ppaf->pg_offset;
1107 ppa32 |= ppa64.g.pl << ppaf->pln_offset;
1108 ppa32 |= ppa64.g.sec << ppaf->sec_offset;
1109 } else {
1110 struct nvm_addrf *lbaf = &pblk->addrf;
1111
1112 ppa32 |= ppa64.m.grp << lbaf->ch_offset;
1113 ppa32 |= ppa64.m.pu << lbaf->lun_offset;
1114 ppa32 |= ppa64.m.chk << lbaf->chk_offset;
1115 ppa32 |= ppa64.m.sec << lbaf->sec_offset;
1116 }
1001 } 1117 }
1002 1118
1003 return ppa32; 1119 return ppa32;
@@ -1008,7 +1124,7 @@ static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
1008{ 1124{
1009 struct ppa_addr ppa; 1125 struct ppa_addr ppa;
1010 1126
1011 if (pblk->ppaf_bitsize < 32) { 1127 if (pblk->addrf_len < 32) {
1012 u32 *map = (u32 *)pblk->trans_map; 1128 u32 *map = (u32 *)pblk->trans_map;
1013 1129
1014 ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); 1130 ppa = pblk_ppa32_to_ppa64(pblk, map[lba]);
@@ -1024,7 +1140,7 @@ static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
1024static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, 1140static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba,
1025 struct ppa_addr ppa) 1141 struct ppa_addr ppa)
1026{ 1142{
1027 if (pblk->ppaf_bitsize < 32) { 1143 if (pblk->addrf_len < 32) {
1028 u32 *map = (u32 *)pblk->trans_map; 1144 u32 *map = (u32 *)pblk->trans_map;
1029 1145
1030 map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); 1146 map[lba] = pblk_ppa64_to_ppa32(pblk, ppa);
@@ -1115,7 +1231,10 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
1115 struct nvm_geo *geo = &dev->geo; 1231 struct nvm_geo *geo = &dev->geo;
1116 int flags; 1232 int flags;
1117 1233
1118 flags = geo->plane_mode >> 1; 1234 if (geo->version == NVM_OCSSD_SPEC_20)
1235 return 0;
1236
1237 flags = geo->pln_mode >> 1;
1119 1238
1120 if (type == PBLK_WRITE) 1239 if (type == PBLK_WRITE)
1121 flags |= NVM_IO_SCRAMBLE_ENABLE; 1240 flags |= NVM_IO_SCRAMBLE_ENABLE;
@@ -1134,9 +1253,12 @@ static inline int pblk_set_read_mode(struct pblk *pblk, int type)
1134 struct nvm_geo *geo = &dev->geo; 1253 struct nvm_geo *geo = &dev->geo;
1135 int flags; 1254 int flags;
1136 1255
1256 if (geo->version == NVM_OCSSD_SPEC_20)
1257 return 0;
1258
1137 flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; 1259 flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
1138 if (type == PBLK_READ_SEQUENTIAL) 1260 if (type == PBLK_READ_SEQUENTIAL)
1139 flags |= geo->plane_mode >> 1; 1261 flags |= geo->pln_mode >> 1;
1140 1262
1141 return flags; 1263 return flags;
1142} 1264}
@@ -1147,16 +1269,21 @@ static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
1147} 1269}
1148 1270
1149#ifdef CONFIG_NVM_DEBUG 1271#ifdef CONFIG_NVM_DEBUG
1150static inline void print_ppa(struct ppa_addr *p, char *msg, int error) 1272static inline void print_ppa(struct nvm_geo *geo, struct ppa_addr *p,
1273 char *msg, int error)
1151{ 1274{
1152 if (p->c.is_cached) { 1275 if (p->c.is_cached) {
1153 pr_err("ppa: (%s: %x) cache line: %llu\n", 1276 pr_err("ppa: (%s: %x) cache line: %llu\n",
1154 msg, error, (u64)p->c.line); 1277 msg, error, (u64)p->c.line);
1155 } else { 1278 } else if (geo->version == NVM_OCSSD_SPEC_12) {
1156 pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n", 1279 pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n",
1157 msg, error, 1280 msg, error,
1158 p->g.ch, p->g.lun, p->g.blk, 1281 p->g.ch, p->g.lun, p->g.blk,
1159 p->g.pg, p->g.pl, p->g.sec); 1282 p->g.pg, p->g.pl, p->g.sec);
1283 } else {
1284 pr_err("ppa: (%s: %x):ch:%d,lun:%d,chk:%d,sec:%d\n",
1285 msg, error,
1286 p->m.grp, p->m.pu, p->m.chk, p->m.sec);
1160 } 1287 }
1161} 1288}
1162 1289
@@ -1166,13 +1293,13 @@ static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd,
1166 int bit = -1; 1293 int bit = -1;
1167 1294
1168 if (rqd->nr_ppas == 1) { 1295 if (rqd->nr_ppas == 1) {
1169 print_ppa(&rqd->ppa_addr, "rqd", error); 1296 print_ppa(&pblk->dev->geo, &rqd->ppa_addr, "rqd", error);
1170 return; 1297 return;
1171 } 1298 }
1172 1299
1173 while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas, 1300 while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas,
1174 bit + 1)) < rqd->nr_ppas) { 1301 bit + 1)) < rqd->nr_ppas) {
1175 print_ppa(&rqd->ppa_list[bit], "rqd", error); 1302 print_ppa(&pblk->dev->geo, &rqd->ppa_list[bit], "rqd", error);
1176 } 1303 }
1177 1304
1178 pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status); 1305 pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
@@ -1188,16 +1315,25 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
1188 for (i = 0; i < nr_ppas; i++) { 1315 for (i = 0; i < nr_ppas; i++) {
1189 ppa = &ppas[i]; 1316 ppa = &ppas[i];
1190 1317
1191 if (!ppa->c.is_cached && 1318 if (geo->version == NVM_OCSSD_SPEC_12) {
1192 ppa->g.ch < geo->nr_chnls && 1319 if (!ppa->c.is_cached &&
1193 ppa->g.lun < geo->nr_luns && 1320 ppa->g.ch < geo->num_ch &&
1194 ppa->g.pl < geo->nr_planes && 1321 ppa->g.lun < geo->num_lun &&
1195 ppa->g.blk < geo->nr_chks && 1322 ppa->g.pl < geo->num_pln &&
1196 ppa->g.pg < geo->ws_per_chk && 1323 ppa->g.blk < geo->num_chk &&
1197 ppa->g.sec < geo->sec_per_pg) 1324 ppa->g.pg < geo->num_pg &&
1198 continue; 1325 ppa->g.sec < geo->ws_min)
1326 continue;
1327 } else {
1328 if (!ppa->c.is_cached &&
1329 ppa->m.grp < geo->num_ch &&
1330 ppa->m.pu < geo->num_lun &&
1331 ppa->m.chk < geo->num_chk &&
1332 ppa->m.sec < geo->clba)
1333 continue;
1334 }
1199 1335
1200 print_ppa(ppa, "boundary", i); 1336 print_ppa(geo, ppa, "boundary", i);
1201 1337
1202 return 1; 1338 return 1;
1203 } 1339 }
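For the 2.0 path, addr_to_gen_ppa() and pblk_dev_ppa_to_line_addr() above replace the 1.2 mask/shift format with stripe arithmetic over pblk_addrf. The hedged userspace sketch below illustrates that round trip; it is not part of the patch, the stripe widths are made up, and it assumes sec_lun_stripe = sec_stripe * ch_stripe and sec_ws_stripe = sec_stripe * ch_stripe * lun_stripe, which is what makes the two conversions inverses of each other:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

/* Invented stripe widths; the kernel derives these from the device geometry. */
struct pblk_addrf {
	int sec_stripe;     /* sectors written to one PU before moving on */
	int ch_stripe;      /* number of groups (channels) */
	int lun_stripe;     /* number of PUs (LUNs) per group */
	int sec_lun_stripe; /* assumed: sec_stripe * ch_stripe */
	int sec_ws_stripe;  /* assumed: sec_stripe * ch_stripe * lun_stripe */
};

struct gen_ppa { int grp, pu, chk, sec; };

/* paddr -> (grp, pu, sec) within one chunk line, mirroring the 2.0 branch of
 * addr_to_gen_ppa(): peel off sector, channel and LUN stripes, then fold the
 * remaining wraps back into the sector offset.
 */
static struct gen_ppa to_gen(const struct pblk_addrf *af, uint64_t paddr, int chk)
{
	struct gen_ppa p = { .chk = chk };

	p.sec = paddr % af->sec_stripe;
	paddr /= af->sec_stripe;
	p.grp = paddr % af->ch_stripe;
	paddr /= af->ch_stripe;
	p.pu = paddr % af->lun_stripe;
	paddr /= af->lun_stripe;
	p.sec += af->sec_stripe * paddr;

	return p;
}

/* (grp, pu, sec) -> paddr, mirroring the 2.0 branch of
 * pblk_dev_ppa_to_line_addr().
 */
static uint64_t to_paddr(const struct pblk_addrf *af, struct gen_ppa p)
{
	uint64_t paddr;

	paddr = (uint64_t)p.grp * af->sec_stripe;
	paddr += (uint64_t)p.pu * af->sec_lun_stripe;
	paddr += (uint64_t)(p.sec / af->sec_stripe) * af->sec_ws_stripe;
	paddr += p.sec % af->sec_stripe;

	return paddr;
}

int main(void)
{
	struct pblk_addrf af = {
		.sec_stripe = 4, .ch_stripe = 2, .lun_stripe = 4,
		.sec_lun_stripe = 4 * 2, .sec_ws_stripe = 4 * 2 * 4,
	};
	uint64_t paddr;

	for (paddr = 0; paddr < 256; paddr++)
		assert(to_paddr(&af, to_gen(&af, paddr, 0)) == paddr);

	printf("paddr <-> generic address round trip ok\n");
	return 0;
}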
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 458e1d38577d..004cc3cc6123 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,7 +287,8 @@ do { \
287 break; \ 287 break; \
288 \ 288 \
289 mutex_unlock(&(ca)->set->bucket_lock); \ 289 mutex_unlock(&(ca)->set->bucket_lock); \
290 if (kthread_should_stop()) { \ 290 if (kthread_should_stop() || \
291 test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
291 set_current_state(TASK_RUNNING); \ 292 set_current_state(TASK_RUNNING); \
292 return 0; \ 293 return 0; \
293 } \ 294 } \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 12e5197f186c..d338b7086013 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -188,6 +188,7 @@
188#include <linux/refcount.h> 188#include <linux/refcount.h>
189#include <linux/types.h> 189#include <linux/types.h>
190#include <linux/workqueue.h> 190#include <linux/workqueue.h>
191#include <linux/kthread.h>
191 192
192#include "bset.h" 193#include "bset.h"
193#include "util.h" 194#include "util.h"
@@ -258,10 +259,11 @@ struct bcache_device {
258 struct gendisk *disk; 259 struct gendisk *disk;
259 260
260 unsigned long flags; 261 unsigned long flags;
261#define BCACHE_DEV_CLOSING 0 262#define BCACHE_DEV_CLOSING 0
262#define BCACHE_DEV_DETACHING 1 263#define BCACHE_DEV_DETACHING 1
263#define BCACHE_DEV_UNLINK_DONE 2 264#define BCACHE_DEV_UNLINK_DONE 2
264 265#define BCACHE_DEV_WB_RUNNING 3
266#define BCACHE_DEV_RATE_DW_RUNNING 4
265 unsigned nr_stripes; 267 unsigned nr_stripes;
266 unsigned stripe_size; 268 unsigned stripe_size;
267 atomic_t *stripe_sectors_dirty; 269 atomic_t *stripe_sectors_dirty;
@@ -286,6 +288,12 @@ struct io {
286 sector_t last; 288 sector_t last;
287}; 289};
288 290
291enum stop_on_failure {
292 BCH_CACHED_DEV_STOP_AUTO = 0,
293 BCH_CACHED_DEV_STOP_ALWAYS,
294 BCH_CACHED_DEV_STOP_MODE_MAX,
295};
296
289struct cached_dev { 297struct cached_dev {
290 struct list_head list; 298 struct list_head list;
291 struct bcache_device disk; 299 struct bcache_device disk;
@@ -359,6 +367,7 @@ struct cached_dev {
359 unsigned sequential_cutoff; 367 unsigned sequential_cutoff;
360 unsigned readahead; 368 unsigned readahead;
361 369
370 unsigned io_disable:1;
362 unsigned verify:1; 371 unsigned verify:1;
363 unsigned bypass_torture_test:1; 372 unsigned bypass_torture_test:1;
364 373
@@ -378,6 +387,11 @@ struct cached_dev {
378 unsigned writeback_rate_i_term_inverse; 387 unsigned writeback_rate_i_term_inverse;
379 unsigned writeback_rate_p_term_inverse; 388 unsigned writeback_rate_p_term_inverse;
380 unsigned writeback_rate_minimum; 389 unsigned writeback_rate_minimum;
390
391 enum stop_on_failure stop_when_cache_set_failed;
392#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
393 atomic_t io_errors;
394 unsigned error_limit;
381}; 395};
382 396
383enum alloc_reserve { 397enum alloc_reserve {
@@ -474,10 +488,15 @@ struct gc_stat {
474 * 488 *
475 * CACHE_SET_RUNNING means all cache devices have been registered and journal 489 * CACHE_SET_RUNNING means all cache devices have been registered and journal
476 * replay is complete. 490 * replay is complete.
491 *
 492 * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set; all
493 * external and internal I/O should be denied when this flag is set.
494 *
477 */ 495 */
478#define CACHE_SET_UNREGISTERING 0 496#define CACHE_SET_UNREGISTERING 0
479#define CACHE_SET_STOPPING 1 497#define CACHE_SET_STOPPING 1
480#define CACHE_SET_RUNNING 2 498#define CACHE_SET_RUNNING 2
499#define CACHE_SET_IO_DISABLE 3
481 500
482struct cache_set { 501struct cache_set {
483 struct closure cl; 502 struct closure cl;
@@ -867,8 +886,36 @@ static inline void wake_up_allocators(struct cache_set *c)
867 wake_up_process(ca->alloc_thread); 886 wake_up_process(ca->alloc_thread);
868} 887}
869 888
889static inline void closure_bio_submit(struct cache_set *c,
890 struct bio *bio,
891 struct closure *cl)
892{
893 closure_get(cl);
894 if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
895 bio->bi_status = BLK_STS_IOERR;
896 bio_endio(bio);
897 return;
898 }
899 generic_make_request(bio);
900}
901
902/*
 903 * Prevent a kthread from exiting directly, and make sure it is still
 904 * alive when kthread_stop() is called to stop it. If a kthread might be
 905 * stopped by the CACHE_SET_IO_DISABLE bit being set, calling
 906 * wait_for_kthread_stop() is necessary before the kthread returns.
907 */
908static inline void wait_for_kthread_stop(void)
909{
910 while (!kthread_should_stop()) {
911 set_current_state(TASK_INTERRUPTIBLE);
912 schedule();
913 }
914}
915
870/* Forward declarations */ 916/* Forward declarations */
871 917
918void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
872void bch_count_io_errors(struct cache *, blk_status_t, int, const char *); 919void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
873void bch_bbio_count_io_errors(struct cache_set *, struct bio *, 920void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
874 blk_status_t, const char *); 921 blk_status_t, const char *);
@@ -896,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
896 struct bkey *, int, bool); 943 struct bkey *, int, bool);
897bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned, 944bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
898 unsigned, unsigned, bool); 945 unsigned, unsigned, bool);
946bool bch_cached_dev_error(struct cached_dev *dc);
899 947
900__printf(2, 3) 948__printf(2, 3)
901bool bch_cache_set_error(struct cache_set *, const char *, ...); 949bool bch_cache_set_error(struct cache_set *, const char *, ...);
@@ -905,6 +953,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);
905 953
906extern struct workqueue_struct *bcache_wq; 954extern struct workqueue_struct *bcache_wq;
907extern const char * const bch_cache_modes[]; 955extern const char * const bch_cache_modes[];
956extern const char * const bch_stop_on_failure_modes[];
908extern struct mutex bch_register_lock; 957extern struct mutex bch_register_lock;
909extern struct list_head bch_cache_sets; 958extern struct list_head bch_cache_sets;
910 959
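
The new closure_bio_submit() helper above gates every submission on CACHE_SET_IO_DISABLE and completes the bio with BLK_STS_IOERR instead of reaching the device, while wait_for_kthread_stop() keeps a self-stopping kthread parked until kthread_stop() is actually invoked. Below is a minimal userspace sketch of the same fail-fast gate, assuming an atomic flag and a plain callback in place of the kernel's bio/closure machinery; struct fake_bio, gated_submit() and the -5 status value are illustrative stand-ins, not part of the patch, and the closure refcounting is omitted.

/* Userspace model of the CACHE_SET_IO_DISABLE fast-fail gate. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_bio {
	int status;                      /* 0 = OK, -5 = EIO analogue        */
	void (*end_io)(struct fake_bio *);
};

static atomic_bool io_disable;           /* models CACHE_SET_IO_DISABLE      */

static void submit_lower_layer(struct fake_bio *bio)
{
	bio->status = 0;                 /* pretend the device completed it  */
	bio->end_io(bio);
}

/* models closure_bio_submit(): fail fast instead of touching the device */
static void gated_submit(struct fake_bio *bio)
{
	if (atomic_load(&io_disable)) {
		bio->status = -5;        /* BLK_STS_IOERR analogue           */
		bio->end_io(bio);
		return;
	}
	submit_lower_layer(bio);
}

static void done(struct fake_bio *bio)
{
	printf("bio completed, status=%d\n", bio->status);
}

int main(void)
{
	struct fake_bio bio = { .end_io = done };

	gated_submit(&bio);              /* reaches the "device"             */
	atomic_store(&io_disable, true); /* cache set declared broken        */
	gated_submit(&bio);              /* completes immediately with -5    */
	return 0;
}

The point of the real helper is the same: once the cache set is marked broken, callers keep their completion path (bi_end_io) but no new I/O is issued.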
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e56d3ecdbfcb..579c696a5fe0 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1072,7 +1072,7 @@ EXPORT_SYMBOL(bch_btree_iter_init);
1072static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, 1072static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
1073 btree_iter_cmp_fn *cmp) 1073 btree_iter_cmp_fn *cmp)
1074{ 1074{
1075 struct btree_iter_set unused; 1075 struct btree_iter_set b __maybe_unused;
1076 struct bkey *ret = NULL; 1076 struct bkey *ret = NULL;
1077 1077
1078 if (!btree_iter_end(iter)) { 1078 if (!btree_iter_end(iter)) {
@@ -1087,7 +1087,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
1087 } 1087 }
1088 1088
1089 if (iter->data->k == iter->data->end) 1089 if (iter->data->k == iter->data->end)
1090 heap_pop(iter, unused, cmp); 1090 heap_pop(iter, b, cmp);
1091 else 1091 else
1092 heap_sift(iter, 0, cmp); 1092 heap_sift(iter, 0, cmp);
1093 } 1093 }
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index fa506c1aa524..0c24280f3b98 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -531,14 +531,15 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
531#ifdef CONFIG_BCACHE_DEBUG 531#ifdef CONFIG_BCACHE_DEBUG
532 532
533int __bch_count_data(struct btree_keys *); 533int __bch_count_data(struct btree_keys *);
534void __bch_check_keys(struct btree_keys *, const char *, ...); 534void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
535void bch_dump_bset(struct btree_keys *, struct bset *, unsigned); 535void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
536void bch_dump_bucket(struct btree_keys *); 536void bch_dump_bucket(struct btree_keys *);
537 537
538#else 538#else
539 539
540static inline int __bch_count_data(struct btree_keys *b) { return -1; } 540static inline int __bch_count_data(struct btree_keys *b) { return -1; }
541static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {} 541static inline void __printf(2, 3)
542 __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
542static inline void bch_dump_bucket(struct btree_keys *b) {} 543static inline void bch_dump_bucket(struct btree_keys *b) {}
543void bch_dump_bset(struct btree_keys *, struct bset *, unsigned); 544void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
544 545
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fad9fe8817eb..17936b2dc7d6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -665,6 +665,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
665 struct btree *b, *t; 665 struct btree *b, *t;
666 unsigned long i, nr = sc->nr_to_scan; 666 unsigned long i, nr = sc->nr_to_scan;
667 unsigned long freed = 0; 667 unsigned long freed = 0;
668 unsigned int btree_cache_used;
668 669
669 if (c->shrinker_disabled) 670 if (c->shrinker_disabled)
670 return SHRINK_STOP; 671 return SHRINK_STOP;
@@ -689,9 +690,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
689 nr = min_t(unsigned long, nr, mca_can_free(c)); 690 nr = min_t(unsigned long, nr, mca_can_free(c));
690 691
691 i = 0; 692 i = 0;
693 btree_cache_used = c->btree_cache_used;
692 list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { 694 list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
693 if (freed >= nr) 695 if (nr <= 0)
694 break; 696 goto out;
695 697
696 if (++i > 3 && 698 if (++i > 3 &&
697 !mca_reap(b, 0, false)) { 699 !mca_reap(b, 0, false)) {
@@ -699,9 +701,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
699 rw_unlock(true, b); 701 rw_unlock(true, b);
700 freed++; 702 freed++;
701 } 703 }
704 nr--;
702 } 705 }
703 706
704 for (i = 0; (nr--) && i < c->btree_cache_used; i++) { 707 for (; (nr--) && i < btree_cache_used; i++) {
705 if (list_empty(&c->btree_cache)) 708 if (list_empty(&c->btree_cache))
706 goto out; 709 goto out;
707 710
@@ -719,7 +722,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
719 } 722 }
720out: 723out:
721 mutex_unlock(&c->bucket_lock); 724 mutex_unlock(&c->bucket_lock);
722 return freed; 725 return freed * c->btree_pages;
723} 726}
724 727
725static unsigned long bch_mca_count(struct shrinker *shrink, 728static unsigned long bch_mca_count(struct shrinker *shrink,
@@ -959,7 +962,7 @@ err:
959 return b; 962 return b;
960} 963}
961 964
962/** 965/*
963 * bch_btree_node_get - find a btree node in the cache and lock it, reading it 966 * bch_btree_node_get - find a btree node in the cache and lock it, reading it
964 * in from disk if necessary. 967 * in from disk if necessary.
965 * 968 *
@@ -1744,6 +1747,7 @@ static void bch_btree_gc(struct cache_set *c)
1744 1747
1745 btree_gc_start(c); 1748 btree_gc_start(c);
1746 1749
 1750 /* if CACHE_SET_IO_DISABLE is set, the gc thread should stop too */
1747 do { 1751 do {
1748 ret = btree_root(gc_root, c, &op, &writes, &stats); 1752 ret = btree_root(gc_root, c, &op, &writes, &stats);
1749 closure_sync(&writes); 1753 closure_sync(&writes);
@@ -1751,7 +1755,7 @@ static void bch_btree_gc(struct cache_set *c)
1751 1755
1752 if (ret && ret != -EAGAIN) 1756 if (ret && ret != -EAGAIN)
1753 pr_warn("gc failed!"); 1757 pr_warn("gc failed!");
1754 } while (ret); 1758 } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
1755 1759
1756 bch_btree_gc_finish(c); 1760 bch_btree_gc_finish(c);
1757 wake_up_allocators(c); 1761 wake_up_allocators(c);
@@ -1789,15 +1793,19 @@ static int bch_gc_thread(void *arg)
1789 1793
1790 while (1) { 1794 while (1) {
1791 wait_event_interruptible(c->gc_wait, 1795 wait_event_interruptible(c->gc_wait,
1792 kthread_should_stop() || gc_should_run(c)); 1796 kthread_should_stop() ||
1797 test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
1798 gc_should_run(c));
1793 1799
1794 if (kthread_should_stop()) 1800 if (kthread_should_stop() ||
1801 test_bit(CACHE_SET_IO_DISABLE, &c->flags))
1795 break; 1802 break;
1796 1803
1797 set_gc_sectors(c); 1804 set_gc_sectors(c);
1798 bch_btree_gc(c); 1805 bch_btree_gc(c);
1799 } 1806 }
1800 1807
1808 wait_for_kthread_stop();
1801 return 0; 1809 return 0;
1802} 1810}
1803 1811
@@ -2170,7 +2178,7 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
2170 2178
2171 if (b->key.ptr[0] != btree_ptr || 2179 if (b->key.ptr[0] != btree_ptr ||
2172 b->seq != seq + 1) { 2180 b->seq != seq + 1) {
2173 op->lock = b->level; 2181 op->lock = b->level;
2174 goto out; 2182 goto out;
2175 } 2183 }
2176 } 2184 }
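
The bch_mca_scan() change above also lines up the units of the two shrinker callbacks: the count callback reports freeable btree nodes scaled by c->btree_pages, so the scan callback now decrements nr per node visited and returns freed * c->btree_pages. A tiny bookkeeping sketch with made-up numbers; btree_pages, freeable_nodes and freed_nodes are illustrative values, not taken from the patch.

/* Units sketch: a shrinker's count and scan callbacks must agree. */
#include <stdio.h>

int main(void)
{
	unsigned long btree_pages    = 64;  /* pages per btree node (example) */
	unsigned long freeable_nodes = 10;
	unsigned long freed_nodes    = 3;

	/* the count side reports a page-based total ...                     */
	unsigned long counted = freeable_nodes * btree_pages;
	/* ... so the scan side must report what it freed in pages too.      */
	unsigned long scanned = freed_nodes * btree_pages;

	printf("count=%lu scan=%lu (same unit: pages)\n", counted, scanned);
	return 0;
}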
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 7f12920c14f7..0e14969182c6 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -46,7 +46,7 @@ void closure_sub(struct closure *cl, int v)
46} 46}
47EXPORT_SYMBOL(closure_sub); 47EXPORT_SYMBOL(closure_sub);
48 48
49/** 49/*
50 * closure_put - decrement a closure's refcount 50 * closure_put - decrement a closure's refcount
51 */ 51 */
52void closure_put(struct closure *cl) 52void closure_put(struct closure *cl)
@@ -55,7 +55,7 @@ void closure_put(struct closure *cl)
55} 55}
56EXPORT_SYMBOL(closure_put); 56EXPORT_SYMBOL(closure_put);
57 57
58/** 58/*
59 * closure_wake_up - wake up all closures on a wait list, without memory barrier 59 * closure_wake_up - wake up all closures on a wait list, without memory barrier
60 */ 60 */
61void __closure_wake_up(struct closure_waitlist *wait_list) 61void __closure_wake_up(struct closure_waitlist *wait_list)
@@ -79,9 +79,9 @@ EXPORT_SYMBOL(__closure_wake_up);
79 79
80/** 80/**
81 * closure_wait - add a closure to a waitlist 81 * closure_wait - add a closure to a waitlist
82 * 82 * @waitlist: will own a ref on @cl, which will be released when
83 * @waitlist will own a ref on @cl, which will be released when
84 * closure_wake_up() is called on @waitlist. 83 * closure_wake_up() is called on @waitlist.
84 * @cl: closure pointer.
85 * 85 *
86 */ 86 */
87bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) 87bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
157} 157}
158EXPORT_SYMBOL(closure_debug_destroy); 158EXPORT_SYMBOL(closure_debug_destroy);
159 159
160static struct dentry *debug; 160static struct dentry *closure_debug;
161 161
162static int debug_seq_show(struct seq_file *f, void *data) 162static int debug_seq_show(struct seq_file *f, void *data)
163{ 163{
@@ -199,11 +199,12 @@ static const struct file_operations debug_ops = {
199 .release = single_release 199 .release = single_release
200}; 200};
201 201
202void __init closure_debug_init(void) 202int __init closure_debug_init(void)
203{ 203{
204 debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops); 204 closure_debug = debugfs_create_file("closures",
205 0400, bcache_debug, NULL, &debug_ops);
206 return IS_ERR_OR_NULL(closure_debug);
205} 207}
206
207#endif 208#endif
208 209
209MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>"); 210MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 3b9dfc9962ad..71427eb5fdae 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -105,6 +105,7 @@
105struct closure; 105struct closure;
106struct closure_syncer; 106struct closure_syncer;
107typedef void (closure_fn) (struct closure *); 107typedef void (closure_fn) (struct closure *);
108extern struct dentry *bcache_debug;
108 109
109struct closure_waitlist { 110struct closure_waitlist {
110 struct llist_head list; 111 struct llist_head list;
@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
185 186
186#ifdef CONFIG_BCACHE_CLOSURES_DEBUG 187#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
187 188
188void closure_debug_init(void); 189int closure_debug_init(void);
189void closure_debug_create(struct closure *cl); 190void closure_debug_create(struct closure *cl);
190void closure_debug_destroy(struct closure *cl); 191void closure_debug_destroy(struct closure *cl);
191 192
192#else 193#else
193 194
194static inline void closure_debug_init(void) {} 195static inline int closure_debug_init(void) { return 0; }
195static inline void closure_debug_create(struct closure *cl) {} 196static inline void closure_debug_create(struct closure *cl) {}
196static inline void closure_debug_destroy(struct closure *cl) {} 197static inline void closure_debug_destroy(struct closure *cl) {}
197 198
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index af89408befe8..028f7b386e01 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -17,7 +17,7 @@
17#include <linux/random.h> 17#include <linux/random.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19 19
20static struct dentry *debug; 20struct dentry *bcache_debug;
21 21
22#ifdef CONFIG_BCACHE_DEBUG 22#ifdef CONFIG_BCACHE_DEBUG
23 23
@@ -232,11 +232,11 @@ static const struct file_operations cache_set_debug_ops = {
232 232
233void bch_debug_init_cache_set(struct cache_set *c) 233void bch_debug_init_cache_set(struct cache_set *c)
234{ 234{
235 if (!IS_ERR_OR_NULL(debug)) { 235 if (!IS_ERR_OR_NULL(bcache_debug)) {
236 char name[50]; 236 char name[50];
237 snprintf(name, 50, "bcache-%pU", c->sb.set_uuid); 237 snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
238 238
239 c->debug = debugfs_create_file(name, 0400, debug, c, 239 c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
240 &cache_set_debug_ops); 240 &cache_set_debug_ops);
241 } 241 }
242} 242}
@@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c)
245 245
246void bch_debug_exit(void) 246void bch_debug_exit(void)
247{ 247{
248 if (!IS_ERR_OR_NULL(debug)) 248 if (!IS_ERR_OR_NULL(bcache_debug))
249 debugfs_remove_recursive(debug); 249 debugfs_remove_recursive(bcache_debug);
250} 250}
251 251
252int __init bch_debug_init(struct kobject *kobj) 252int __init bch_debug_init(struct kobject *kobj)
253{ 253{
254 debug = debugfs_create_dir("bcache", NULL); 254 bcache_debug = debugfs_create_dir("bcache", NULL);
255 255
256 return IS_ERR_OR_NULL(debug); 256 return IS_ERR_OR_NULL(bcache_debug);
257} 257}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f9d391711595..c334e6666461 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -534,7 +534,6 @@ err:
534static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) 534static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
535{ 535{
536 struct btree *b = container_of(bk, struct btree, keys); 536 struct btree *b = container_of(bk, struct btree, keys);
537 struct bucket *g;
538 unsigned i, stale; 537 unsigned i, stale;
539 538
540 if (!KEY_PTRS(k) || 539 if (!KEY_PTRS(k) ||
@@ -549,7 +548,6 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
549 return false; 548 return false;
550 549
551 for (i = 0; i < KEY_PTRS(k); i++) { 550 for (i = 0; i < KEY_PTRS(k); i++) {
552 g = PTR_BUCKET(b->c, k, i);
553 stale = ptr_stale(b->c, k, i); 551 stale = ptr_stale(b->c, k, i);
554 552
555 btree_bug_on(stale > 96, b, 553 btree_bug_on(stale > 96, b,
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index a783c5a41ff1..7fac97ae036e 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
38 bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev); 38 bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
39 39
40 b->submit_time_us = local_clock_us(); 40 b->submit_time_us = local_clock_us();
41 closure_bio_submit(bio, bio->bi_private); 41 closure_bio_submit(c, bio, bio->bi_private);
42} 42}
43 43
44void bch_submit_bbio(struct bio *bio, struct cache_set *c, 44void bch_submit_bbio(struct bio *bio, struct cache_set *c,
@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
50} 50}
51 51
52/* IO errors */ 52/* IO errors */
53void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
54{
55 char buf[BDEVNAME_SIZE];
56 unsigned errors;
57
58 WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
59
60 errors = atomic_add_return(1, &dc->io_errors);
61 if (errors < dc->error_limit)
62 pr_err("%s: IO error on backing device, unrecoverable",
63 bio_devname(bio, buf));
64 else
65 bch_cached_dev_error(dc);
66}
53 67
54void bch_count_io_errors(struct cache *ca, 68void bch_count_io_errors(struct cache *ca,
55 blk_status_t error, 69 blk_status_t error,
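
bch_count_backing_io_errors() above counts backing-device errors with an atomic and escalates to bch_cached_dev_error() once dc->error_limit is reached. The following userspace model shows that accounting; ERROR_LIMIT mirrors DEFAULT_CACHED_DEV_ERROR_LIMIT from the patch, while count_backing_io_error() and cached_dev_error() are illustrative stand-ins.

/* Userspace model of the backing-device I/O error accounting. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define ERROR_LIMIT 64                   /* DEFAULT_CACHED_DEV_ERROR_LIMIT */

static atomic_int io_errors;
static atomic_bool io_disable;

static void cached_dev_error(void)       /* models bch_cached_dev_error()  */
{
	atomic_store(&io_disable, true);
	printf("too many I/O errors, stopping the device\n");
}

static void count_backing_io_error(void) /* models bch_count_backing_io_errors() */
{
	int errors = atomic_fetch_add(&io_errors, 1) + 1;

	if (errors < ERROR_LIMIT)
		printf("I/O error %d on backing device, unrecoverable\n", errors);
	else
		cached_dev_error();
}

int main(void)
{
	for (int i = 0; i < ERROR_LIMIT; i++)
		count_backing_io_error();
	printf("io_disable=%d\n", (int)atomic_load(&io_disable));
	return 0;
}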
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1b736b860739..18f1b5239620 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
62 bio_set_op_attrs(bio, REQ_OP_READ, 0); 62 bio_set_op_attrs(bio, REQ_OP_READ, 0);
63 bch_bio_map(bio, data); 63 bch_bio_map(bio, data);
64 64
65 closure_bio_submit(bio, &cl); 65 closure_bio_submit(ca->set, bio, &cl);
66 closure_sync(&cl); 66 closure_sync(&cl);
67 67
68 /* This function could be simpler now since we no longer write 68 /* This function could be simpler now since we no longer write
@@ -493,7 +493,7 @@ static void journal_reclaim(struct cache_set *c)
493 struct cache *ca; 493 struct cache *ca;
494 uint64_t last_seq; 494 uint64_t last_seq;
495 unsigned iter, n = 0; 495 unsigned iter, n = 0;
496 atomic_t p; 496 atomic_t p __maybe_unused;
497 497
498 atomic_long_inc(&c->reclaim); 498 atomic_long_inc(&c->reclaim);
499 499
@@ -594,6 +594,7 @@ static void journal_write_done(struct closure *cl)
594} 594}
595 595
596static void journal_write_unlock(struct closure *cl) 596static void journal_write_unlock(struct closure *cl)
597 __releases(&c->journal.lock)
597{ 598{
598 struct cache_set *c = container_of(cl, struct cache_set, journal.io); 599 struct cache_set *c = container_of(cl, struct cache_set, journal.io);
599 600
@@ -674,7 +675,7 @@ static void journal_write_unlocked(struct closure *cl)
674 spin_unlock(&c->journal.lock); 675 spin_unlock(&c->journal.lock);
675 676
676 while ((bio = bio_list_pop(&list))) 677 while ((bio = bio_list_pop(&list)))
677 closure_bio_submit(bio, cl); 678 closure_bio_submit(c, bio, cl);
678 679
679 continue_at(cl, journal_write_done, NULL); 680 continue_at(cl, journal_write_done, NULL);
680} 681}
@@ -705,6 +706,7 @@ static void journal_try_write(struct cache_set *c)
705 706
706static struct journal_write *journal_wait_for_write(struct cache_set *c, 707static struct journal_write *journal_wait_for_write(struct cache_set *c,
707 unsigned nkeys) 708 unsigned nkeys)
709 __acquires(&c->journal.lock)
708{ 710{
709 size_t sectors; 711 size_t sectors;
710 struct closure cl; 712 struct closure cl;
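
The journal.c hunks above add sparse annotations: journal_write_unlock() is marked __releases(&c->journal.lock) and journal_wait_for_write() is marked __acquires(&c->journal.lock), documenting that these functions drop or return holding a lock taken on the caller's behalf. A self-contained sketch of the pattern, assuming the usual kernel-style macro definitions and using a pthread mutex with illustrative function names (wait_for_write(), write_unlock()):

/* Sketch of sparse lock annotations for asymmetric locking. */
#include <pthread.h>

#ifdef __CHECKER__
# define __acquires(x)	__attribute__((context(x, 0, 1)))
# define __releases(x)	__attribute__((context(x, 1, 0)))
#else
# define __acquires(x)
# define __releases(x)
#endif

static pthread_mutex_t journal_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns with journal_lock held; the caller must unlock it. */
static void wait_for_write(void)
	__acquires(&journal_lock)
{
	pthread_mutex_lock(&journal_lock);
	/* ... reserve journal space ... */
}

/* Drops a lock acquired elsewhere. */
static void write_unlock(void)
	__releases(&journal_lock)
{
	/* ... finish the write under the lock ... */
	pthread_mutex_unlock(&journal_lock);
}

int main(void)
{
	wait_for_write();
	write_unlock();
	return 0;
}

The annotations compile away for normal builds and only matter when the code is checked with sparse.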
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6422846b546e..a65e3365eeb9 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
139 } 139 }
140 140
141 op->insert_data_done = true; 141 op->insert_data_done = true;
 142 /* this bio_put() pairs with the bio_get() in bch_data_insert() */
142 bio_put(bio); 143 bio_put(bio);
143out: 144out:
144 continue_at(cl, bch_data_insert_keys, op->wq); 145 continue_at(cl, bch_data_insert_keys, op->wq);
@@ -295,6 +296,7 @@ err:
295 296
296/** 297/**
297 * bch_data_insert - stick some data in the cache 298 * bch_data_insert - stick some data in the cache
299 * @cl: closure pointer.
298 * 300 *
299 * This is the starting point for any data to end up in a cache device; it could 301 * This is the starting point for any data to end up in a cache device; it could
300 * be from a normal write, or a writeback write, or a write to a flash only 302 * be from a normal write, or a writeback write, or a write to a flash only
@@ -630,6 +632,41 @@ static void request_endio(struct bio *bio)
630 closure_put(cl); 632 closure_put(cl);
631} 633}
632 634
635static void backing_request_endio(struct bio *bio)
636{
637 struct closure *cl = bio->bi_private;
638
639 if (bio->bi_status) {
640 struct search *s = container_of(cl, struct search, cl);
641 struct cached_dev *dc = container_of(s->d,
642 struct cached_dev, disk);
643 /*
644 * If a bio has REQ_PREFLUSH for writeback mode, it is
 645 * specially assembled in cached_dev_write() for a non-zero
 646 * write request which has REQ_PREFLUSH. We don't set
 647 * s->iop.status for this failure; the status will be decided
 648 * by the result of the bch_data_insert() operation.
649 */
650 if (unlikely(s->iop.writeback &&
651 bio->bi_opf & REQ_PREFLUSH)) {
652 char buf[BDEVNAME_SIZE];
653
654 bio_devname(bio, buf);
655 pr_err("Can't flush %s: returned bi_status %i",
656 buf, bio->bi_status);
657 } else {
658 /* set to orig_bio->bi_status in bio_complete() */
659 s->iop.status = bio->bi_status;
660 }
661 s->recoverable = false;
662 /* should count I/O error for backing device here */
663 bch_count_backing_io_errors(dc, bio);
664 }
665
666 bio_put(bio);
667 closure_put(cl);
668}
669
633static void bio_complete(struct search *s) 670static void bio_complete(struct search *s)
634{ 671{
635 if (s->orig_bio) { 672 if (s->orig_bio) {
@@ -644,13 +681,21 @@ static void bio_complete(struct search *s)
644 } 681 }
645} 682}
646 683
647static void do_bio_hook(struct search *s, struct bio *orig_bio) 684static void do_bio_hook(struct search *s,
685 struct bio *orig_bio,
686 bio_end_io_t *end_io_fn)
648{ 687{
649 struct bio *bio = &s->bio.bio; 688 struct bio *bio = &s->bio.bio;
650 689
651 bio_init(bio, NULL, 0); 690 bio_init(bio, NULL, 0);
652 __bio_clone_fast(bio, orig_bio); 691 __bio_clone_fast(bio, orig_bio);
653 bio->bi_end_io = request_endio; 692 /*
693 * bi_end_io can be set separately somewhere else, e.g. the
 694 * variants in:
695 * - cache_bio->bi_end_io from cached_dev_cache_miss()
696 * - n->bi_end_io from cache_lookup_fn()
697 */
698 bio->bi_end_io = end_io_fn;
654 bio->bi_private = &s->cl; 699 bio->bi_private = &s->cl;
655 700
656 bio_cnt_set(bio, 3); 701 bio_cnt_set(bio, 3);
@@ -676,7 +721,7 @@ static inline struct search *search_alloc(struct bio *bio,
676 s = mempool_alloc(d->c->search, GFP_NOIO); 721 s = mempool_alloc(d->c->search, GFP_NOIO);
677 722
678 closure_init(&s->cl, NULL); 723 closure_init(&s->cl, NULL);
679 do_bio_hook(s, bio); 724 do_bio_hook(s, bio, request_endio);
680 725
681 s->orig_bio = bio; 726 s->orig_bio = bio;
682 s->cache_miss = NULL; 727 s->cache_miss = NULL;
@@ -743,11 +788,12 @@ static void cached_dev_read_error(struct closure *cl)
743 trace_bcache_read_retry(s->orig_bio); 788 trace_bcache_read_retry(s->orig_bio);
744 789
745 s->iop.status = 0; 790 s->iop.status = 0;
746 do_bio_hook(s, s->orig_bio); 791 do_bio_hook(s, s->orig_bio, backing_request_endio);
747 792
748 /* XXX: invalidate cache */ 793 /* XXX: invalidate cache */
749 794
750 closure_bio_submit(bio, cl); 795 /* I/O request sent to backing device */
796 closure_bio_submit(s->iop.c, bio, cl);
751 } 797 }
752 798
753 continue_at(cl, cached_dev_cache_miss_done, NULL); 799 continue_at(cl, cached_dev_cache_miss_done, NULL);
@@ -859,7 +905,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
859 bio_copy_dev(cache_bio, miss); 905 bio_copy_dev(cache_bio, miss);
860 cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9; 906 cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
861 907
862 cache_bio->bi_end_io = request_endio; 908 cache_bio->bi_end_io = backing_request_endio;
863 cache_bio->bi_private = &s->cl; 909 cache_bio->bi_private = &s->cl;
864 910
865 bch_bio_map(cache_bio, NULL); 911 bch_bio_map(cache_bio, NULL);
@@ -872,15 +918,17 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
872 s->cache_miss = miss; 918 s->cache_miss = miss;
873 s->iop.bio = cache_bio; 919 s->iop.bio = cache_bio;
874 bio_get(cache_bio); 920 bio_get(cache_bio);
875 closure_bio_submit(cache_bio, &s->cl); 921 /* I/O request sent to backing device */
922 closure_bio_submit(s->iop.c, cache_bio, &s->cl);
876 923
877 return ret; 924 return ret;
878out_put: 925out_put:
879 bio_put(cache_bio); 926 bio_put(cache_bio);
880out_submit: 927out_submit:
881 miss->bi_end_io = request_endio; 928 miss->bi_end_io = backing_request_endio;
882 miss->bi_private = &s->cl; 929 miss->bi_private = &s->cl;
883 closure_bio_submit(miss, &s->cl); 930 /* I/O request sent to backing device */
931 closure_bio_submit(s->iop.c, miss, &s->cl);
884 return ret; 932 return ret;
885} 933}
886 934
@@ -943,31 +991,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
943 s->iop.bio = s->orig_bio; 991 s->iop.bio = s->orig_bio;
944 bio_get(s->iop.bio); 992 bio_get(s->iop.bio);
945 993
946 if ((bio_op(bio) != REQ_OP_DISCARD) || 994 if (bio_op(bio) == REQ_OP_DISCARD &&
947 blk_queue_discard(bdev_get_queue(dc->bdev))) 995 !blk_queue_discard(bdev_get_queue(dc->bdev)))
948 closure_bio_submit(bio, cl); 996 goto insert_data;
997
998 /* I/O request sent to backing device */
999 bio->bi_end_io = backing_request_endio;
1000 closure_bio_submit(s->iop.c, bio, cl);
1001
949 } else if (s->iop.writeback) { 1002 } else if (s->iop.writeback) {
950 bch_writeback_add(dc); 1003 bch_writeback_add(dc);
951 s->iop.bio = bio; 1004 s->iop.bio = bio;
952 1005
953 if (bio->bi_opf & REQ_PREFLUSH) { 1006 if (bio->bi_opf & REQ_PREFLUSH) {
954 /* Also need to send a flush to the backing device */ 1007 /*
955 struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0, 1008 * Also need to send a flush to the backing
956 dc->disk.bio_split); 1009 * device.
957 1010 */
1011 struct bio *flush;
1012
1013 flush = bio_alloc_bioset(GFP_NOIO, 0,
1014 dc->disk.bio_split);
1015 if (!flush) {
1016 s->iop.status = BLK_STS_RESOURCE;
1017 goto insert_data;
1018 }
958 bio_copy_dev(flush, bio); 1019 bio_copy_dev(flush, bio);
959 flush->bi_end_io = request_endio; 1020 flush->bi_end_io = backing_request_endio;
960 flush->bi_private = cl; 1021 flush->bi_private = cl;
961 flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; 1022 flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
962 1023 /* I/O request sent to backing device */
963 closure_bio_submit(flush, cl); 1024 closure_bio_submit(s->iop.c, flush, cl);
964 } 1025 }
965 } else { 1026 } else {
966 s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split); 1027 s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
967 1028 /* I/O request sent to backing device */
968 closure_bio_submit(bio, cl); 1029 bio->bi_end_io = backing_request_endio;
1030 closure_bio_submit(s->iop.c, bio, cl);
969 } 1031 }
970 1032
1033insert_data:
971 closure_call(&s->iop.cl, bch_data_insert, NULL, cl); 1034 closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
972 continue_at(cl, cached_dev_write_complete, NULL); 1035 continue_at(cl, cached_dev_write_complete, NULL);
973} 1036}
@@ -981,11 +1044,67 @@ static void cached_dev_nodata(struct closure *cl)
981 bch_journal_meta(s->iop.c, cl); 1044 bch_journal_meta(s->iop.c, cl);
982 1045
983 /* If it's a flush, we send the flush to the backing device too */ 1046 /* If it's a flush, we send the flush to the backing device too */
984 closure_bio_submit(bio, cl); 1047 bio->bi_end_io = backing_request_endio;
1048 closure_bio_submit(s->iop.c, bio, cl);
985 1049
986 continue_at(cl, cached_dev_bio_complete, NULL); 1050 continue_at(cl, cached_dev_bio_complete, NULL);
987} 1051}
988 1052
1053struct detached_dev_io_private {
1054 struct bcache_device *d;
1055 unsigned long start_time;
1056 bio_end_io_t *bi_end_io;
1057 void *bi_private;
1058};
1059
1060static void detached_dev_end_io(struct bio *bio)
1061{
1062 struct detached_dev_io_private *ddip;
1063
1064 ddip = bio->bi_private;
1065 bio->bi_end_io = ddip->bi_end_io;
1066 bio->bi_private = ddip->bi_private;
1067
1068 generic_end_io_acct(ddip->d->disk->queue,
1069 bio_data_dir(bio),
1070 &ddip->d->disk->part0, ddip->start_time);
1071
1072 if (bio->bi_status) {
1073 struct cached_dev *dc = container_of(ddip->d,
1074 struct cached_dev, disk);
1075 /* should count I/O error for backing device here */
1076 bch_count_backing_io_errors(dc, bio);
1077 }
1078
1079 kfree(ddip);
1080 bio->bi_end_io(bio);
1081}
1082
1083static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
1084{
1085 struct detached_dev_io_private *ddip;
1086 struct cached_dev *dc = container_of(d, struct cached_dev, disk);
1087
1088 /*
1089 * no need to call closure_get(&dc->disk.cl),
1090 * because upper layer had already opened bcache device,
1091 * which would call closure_get(&dc->disk.cl)
1092 */
1093 ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
1094 ddip->d = d;
1095 ddip->start_time = jiffies;
1096 ddip->bi_end_io = bio->bi_end_io;
1097 ddip->bi_private = bio->bi_private;
1098 bio->bi_end_io = detached_dev_end_io;
1099 bio->bi_private = ddip;
1100
1101 if ((bio_op(bio) == REQ_OP_DISCARD) &&
1102 !blk_queue_discard(bdev_get_queue(dc->bdev)))
1103 bio->bi_end_io(bio);
1104 else
1105 generic_make_request(bio);
1106}
1107
989/* Cached devices - read & write stuff */ 1108/* Cached devices - read & write stuff */
990 1109
991static blk_qc_t cached_dev_make_request(struct request_queue *q, 1110static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -996,6 +1115,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
996 struct cached_dev *dc = container_of(d, struct cached_dev, disk); 1115 struct cached_dev *dc = container_of(d, struct cached_dev, disk);
997 int rw = bio_data_dir(bio); 1116 int rw = bio_data_dir(bio);
998 1117
1118 if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
1119 dc->io_disable)) {
1120 bio->bi_status = BLK_STS_IOERR;
1121 bio_endio(bio);
1122 return BLK_QC_T_NONE;
1123 }
1124
999 atomic_set(&dc->backing_idle, 0); 1125 atomic_set(&dc->backing_idle, 0);
1000 generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); 1126 generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
1001 1127
@@ -1022,13 +1148,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
1022 else 1148 else
1023 cached_dev_read(dc, s); 1149 cached_dev_read(dc, s);
1024 } 1150 }
1025 } else { 1151 } else
1026 if ((bio_op(bio) == REQ_OP_DISCARD) && 1152 /* I/O request sent to backing device */
1027 !blk_queue_discard(bdev_get_queue(dc->bdev))) 1153 detached_dev_do_request(d, bio);
1028 bio_endio(bio);
1029 else
1030 generic_make_request(bio);
1031 }
1032 1154
1033 return BLK_QC_T_NONE; 1155 return BLK_QC_T_NONE;
1034} 1156}
@@ -1112,6 +1234,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
1112 struct bcache_device *d = bio->bi_disk->private_data; 1234 struct bcache_device *d = bio->bi_disk->private_data;
1113 int rw = bio_data_dir(bio); 1235 int rw = bio_data_dir(bio);
1114 1236
1237 if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
1238 bio->bi_status = BLK_STS_IOERR;
1239 bio_endio(bio);
1240 return BLK_QC_T_NONE;
1241 }
1242
1115 generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); 1243 generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
1116 1244
1117 s = search_alloc(bio, d); 1245 s = search_alloc(bio, d);
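
detached_dev_do_request() above stashes the original bi_end_io/bi_private in a small private struct so that I/O accounting and backing-device error counting can run on completion before the bio is handed back to its submitter. Here is a userspace sketch of that wrap-and-restore pattern; struct fake_bio, struct wrap_private and the immediate "completion" are illustrative, not the kernel types.

/* Userspace model of wrapping a completion callback for accounting. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct fake_bio {
	int status;
	void (*end_io)(struct fake_bio *);
	void *private;
};

struct wrap_private {
	time_t start_time;
	void (*orig_end_io)(struct fake_bio *);
	void *orig_private;
};

static void wrapped_end_io(struct fake_bio *bio)
{
	struct wrap_private *w = bio->private;

	/* restore the submitter's completion context first */
	bio->end_io = w->orig_end_io;
	bio->private = w->orig_private;

	printf("accounted %ld second(s) of I/O, status=%d\n",
	       (long)(time(NULL) - w->start_time), bio->status);

	free(w);
	bio->end_io(bio);        /* hand completion back to the submitter */
}

static void submit_detached(struct fake_bio *bio)
{
	struct wrap_private *w = calloc(1, sizeof(*w));

	w->start_time = time(NULL);
	w->orig_end_io = bio->end_io;
	w->orig_private = bio->private;
	bio->end_io = wrapped_end_io;
	bio->private = w;

	/* the "device" completes immediately in this model */
	bio->status = 0;
	bio->end_io(bio);
}

static void caller_done(struct fake_bio *bio)
{
	printf("caller sees completion, status=%d\n", bio->status);
}

int main(void)
{
	struct fake_bio bio = { .end_io = caller_done };

	submit_detached(&bio);
	return 0;
}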
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f2273143b3cb..d90d9e59ca00 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
47 NULL 47 NULL
48}; 48};
49 49
50/* Default is -1; we skip past it for stop_when_cache_set_failed */
51const char * const bch_stop_on_failure_modes[] = {
52 "default",
53 "auto",
54 "always",
55 NULL
56};
57
50static struct kobject *bcache_kobj; 58static struct kobject *bcache_kobj;
51struct mutex bch_register_lock; 59struct mutex bch_register_lock;
52LIST_HEAD(bch_cache_sets); 60LIST_HEAD(bch_cache_sets);
@@ -265,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
265 bio->bi_private = dc; 273 bio->bi_private = dc;
266 274
267 closure_get(cl); 275 closure_get(cl);
276 /* I/O request sent to backing device */
268 __write_super(&dc->sb, bio); 277 __write_super(&dc->sb, bio);
269 278
270 closure_return_with_destructor(cl, bch_write_bdev_super_unlock); 279 closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
@@ -521,7 +530,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
521 bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); 530 bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
522 bch_bio_map(bio, ca->disk_buckets); 531 bch_bio_map(bio, ca->disk_buckets);
523 532
524 closure_bio_submit(bio, &ca->prio); 533 closure_bio_submit(ca->set, bio, &ca->prio);
525 closure_sync(cl); 534 closure_sync(cl);
526} 535}
527 536
@@ -769,6 +778,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
769 sector_t sectors) 778 sector_t sectors)
770{ 779{
771 struct request_queue *q; 780 struct request_queue *q;
781 const size_t max_stripes = min_t(size_t, INT_MAX,
782 SIZE_MAX / sizeof(atomic_t));
772 size_t n; 783 size_t n;
773 int idx; 784 int idx;
774 785
@@ -777,9 +788,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
777 788
778 d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); 789 d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
779 790
780 if (!d->nr_stripes || 791 if (!d->nr_stripes || d->nr_stripes > max_stripes) {
781 d->nr_stripes > INT_MAX ||
782 d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
783 pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", 792 pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
784 (unsigned)d->nr_stripes); 793 (unsigned)d->nr_stripes);
785 return -ENOMEM; 794 return -ENOMEM;
@@ -833,9 +842,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
833 q->limits.io_min = block_size; 842 q->limits.io_min = block_size;
834 q->limits.logical_block_size = block_size; 843 q->limits.logical_block_size = block_size;
835 q->limits.physical_block_size = block_size; 844 q->limits.physical_block_size = block_size;
836 set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); 845 blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
837 clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags); 846 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
838 set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); 847 blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
839 848
840 blk_queue_write_cache(q, true, true); 849 blk_queue_write_cache(q, true, true);
841 850
@@ -899,6 +908,31 @@ void bch_cached_dev_run(struct cached_dev *dc)
899 pr_debug("error creating sysfs link"); 908 pr_debug("error creating sysfs link");
900} 909}
901 910
911/*
 912 * If BCACHE_DEV_RATE_DW_RUNNING is set, the routine of the delayed
 913 * work dc->writeback_rate_update is running. Wait until the routine
 914 * quits (BCACHE_DEV_RATE_DW_RUNNING is cleared), then go on to cancel
 915 * it. If the bit is still set after WRITEBACK_RATE_UPDATE_SECS_MAX
 916 * seconds, give up waiting and cancel the delayed work anyway.
917 */
918static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
919{
920 int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
921
922 do {
923 if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
924 &dc->disk.flags))
925 break;
926 time_out--;
927 schedule_timeout_interruptible(1);
928 } while (time_out > 0);
929
930 if (time_out == 0)
931 pr_warn("give up waiting for dc->writeback_write_update to quit");
932
933 cancel_delayed_work_sync(&dc->writeback_rate_update);
934}
935
902static void cached_dev_detach_finish(struct work_struct *w) 936static void cached_dev_detach_finish(struct work_struct *w)
903{ 937{
904 struct cached_dev *dc = container_of(w, struct cached_dev, detach); 938 struct cached_dev *dc = container_of(w, struct cached_dev, detach);
@@ -911,7 +945,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
911 945
912 mutex_lock(&bch_register_lock); 946 mutex_lock(&bch_register_lock);
913 947
914 cancel_delayed_work_sync(&dc->writeback_rate_update); 948 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
949 cancel_writeback_rate_update_dwork(dc);
950
915 if (!IS_ERR_OR_NULL(dc->writeback_thread)) { 951 if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
916 kthread_stop(dc->writeback_thread); 952 kthread_stop(dc->writeback_thread);
917 dc->writeback_thread = NULL; 953 dc->writeback_thread = NULL;
@@ -954,6 +990,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
954 closure_get(&dc->disk.cl); 990 closure_get(&dc->disk.cl);
955 991
956 bch_writeback_queue(dc); 992 bch_writeback_queue(dc);
993
957 cached_dev_put(dc); 994 cached_dev_put(dc);
958} 995}
959 996
@@ -1065,7 +1102,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
1065 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { 1102 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
1066 bch_sectors_dirty_init(&dc->disk); 1103 bch_sectors_dirty_init(&dc->disk);
1067 atomic_set(&dc->has_dirty, 1); 1104 atomic_set(&dc->has_dirty, 1);
1068 refcount_inc(&dc->count);
1069 bch_writeback_queue(dc); 1105 bch_writeback_queue(dc);
1070 } 1106 }
1071 1107
@@ -1093,14 +1129,16 @@ static void cached_dev_free(struct closure *cl)
1093{ 1129{
1094 struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); 1130 struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
1095 1131
1096 cancel_delayed_work_sync(&dc->writeback_rate_update); 1132 mutex_lock(&bch_register_lock);
1133
1134 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
1135 cancel_writeback_rate_update_dwork(dc);
1136
1097 if (!IS_ERR_OR_NULL(dc->writeback_thread)) 1137 if (!IS_ERR_OR_NULL(dc->writeback_thread))
1098 kthread_stop(dc->writeback_thread); 1138 kthread_stop(dc->writeback_thread);
1099 if (dc->writeback_write_wq) 1139 if (dc->writeback_write_wq)
1100 destroy_workqueue(dc->writeback_write_wq); 1140 destroy_workqueue(dc->writeback_write_wq);
1101 1141
1102 mutex_lock(&bch_register_lock);
1103
1104 if (atomic_read(&dc->running)) 1142 if (atomic_read(&dc->running))
1105 bd_unlink_disk_holder(dc->bdev, dc->disk.disk); 1143 bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
1106 bcache_device_free(&dc->disk); 1144 bcache_device_free(&dc->disk);
@@ -1170,6 +1208,12 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
1170 max(dc->disk.disk->queue->backing_dev_info->ra_pages, 1208 max(dc->disk.disk->queue->backing_dev_info->ra_pages,
1171 q->backing_dev_info->ra_pages); 1209 q->backing_dev_info->ra_pages);
1172 1210
1211 atomic_set(&dc->io_errors, 0);
1212 dc->io_disable = false;
1213 dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
1214 /* default to auto */
1215 dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
1216
1173 bch_cached_dev_request_init(dc); 1217 bch_cached_dev_request_init(dc);
1174 bch_cached_dev_writeback_init(dc); 1218 bch_cached_dev_writeback_init(dc);
1175 return 0; 1219 return 0;
@@ -1321,6 +1365,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
1321 return flash_dev_run(c, u); 1365 return flash_dev_run(c, u);
1322} 1366}
1323 1367
1368bool bch_cached_dev_error(struct cached_dev *dc)
1369{
1370 char name[BDEVNAME_SIZE];
1371
1372 if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
1373 return false;
1374
1375 dc->io_disable = true;
1376 /* make others know io_disable is true earlier */
1377 smp_mb();
1378
1379 pr_err("stop %s: too many IO errors on backing device %s\n",
1380 dc->disk.disk->disk_name, bdevname(dc->bdev, name));
1381
1382 bcache_device_stop(&dc->disk);
1383 return true;
1384}
1385
1324/* Cache set */ 1386/* Cache set */
1325 1387
1326__printf(2, 3) 1388__printf(2, 3)
@@ -1332,6 +1394,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
1332 test_bit(CACHE_SET_STOPPING, &c->flags)) 1394 test_bit(CACHE_SET_STOPPING, &c->flags))
1333 return false; 1395 return false;
1334 1396
1397 if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
1398 pr_warn("CACHE_SET_IO_DISABLE already set");
1399
1335 /* XXX: we can be called from atomic context 1400 /* XXX: we can be called from atomic context
1336 acquire_console_sem(); 1401 acquire_console_sem();
1337 */ 1402 */
@@ -1443,25 +1508,72 @@ static void cache_set_flush(struct closure *cl)
1443 closure_return(cl); 1508 closure_return(cl);
1444} 1509}
1445 1510
1511/*
1512 * This function is only called when CACHE_SET_IO_DISABLE is set, which means
1513 * cache set is unregistering due to too many I/O errors. In this condition,
1514 * the bcache device might be stopped, it depends on stop_when_cache_set_failed
1515 * value and whether the broken cache has dirty data:
1516 *
1517 * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
 1518 * BCH_CACHED_DEV_STOP_AUTO 0 NO
 1519 * BCH_CACHED_DEV_STOP_AUTO 1 YES
1520 * BCH_CACHED_DEV_STOP_ALWAYS 0 YES
1521 * BCH_CACHED_DEV_STOP_ALWAYS 1 YES
1522 *
1523 * The expected behavior is, if stop_when_cache_set_failed is configured to
1524 * "auto" via sysfs interface, the bcache device will not be stopped if the
1525 * backing device is clean on the broken cache device.
1526 */
1527static void conditional_stop_bcache_device(struct cache_set *c,
1528 struct bcache_device *d,
1529 struct cached_dev *dc)
1530{
1531 if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
1532 pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
1533 d->disk->disk_name, c->sb.set_uuid);
1534 bcache_device_stop(d);
1535 } else if (atomic_read(&dc->has_dirty)) {
1536 /*
 1537 * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
1538 * and dc->has_dirty == 1
1539 */
1540 pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
1541 d->disk->disk_name);
1542 bcache_device_stop(d);
1543 } else {
1544 /*
 1545 * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
1546 * and dc->has_dirty == 0
1547 */
1548 pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
1549 d->disk->disk_name);
1550 }
1551}
1552
1446static void __cache_set_unregister(struct closure *cl) 1553static void __cache_set_unregister(struct closure *cl)
1447{ 1554{
1448 struct cache_set *c = container_of(cl, struct cache_set, caching); 1555 struct cache_set *c = container_of(cl, struct cache_set, caching);
1449 struct cached_dev *dc; 1556 struct cached_dev *dc;
1557 struct bcache_device *d;
1450 size_t i; 1558 size_t i;
1451 1559
1452 mutex_lock(&bch_register_lock); 1560 mutex_lock(&bch_register_lock);
1453 1561
1454 for (i = 0; i < c->devices_max_used; i++) 1562 for (i = 0; i < c->devices_max_used; i++) {
1455 if (c->devices[i]) { 1563 d = c->devices[i];
1456 if (!UUID_FLASH_ONLY(&c->uuids[i]) && 1564 if (!d)
1457 test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { 1565 continue;
1458 dc = container_of(c->devices[i], 1566
1459 struct cached_dev, disk); 1567 if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
1460 bch_cached_dev_detach(dc); 1568 test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
1461 } else { 1569 dc = container_of(d, struct cached_dev, disk);
1462 bcache_device_stop(c->devices[i]); 1570 bch_cached_dev_detach(dc);
1463 } 1571 if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
1572 conditional_stop_bcache_device(c, d, dc);
1573 } else {
1574 bcache_device_stop(d);
1464 } 1575 }
1576 }
1465 1577
1466 mutex_unlock(&bch_register_lock); 1578 mutex_unlock(&bch_register_lock);
1467 1579
@@ -1567,6 +1679,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
1567 c->congested_read_threshold_us = 2000; 1679 c->congested_read_threshold_us = 2000;
1568 c->congested_write_threshold_us = 20000; 1680 c->congested_write_threshold_us = 20000;
1569 c->error_limit = DEFAULT_IO_ERROR_LIMIT; 1681 c->error_limit = DEFAULT_IO_ERROR_LIMIT;
1682 WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
1570 1683
1571 return c; 1684 return c;
1572err: 1685err:
@@ -2148,7 +2261,6 @@ static int __init bcache_init(void)
2148 mutex_init(&bch_register_lock); 2261 mutex_init(&bch_register_lock);
2149 init_waitqueue_head(&unregister_wait); 2262 init_waitqueue_head(&unregister_wait);
2150 register_reboot_notifier(&reboot); 2263 register_reboot_notifier(&reboot);
2151 closure_debug_init();
2152 2264
2153 bcache_major = register_blkdev(0, "bcache"); 2265 bcache_major = register_blkdev(0, "bcache");
2154 if (bcache_major < 0) { 2266 if (bcache_major < 0) {
@@ -2160,7 +2272,7 @@ static int __init bcache_init(void)
2160 if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) || 2272 if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
2161 !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || 2273 !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
2162 bch_request_init() || 2274 bch_request_init() ||
2163 bch_debug_init(bcache_kobj) || 2275 bch_debug_init(bcache_kobj) || closure_debug_init() ||
2164 sysfs_create_files(bcache_kobj, files)) 2276 sysfs_create_files(bcache_kobj, files))
2165 goto err; 2277 goto err;
2166 2278
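
conditional_stop_bcache_device() above implements the stop_when_cache_set_failed policy table: "always" stops the bcache device unconditionally, while "auto" stops it only when the broken cache still holds dirty data. A compact sketch of just that decision; should_stop() and the printout are illustrative, not kernel code.

/* Userspace sketch of the stop_when_cache_set_failed decision table. */
#include <stdbool.h>
#include <stdio.h>

enum stop_on_failure {
	BCH_CACHED_DEV_STOP_AUTO = 0,
	BCH_CACHED_DEV_STOP_ALWAYS,
};

static bool should_stop(enum stop_on_failure policy, bool has_dirty)
{
	if (policy == BCH_CACHED_DEV_STOP_ALWAYS)
		return true;            /* "always": stop unconditionally    */
	return has_dirty;               /* "auto": stop only if dirty data   */
}

int main(void)
{
	const char *names[] = { "auto", "always" };

	for (int p = 0; p <= BCH_CACHED_DEV_STOP_ALWAYS; p++)
		for (int dirty = 0; dirty <= 1; dirty++)
			printf("policy=%-6s dirty=%d -> stop=%s\n",
			       names[p], dirty,
			       should_stop(p, dirty) ? "YES" : "NO");
	return 0;
}

Running it reproduces the four-row table documented in the comment: only the clean, "auto" case keeps the bcache device alive.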
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 78cd7bd50fdd..dfeef583ee50 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -78,6 +78,7 @@ rw_attribute(congested_write_threshold_us);
78rw_attribute(sequential_cutoff); 78rw_attribute(sequential_cutoff);
79rw_attribute(data_csum); 79rw_attribute(data_csum);
80rw_attribute(cache_mode); 80rw_attribute(cache_mode);
81rw_attribute(stop_when_cache_set_failed);
81rw_attribute(writeback_metadata); 82rw_attribute(writeback_metadata);
82rw_attribute(writeback_running); 83rw_attribute(writeback_running);
83rw_attribute(writeback_percent); 84rw_attribute(writeback_percent);
@@ -95,6 +96,7 @@ read_attribute(partial_stripes_expensive);
95 96
96rw_attribute(synchronous); 97rw_attribute(synchronous);
97rw_attribute(journal_delay_ms); 98rw_attribute(journal_delay_ms);
99rw_attribute(io_disable);
98rw_attribute(discard); 100rw_attribute(discard);
99rw_attribute(running); 101rw_attribute(running);
100rw_attribute(label); 102rw_attribute(label);
@@ -125,6 +127,12 @@ SHOW(__bch_cached_dev)
125 bch_cache_modes + 1, 127 bch_cache_modes + 1,
126 BDEV_CACHE_MODE(&dc->sb)); 128 BDEV_CACHE_MODE(&dc->sb));
127 129
130 if (attr == &sysfs_stop_when_cache_set_failed)
131 return bch_snprint_string_list(buf, PAGE_SIZE,
132 bch_stop_on_failure_modes + 1,
133 dc->stop_when_cache_set_failed);
134
135
128 sysfs_printf(data_csum, "%i", dc->disk.data_csum); 136 sysfs_printf(data_csum, "%i", dc->disk.data_csum);
129 var_printf(verify, "%i"); 137 var_printf(verify, "%i");
130 var_printf(bypass_torture_test, "%i"); 138 var_printf(bypass_torture_test, "%i");
@@ -133,7 +141,9 @@ SHOW(__bch_cached_dev)
133 var_print(writeback_delay); 141 var_print(writeback_delay);
134 var_print(writeback_percent); 142 var_print(writeback_percent);
135 sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9); 143 sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
136 144 sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
145 sysfs_printf(io_error_limit, "%i", dc->error_limit);
146 sysfs_printf(io_disable, "%i", dc->io_disable);
137 var_print(writeback_rate_update_seconds); 147 var_print(writeback_rate_update_seconds);
138 var_print(writeback_rate_i_term_inverse); 148 var_print(writeback_rate_i_term_inverse);
139 var_print(writeback_rate_p_term_inverse); 149 var_print(writeback_rate_p_term_inverse);
@@ -173,7 +183,7 @@ SHOW(__bch_cached_dev)
173 sysfs_hprint(dirty_data, 183 sysfs_hprint(dirty_data,
174 bcache_dev_sectors_dirty(&dc->disk) << 9); 184 bcache_dev_sectors_dirty(&dc->disk) << 9);
175 185
176 sysfs_hprint(stripe_size, dc->disk.stripe_size << 9); 186 sysfs_hprint(stripe_size, ((uint64_t)dc->disk.stripe_size) << 9);
177 var_printf(partial_stripes_expensive, "%u"); 187 var_printf(partial_stripes_expensive, "%u");
178 188
179 var_hprint(sequential_cutoff); 189 var_hprint(sequential_cutoff);
@@ -224,6 +234,14 @@ STORE(__cached_dev)
224 d_strtoul(writeback_rate_i_term_inverse); 234 d_strtoul(writeback_rate_i_term_inverse);
225 d_strtoul_nonzero(writeback_rate_p_term_inverse); 235 d_strtoul_nonzero(writeback_rate_p_term_inverse);
226 236
237 sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
238
239 if (attr == &sysfs_io_disable) {
240 int v = strtoul_or_return(buf);
241
242 dc->io_disable = v ? 1 : 0;
243 }
244
227 d_strtoi_h(sequential_cutoff); 245 d_strtoi_h(sequential_cutoff);
228 d_strtoi_h(readahead); 246 d_strtoi_h(readahead);
229 247
@@ -246,6 +264,15 @@ STORE(__cached_dev)
246 } 264 }
247 } 265 }
248 266
267 if (attr == &sysfs_stop_when_cache_set_failed) {
268 v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
269
270 if (v < 0)
271 return v;
272
273 dc->stop_when_cache_set_failed = v;
274 }
275
249 if (attr == &sysfs_label) { 276 if (attr == &sysfs_label) {
250 if (size > SB_LABEL_SIZE) 277 if (size > SB_LABEL_SIZE)
251 return -EINVAL; 278 return -EINVAL;
@@ -309,7 +336,8 @@ STORE(bch_cached_dev)
309 bch_writeback_queue(dc); 336 bch_writeback_queue(dc);
310 337
311 if (attr == &sysfs_writeback_percent) 338 if (attr == &sysfs_writeback_percent)
312 schedule_delayed_work(&dc->writeback_rate_update, 339 if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
340 schedule_delayed_work(&dc->writeback_rate_update,
313 dc->writeback_rate_update_seconds * HZ); 341 dc->writeback_rate_update_seconds * HZ);
314 342
315 mutex_unlock(&bch_register_lock); 343 mutex_unlock(&bch_register_lock);
@@ -324,6 +352,7 @@ static struct attribute *bch_cached_dev_files[] = {
324 &sysfs_data_csum, 352 &sysfs_data_csum,
325#endif 353#endif
326 &sysfs_cache_mode, 354 &sysfs_cache_mode,
355 &sysfs_stop_when_cache_set_failed,
327 &sysfs_writeback_metadata, 356 &sysfs_writeback_metadata,
328 &sysfs_writeback_running, 357 &sysfs_writeback_running,
329 &sysfs_writeback_delay, 358 &sysfs_writeback_delay,
@@ -333,6 +362,9 @@ static struct attribute *bch_cached_dev_files[] = {
333 &sysfs_writeback_rate_i_term_inverse, 362 &sysfs_writeback_rate_i_term_inverse,
334 &sysfs_writeback_rate_p_term_inverse, 363 &sysfs_writeback_rate_p_term_inverse,
335 &sysfs_writeback_rate_debug, 364 &sysfs_writeback_rate_debug,
365 &sysfs_errors,
366 &sysfs_io_error_limit,
367 &sysfs_io_disable,
336 &sysfs_dirty_data, 368 &sysfs_dirty_data,
337 &sysfs_stripe_size, 369 &sysfs_stripe_size,
338 &sysfs_partial_stripes_expensive, 370 &sysfs_partial_stripes_expensive,
@@ -590,6 +622,8 @@ SHOW(__bch_cache_set)
590 sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite); 622 sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
591 sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled); 623 sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
592 sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); 624 sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
625 sysfs_printf(io_disable, "%i",
626 test_bit(CACHE_SET_IO_DISABLE, &c->flags));
593 627
594 if (attr == &sysfs_bset_tree_stats) 628 if (attr == &sysfs_bset_tree_stats)
595 return bch_bset_print_stats(c, buf); 629 return bch_bset_print_stats(c, buf);
@@ -679,6 +713,20 @@ STORE(__bch_cache_set)
679 if (attr == &sysfs_io_error_halflife) 713 if (attr == &sysfs_io_error_halflife)
680 c->error_decay = strtoul_or_return(buf) / 88; 714 c->error_decay = strtoul_or_return(buf) / 88;
681 715
716 if (attr == &sysfs_io_disable) {
717 int v = strtoul_or_return(buf);
718
719 if (v) {
720 if (test_and_set_bit(CACHE_SET_IO_DISABLE,
721 &c->flags))
722 pr_warn("CACHE_SET_IO_DISABLE already set");
723 } else {
724 if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
725 &c->flags))
726 pr_warn("CACHE_SET_IO_DISABLE already cleared");
727 }
728 }
729
682 sysfs_strtoul(journal_delay_ms, c->journal_delay_ms); 730 sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
683 sysfs_strtoul(verify, c->verify); 731 sysfs_strtoul(verify, c->verify);
684 sysfs_strtoul(key_merging_disabled, c->key_merging_disabled); 732 sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
@@ -764,6 +812,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
764 &sysfs_gc_always_rewrite, 812 &sysfs_gc_always_rewrite,
765 &sysfs_btree_shrinker_disabled, 813 &sysfs_btree_shrinker_disabled,
766 &sysfs_copy_gc_enabled, 814 &sysfs_copy_gc_enabled,
815 &sysfs_io_disable,
767 NULL 816 NULL
768}; 817};
769KTYPE(bch_cache_set_internal); 818KTYPE(bch_cache_set_internal);
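
The cache-set io_disable store above uses test_and_set_bit()/test_and_clear_bit() so a redundant write only emits a warning instead of toggling state twice. A userspace model of that idempotent store, using atomic_exchange() in place of the kernel bit operations; store_io_disable() and the flag variable are illustrative.

/* Userspace model of the idempotent io_disable sysfs store. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool io_disable_flag;     /* models CACHE_SET_IO_DISABLE */

static void store_io_disable(bool v)
{
	if (v) {
		if (atomic_exchange(&io_disable_flag, true))
			printf("warning: io_disable already set\n");
	} else {
		if (!atomic_exchange(&io_disable_flag, false))
			printf("warning: io_disable already cleared\n");
	}
}

int main(void)
{
	store_io_disable(true);   /* sets the flag                      */
	store_io_disable(true);   /* redundant: warns, no state change  */
	store_io_disable(false);  /* clears the flag                    */
	store_io_disable(false);  /* redundant: warns                   */
	return 0;
}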
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index a23cd6a14b74..74febd5230df 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -32,20 +32,27 @@ int bch_ ## name ## _h(const char *cp, type *res) \
32 case 'y': \ 32 case 'y': \
33 case 'z': \ 33 case 'z': \
34 u++; \ 34 u++; \
35 /* fall through */ \
35 case 'e': \ 36 case 'e': \
36 u++; \ 37 u++; \
38 /* fall through */ \
37 case 'p': \ 39 case 'p': \
38 u++; \ 40 u++; \
41 /* fall through */ \
39 case 't': \ 42 case 't': \
40 u++; \ 43 u++; \
44 /* fall through */ \
41 case 'g': \ 45 case 'g': \
42 u++; \ 46 u++; \
47 /* fall through */ \
43 case 'm': \ 48 case 'm': \
44 u++; \ 49 u++; \
50 /* fall through */ \
45 case 'k': \ 51 case 'k': \
46 u++; \ 52 u++; \
47 if (e++ == cp) \ 53 if (e++ == cp) \
48 return -EINVAL; \ 54 return -EINVAL; \
55 /* fall through */ \
49 case '\n': \ 56 case '\n': \
50 case '\0': \ 57 case '\0': \
51 if (*e == '\n') \ 58 if (*e == '\n') \
@@ -75,10 +82,9 @@ STRTO_H(strtoll, long long)
75STRTO_H(strtoull, unsigned long long) 82STRTO_H(strtoull, unsigned long long)
76 83
77/** 84/**
78 * bch_hprint() - formats @v to human readable string for sysfs. 85 * bch_hprint - formats @v to human readable string for sysfs.
79 * 86 * @buf: the (at least 8 byte) buffer to format the result into.
80 * @v - signed 64 bit integer 87 * @v: signed 64 bit integer
81 * @buf - the (at least 8 byte) buffer to format the result into.
82 * 88 *
83 * Returns the number of bytes used by format. 89 * Returns the number of bytes used by format.
84 */ 90 */
@@ -218,13 +224,12 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
218} 224}
219 225
220/** 226/**
221 * bch_next_delay() - increment @d by the amount of work done, and return how 227 * bch_next_delay() - update ratelimiting statistics and calculate next delay
222 * long to delay until the next time to do some work. 228 * @d: the struct bch_ratelimit to update
223 * 229 * @done: the amount of work done, in arbitrary units
224 * @d - the struct bch_ratelimit to update
225 * @done - the amount of work done, in arbitrary units
226 * 230 *
227 * Returns the amount of time to delay by, in jiffies 231 * Increment @d by the amount of work done, and return how long to delay in
232 * jiffies until the next time to do some work.
228 */ 233 */
229uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) 234uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
230{ 235{
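
The /* fall through */ annotations added above mark the deliberate suffix ladder in the bch_strto*_h() helpers, where each recognised unit case takes one extra step and drops into the next smaller one. A small userspace parser built on the same idea, shifting by 10 bits per suffix level; parse_size() and its supported suffixes are illustrative, not the bcache helper itself.

/* Userspace sketch of a fall-through unit-suffix parser. */
#include <stdio.h>
#include <stdlib.h>

static long long parse_size(const char *s)
{
	char *end;
	long long v = strtoll(s, &end, 10);
	int shift = 0;

	switch (*end) {
	case 't':
		shift += 10;
		/* fall through */
	case 'g':
		shift += 10;
		/* fall through */
	case 'm':
		shift += 10;
		/* fall through */
	case 'k':
		shift += 10;
		/* fall through */
	case '\0':
		break;
	default:
		return -1;              /* unknown suffix */
	}
	return v << shift;
}

int main(void)
{
	printf("%lld\n", parse_size("8"));    /* 8          */
	printf("%lld\n", parse_size("8k"));   /* 8192       */
	printf("%lld\n", parse_size("2m"));   /* 2097152    */
	printf("%lld\n", parse_size("1g"));   /* 1073741824 */
	return 0;
}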
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index a6763db7f061..268024529edd 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
567 return bdev->bd_inode->i_size >> 9; 567 return bdev->bd_inode->i_size >> 9;
568} 568}
569 569
570#define closure_bio_submit(bio, cl) \
571do { \
572 closure_get(cl); \
573 generic_make_request(bio); \
574} while (0)
575
576uint64_t bch_crc64_update(uint64_t, const void *, size_t); 570uint64_t bch_crc64_update(uint64_t, const void *, size_t);
577uint64_t bch_crc64(const void *, size_t); 571uint64_t bch_crc64(const void *, size_t);
578 572
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f1d2fc15abcc..4a9547cdcdc5 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -114,6 +114,27 @@ static void update_writeback_rate(struct work_struct *work)
114 struct cached_dev *dc = container_of(to_delayed_work(work), 114 struct cached_dev *dc = container_of(to_delayed_work(work),
115 struct cached_dev, 115 struct cached_dev,
116 writeback_rate_update); 116 writeback_rate_update);
117 struct cache_set *c = dc->disk.c;
118
119 /*
 120 * cancel_writeback_rate_update_dwork() checks BCACHE_DEV_RATE_DW_RUNNING
 121 * before calling cancel_delayed_work_sync(), so set the bit first.
122 */
123 set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
124 /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
125 smp_mb();
126
127 /*
128 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
129 * check it here too.
130 */
131 if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
132 test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
133 clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
134 /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
135 smp_mb();
136 return;
137 }
117 138
118 down_read(&dc->writeback_lock); 139 down_read(&dc->writeback_lock);
119 140
@@ -123,8 +144,23 @@ static void update_writeback_rate(struct work_struct *work)
123 144
124 up_read(&dc->writeback_lock); 145 up_read(&dc->writeback_lock);
125 146
126 schedule_delayed_work(&dc->writeback_rate_update, 147 /*
148 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
149 * check it here too.
150 */
151 if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
152 !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
153 schedule_delayed_work(&dc->writeback_rate_update,
127 dc->writeback_rate_update_seconds * HZ); 154 dc->writeback_rate_update_seconds * HZ);
155 }
156
157 /*
158 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
159 * cancel_delayed_work_sync().
160 */
161 clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
162 /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
163 smp_mb();
128} 164}
129 165
130static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) 166static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
@@ -253,7 +289,8 @@ static void write_dirty(struct closure *cl)
253 bio_set_dev(&io->bio, io->dc->bdev); 289 bio_set_dev(&io->bio, io->dc->bdev);
254 io->bio.bi_end_io = dirty_endio; 290 io->bio.bi_end_io = dirty_endio;
255 291
256 closure_bio_submit(&io->bio, cl); 292 /* I/O request sent to backing device */
293 closure_bio_submit(io->dc->disk.c, &io->bio, cl);
257 } 294 }
258 295
259 atomic_set(&dc->writeback_sequence_next, next_sequence); 296 atomic_set(&dc->writeback_sequence_next, next_sequence);
@@ -279,7 +316,7 @@ static void read_dirty_submit(struct closure *cl)
279{ 316{
280 struct dirty_io *io = container_of(cl, struct dirty_io, cl); 317 struct dirty_io *io = container_of(cl, struct dirty_io, cl);
281 318
282 closure_bio_submit(&io->bio, cl); 319 closure_bio_submit(io->dc->disk.c, &io->bio, cl);
283 320
284 continue_at(cl, write_dirty, io->dc->writeback_write_wq); 321 continue_at(cl, write_dirty, io->dc->writeback_write_wq);
285} 322}
@@ -305,7 +342,9 @@ static void read_dirty(struct cached_dev *dc)
305 342
306 next = bch_keybuf_next(&dc->writeback_keys); 343 next = bch_keybuf_next(&dc->writeback_keys);
307 344
308 while (!kthread_should_stop() && next) { 345 while (!kthread_should_stop() &&
346 !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
347 next) {
309 size = 0; 348 size = 0;
310 nk = 0; 349 nk = 0;
311 350
@@ -402,7 +441,9 @@ static void read_dirty(struct cached_dev *dc)
402 } 441 }
403 } 442 }
404 443
405 while (!kthread_should_stop() && delay) { 444 while (!kthread_should_stop() &&
445 !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
446 delay) {
406 schedule_timeout_interruptible(delay); 447 schedule_timeout_interruptible(delay);
407 delay = writeback_delay(dc, 0); 448 delay = writeback_delay(dc, 0);
408 } 449 }
@@ -558,21 +599,30 @@ static bool refill_dirty(struct cached_dev *dc)
558static int bch_writeback_thread(void *arg) 599static int bch_writeback_thread(void *arg)
559{ 600{
560 struct cached_dev *dc = arg; 601 struct cached_dev *dc = arg;
602 struct cache_set *c = dc->disk.c;
561 bool searched_full_index; 603 bool searched_full_index;
562 604
563 bch_ratelimit_reset(&dc->writeback_rate); 605 bch_ratelimit_reset(&dc->writeback_rate);
564 606
565 while (!kthread_should_stop()) { 607 while (!kthread_should_stop() &&
608 !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
566 down_write(&dc->writeback_lock); 609 down_write(&dc->writeback_lock);
567 set_current_state(TASK_INTERRUPTIBLE); 610 set_current_state(TASK_INTERRUPTIBLE);
568 if (!atomic_read(&dc->has_dirty) || 611 /*
569			(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&		612		 * If the bcache device is detaching, skip here and continue
570 !dc->writeback_running)) { 613 * to perform writeback. Otherwise, if no dirty data on cache,
614 * or there is dirty data on cache but writeback is disabled,
615 * the writeback thread should sleep here and wait for others
616		 * to wake it up.
617 */
618 if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
619 (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
571 up_write(&dc->writeback_lock); 620 up_write(&dc->writeback_lock);
572 621
573 if (kthread_should_stop()) { 622 if (kthread_should_stop() ||
623 test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
574 set_current_state(TASK_RUNNING); 624 set_current_state(TASK_RUNNING);
575 return 0; 625 break;
576 } 626 }
577 627
578 schedule(); 628 schedule();
@@ -585,9 +635,16 @@ static int bch_writeback_thread(void *arg)
585 if (searched_full_index && 635 if (searched_full_index &&
586 RB_EMPTY_ROOT(&dc->writeback_keys.keys)) { 636 RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
587 atomic_set(&dc->has_dirty, 0); 637 atomic_set(&dc->has_dirty, 0);
588 cached_dev_put(dc);
589 SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); 638 SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
590 bch_write_bdev_super(dc, NULL); 639 bch_write_bdev_super(dc, NULL);
640 /*
641 * If bcache device is detaching via sysfs interface,
642 * writeback thread should stop after there is no dirty
643 * data on cache. BCACHE_DEV_DETACHING flag is set in
644 * bch_cached_dev_detach().
645 */
646 if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
647 break;
591 } 648 }
592 649
593 up_write(&dc->writeback_lock); 650 up_write(&dc->writeback_lock);
@@ -599,6 +656,7 @@ static int bch_writeback_thread(void *arg)
599 656
600 while (delay && 657 while (delay &&
601 !kthread_should_stop() && 658 !kthread_should_stop() &&
659 !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
602 !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) 660 !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
603 delay = schedule_timeout_interruptible(delay); 661 delay = schedule_timeout_interruptible(delay);
604 662
@@ -606,6 +664,9 @@ static int bch_writeback_thread(void *arg)
606 } 664 }
607 } 665 }
608 666
667 cached_dev_put(dc);
668 wait_for_kthread_stop();
669
609 return 0; 670 return 0;
610} 671}
611 672
@@ -659,6 +720,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
659 dc->writeback_rate_p_term_inverse = 40; 720 dc->writeback_rate_p_term_inverse = 40;
660 dc->writeback_rate_i_term_inverse = 10000; 721 dc->writeback_rate_i_term_inverse = 10000;
661 722
723 WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
662 INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); 724 INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
663} 725}
664 726
@@ -669,11 +731,15 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
669 if (!dc->writeback_write_wq) 731 if (!dc->writeback_write_wq)
670 return -ENOMEM; 732 return -ENOMEM;
671 733
734 cached_dev_get(dc);
672 dc->writeback_thread = kthread_create(bch_writeback_thread, dc, 735 dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
673 "bcache_writeback"); 736 "bcache_writeback");
674 if (IS_ERR(dc->writeback_thread)) 737 if (IS_ERR(dc->writeback_thread)) {
738 cached_dev_put(dc);
675 return PTR_ERR(dc->writeback_thread); 739 return PTR_ERR(dc->writeback_thread);
740 }
676 741
742 WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
677 schedule_delayed_work(&dc->writeback_rate_update, 743 schedule_delayed_work(&dc->writeback_rate_update,
678 dc->writeback_rate_update_seconds * HZ); 744 dc->writeback_rate_update_seconds * HZ);
679 745
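The writeback.c hunks above introduce a small handshake so that cancel_delayed_work_sync() on writeback_rate_update cannot race with the work item re-arming itself. The sketch below is illustrative only: names prefixed ex_ are hypothetical, and the stop path shown stands in for the sysfs/detach callers that are not part of this hunk; it is not the bcache code itself.

/*
 * Sketch of the pattern: the work function brackets any possible
 * re-arm with a RUNNING bit and full barriers; the canceller first
 * clears the "keep running" bit, then waits for RUNNING to drop, and
 * only then calls cancel_delayed_work_sync(), so no new re-arm can
 * slip in behind it.
 */
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/jiffies.h>

#define EX_DEV_WB_RUNNING	0	/* keep rescheduling the work */
#define EX_DEV_RATE_DW_RUNNING	1	/* work fn may still re-arm */

struct ex_dev {
	unsigned long		flags;
	struct delayed_work	rate_dw;
	unsigned int		period_ms;
};

static void ex_rate_update(struct work_struct *work)
{
	struct ex_dev *d = container_of(to_delayed_work(work),
					struct ex_dev, rate_dw);

	set_bit(EX_DEV_RATE_DW_RUNNING, &d->flags);
	smp_mb();	/* paired with the barrier in ex_stop_rate_update() */

	if (test_bit(EX_DEV_WB_RUNNING, &d->flags))
		schedule_delayed_work(&d->rate_dw,
				      msecs_to_jiffies(d->period_ms));

	clear_bit(EX_DEV_RATE_DW_RUNNING, &d->flags);
	smp_mb();	/* make the clear visible before the canceller proceeds */
}

/* Hypothetical stop path, standing in for the sysfs/detach callers. */
static void ex_stop_rate_update(struct ex_dev *d)
{
	clear_bit(EX_DEV_WB_RUNNING, &d->flags);
	smp_mb();

	/* Wait until no work function can still call schedule_delayed_work(). */
	while (test_bit(EX_DEV_RATE_DW_RUNNING, &d->flags))
		schedule_timeout_interruptible(msecs_to_jiffies(1));

	cancel_delayed_work_sync(&d->rate_dw);
}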
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 587b25599856..610fb01de629 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -39,7 +39,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
39 39
40 if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) 40 if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
41 continue; 41 continue;
42 ret += bcache_dev_sectors_dirty(d); 42 ret += bcache_dev_sectors_dirty(d);
43 } 43 }
44 44
45 mutex_unlock(&bch_register_lock); 45 mutex_unlock(&bch_register_lock);
@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
105{ 105{
106 if (!atomic_read(&dc->has_dirty) && 106 if (!atomic_read(&dc->has_dirty) &&
107 !atomic_xchg(&dc->has_dirty, 1)) { 107 !atomic_xchg(&dc->has_dirty, 1)) {
108 refcount_inc(&dc->count);
109
110 if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) { 108 if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
111 SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY); 109 SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
112 /* XXX: should do this synchronously */ 110 /* XXX: should do this synchronously */
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 7eb3e2a3c07d..954f4e3b68ac 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1857,7 +1857,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1857 q->limits = *limits; 1857 q->limits = *limits;
1858 1858
1859 if (!dm_table_supports_discards(t)) { 1859 if (!dm_table_supports_discards(t)) {
1860 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 1860 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
1861 /* Must also clear discard limits... */ 1861 /* Must also clear discard limits... */
1862 q->limits.max_discard_sectors = 0; 1862 q->limits.max_discard_sectors = 0;
1863 q->limits.max_hw_discard_sectors = 0; 1863 q->limits.max_hw_discard_sectors = 0;
@@ -1865,7 +1865,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1865 q->limits.discard_alignment = 0; 1865 q->limits.discard_alignment = 0;
1866 q->limits.discard_misaligned = 0; 1866 q->limits.discard_misaligned = 0;
1867 } else 1867 } else
1868 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 1868 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
1869 1869
1870 if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { 1870 if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
1871 wc = true; 1871 wc = true;
@@ -1875,15 +1875,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1875 blk_queue_write_cache(q, wc, fua); 1875 blk_queue_write_cache(q, wc, fua);
1876 1876
1877 if (dm_table_supports_dax(t)) 1877 if (dm_table_supports_dax(t))
1878 queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); 1878 blk_queue_flag_set(QUEUE_FLAG_DAX, q);
1879 if (dm_table_supports_dax_write_cache(t)) 1879 if (dm_table_supports_dax_write_cache(t))
1880 dax_write_cache(t->md->dax_dev, true); 1880 dax_write_cache(t->md->dax_dev, true);
1881 1881
1882 /* Ensure that all underlying devices are non-rotational. */ 1882 /* Ensure that all underlying devices are non-rotational. */
1883 if (dm_table_all_devices_attribute(t, device_is_nonrot)) 1883 if (dm_table_all_devices_attribute(t, device_is_nonrot))
1884 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 1884 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
1885 else 1885 else
1886 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); 1886 blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
1887 1887
1888 if (!dm_table_supports_write_same(t)) 1888 if (!dm_table_supports_write_same(t))
1889 q->limits.max_write_same_sectors = 0; 1889 q->limits.max_write_same_sectors = 0;
@@ -1891,9 +1891,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1891 q->limits.max_write_zeroes_sectors = 0; 1891 q->limits.max_write_zeroes_sectors = 0;
1892 1892
1893 if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) 1893 if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
1894 queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); 1894 blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
1895 else 1895 else
1896 queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); 1896 blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
1897 1897
1898 dm_table_verify_integrity(t); 1898 dm_table_verify_integrity(t);
1899 1899
@@ -1904,7 +1904,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1904 * have it set. 1904 * have it set.
1905 */ 1905 */
1906 if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) 1906 if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
1907 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); 1907 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
1908} 1908}
1909 1909
1910unsigned int dm_table_get_num_targets(struct dm_table *t) 1910unsigned int dm_table_get_num_targets(struct dm_table *t)
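The dm-table.c hunk above is one of many call sites in this pull converted from queue_flag_set_unlocked()/queue_flag_clear_unlocked() to blk_queue_flag_set()/blk_queue_flag_clear(). The helpers' own bodies are not part of this diff; the sketch below only mirrors the shape implied by the call sites (flag number first, queue second) and assumes plain atomic bitops on q->queue_flags, which may not match the exact serialization the block layer uses. The ex_ prefix marks it as an illustration, not the in-tree implementation.

#include <linux/bitops.h>
#include <linux/blkdev.h>

/* Set a QUEUE_FLAG_* bit on a request queue. */
static inline void ex_blk_queue_flag_set(unsigned int flag,
					 struct request_queue *q)
{
	set_bit(flag, &q->queue_flags);
}

/* Clear a QUEUE_FLAG_* bit on a request queue. */
static inline void ex_blk_queue_flag_clear(unsigned int flag,
					   struct request_queue *q)
{
	clear_bit(flag, &q->queue_flags);
}

A caller then follows the same pattern as the converted hunks, e.g. ex_blk_queue_flag_set(QUEUE_FLAG_DISCARD, q) when discard is supported and ex_blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q) otherwise, with no queue lock taken at the call site.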
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 353ea0ede091..ded74e1eb0d1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1848,7 +1848,7 @@ static struct mapped_device *alloc_dev(int minor)
1848 INIT_LIST_HEAD(&md->table_devices); 1848 INIT_LIST_HEAD(&md->table_devices);
1849 spin_lock_init(&md->uevent_lock); 1849 spin_lock_init(&md->uevent_lock);
1850 1850
1851 md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id); 1851 md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL);
1852 if (!md->queue) 1852 if (!md->queue)
1853 goto bad; 1853 goto bad;
1854 md->queue->queuedata = md; 1854 md->queue->queuedata = md;
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 773fc70dced7..4964323d936b 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -138,9 +138,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
138 } 138 }
139 139
140 if (!discard_supported) 140 if (!discard_supported)
141 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); 141 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
142 else 142 else
143 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); 143 blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
144 144
145 /* 145 /*
146 * Here we calculate the device offsets. 146 * Here we calculate the device offsets.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 254e44e44668..3bea45e8ccff 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5206,12 +5206,12 @@ static void md_free(struct kobject *ko)
5206 if (mddev->sysfs_state) 5206 if (mddev->sysfs_state)
5207 sysfs_put(mddev->sysfs_state); 5207 sysfs_put(mddev->sysfs_state);
5208 5208
5209 if (mddev->gendisk)
5210 del_gendisk(mddev->gendisk);
5209 if (mddev->queue) 5211 if (mddev->queue)
5210 blk_cleanup_queue(mddev->queue); 5212 blk_cleanup_queue(mddev->queue);
5211 if (mddev->gendisk) { 5213 if (mddev->gendisk)
5212 del_gendisk(mddev->gendisk);
5213 put_disk(mddev->gendisk); 5214 put_disk(mddev->gendisk);
5214 }
5215 percpu_ref_exit(&mddev->writes_pending); 5215 percpu_ref_exit(&mddev->writes_pending);
5216 5216
5217 kfree(mddev); 5217 kfree(mddev);
@@ -5619,9 +5619,9 @@ int md_run(struct mddev *mddev)
5619 if (mddev->degraded) 5619 if (mddev->degraded)
5620 nonrot = false; 5620 nonrot = false;
5621 if (nonrot) 5621 if (nonrot)
5622 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue); 5622 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5623 else 5623 else
5624 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue); 5624 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5625 mddev->queue->backing_dev_info->congested_data = mddev; 5625 mddev->queue->backing_dev_info->congested_data = mddev;
5626 mddev->queue->backing_dev_info->congested_fn = md_congested; 5626 mddev->queue->backing_dev_info->congested_fn = md_congested;
5627 } 5627 }
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 5ecba9eef441..584c10347267 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -399,9 +399,9 @@ static int raid0_run(struct mddev *mddev)
399 discard_supported = true; 399 discard_supported = true;
400 } 400 }
401 if (!discard_supported) 401 if (!discard_supported)
402 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); 402 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
403 else 403 else
404 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); 404 blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
405 } 405 }
406 406
407 /* calculate array device size */ 407 /* calculate array device size */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fe872dc6712e..e2943fb74056 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1760,7 +1760,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1760 } 1760 }
1761 } 1761 }
1762 if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) 1762 if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
1763 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); 1763 blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
1764 print_conf(conf); 1764 print_conf(conf);
1765 return err; 1765 return err;
1766} 1766}
@@ -3110,10 +3110,10 @@ static int raid1_run(struct mddev *mddev)
3110 3110
3111 if (mddev->queue) { 3111 if (mddev->queue) {
3112 if (discard_supported) 3112 if (discard_supported)
3113 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, 3113 blk_queue_flag_set(QUEUE_FLAG_DISCARD,
3114 mddev->queue); 3114 mddev->queue);
3115 else 3115 else
3116 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, 3116 blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
3117 mddev->queue); 3117 mddev->queue);
3118 } 3118 }
3119 3119
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c5e6c60fc0d4..3c60774c8430 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1845,7 +1845,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1845 break; 1845 break;
1846 } 1846 }
1847 if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) 1847 if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
1848 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); 1848 blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
1849 1849
1850 print_conf(conf); 1850 print_conf(conf);
1851 return err; 1851 return err;
@@ -3846,10 +3846,10 @@ static int raid10_run(struct mddev *mddev)
3846 3846
3847 if (mddev->queue) { 3847 if (mddev->queue) {
3848 if (discard_supported) 3848 if (discard_supported)
3849 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, 3849 blk_queue_flag_set(QUEUE_FLAG_DISCARD,
3850 mddev->queue); 3850 mddev->queue);
3851 else 3851 else
3852 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, 3852 blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
3853 mddev->queue); 3853 mddev->queue);
3854 } 3854 }
3855 /* need to check that every block has at least one working mirror */ 3855 /* need to check that every block has at least one working mirror */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b5d2601483e3..be117d0a65a8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7443,10 +7443,10 @@ static int raid5_run(struct mddev *mddev)
7443 if (devices_handle_discard_safely && 7443 if (devices_handle_discard_safely &&
7444 mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && 7444 mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
7445 mddev->queue->limits.discard_granularity >= stripe) 7445 mddev->queue->limits.discard_granularity >= stripe)
7446 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, 7446 blk_queue_flag_set(QUEUE_FLAG_DISCARD,
7447 mddev->queue); 7447 mddev->queue);
7448 else 7448 else
7449 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, 7449 blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
7450 mddev->queue); 7450 mddev->queue);
7451 7451
7452 blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); 7452 blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index fd09b0960097..e8f1d4bb806a 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -444,12 +444,12 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
444{ 444{
445 u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi; 445 u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi;
446 u64 val; 446 u64 val;
447 u8 option = SG_VALID | SG_TRANS_DATA; 447 u8 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA;
448 448
449 pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len); 449 pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len);
450 450
451 if (end) 451 if (end)
452 option |= SG_END; 452 option |= RTSX_SG_END;
453 val = ((u64)addr << 32) | ((u64)len << 12) | option; 453 val = ((u64)addr << 32) | ((u64)len << 12) | option;
454 454
455 put_unaligned_le64(val, ptr); 455 put_unaligned_le64(val, ptr);
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index a2b9c2500c4c..02485e310c81 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2659,7 +2659,6 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
2659 * from being accepted. 2659 * from being accepted.
2660 */ 2660 */
2661 card = md->queue.card; 2661 card = md->queue.card;
2662 mmc_cleanup_queue(&md->queue);
2663 if (md->disk->flags & GENHD_FL_UP) { 2662 if (md->disk->flags & GENHD_FL_UP) {
2664 device_remove_file(disk_to_dev(md->disk), &md->force_ro); 2663 device_remove_file(disk_to_dev(md->disk), &md->force_ro);
2665 if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) && 2664 if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
@@ -2669,6 +2668,7 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
2669 2668
2670 del_gendisk(md->disk); 2669 del_gendisk(md->disk);
2671 } 2670 }
2671 mmc_cleanup_queue(&md->queue);
2672 mmc_blk_put(md); 2672 mmc_blk_put(md);
2673 } 2673 }
2674} 2674}
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 421fab7250ac..56e9a803db21 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -185,14 +185,14 @@ static void mmc_queue_setup_discard(struct request_queue *q,
185 if (!max_discard) 185 if (!max_discard)
186 return; 186 return;
187 187
188 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 188 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
189 blk_queue_max_discard_sectors(q, max_discard); 189 blk_queue_max_discard_sectors(q, max_discard);
190 q->limits.discard_granularity = card->pref_erase << 9; 190 q->limits.discard_granularity = card->pref_erase << 9;
191 /* granularity must not be greater than max. discard */ 191 /* granularity must not be greater than max. discard */
192 if (card->pref_erase > max_discard) 192 if (card->pref_erase > max_discard)
193 q->limits.discard_granularity = 0; 193 q->limits.discard_granularity = 0;
194 if (mmc_can_secure_erase_trim(card)) 194 if (mmc_can_secure_erase_trim(card))
195 queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q); 195 blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
196} 196}
197 197
198/** 198/**
@@ -356,8 +356,8 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
356 if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) 356 if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
357 limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT; 357 limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
358 358
359 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); 359 blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
360 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, mq->queue); 360 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
361 if (mmc_can_erase(card)) 361 if (mmc_can_erase(card))
362 mmc_queue_setup_discard(mq->queue, card); 362 mmc_queue_setup_discard(mq->queue, card);
363 363
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 9ec8f033ac5f..16ae4ae8e8f9 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -419,11 +419,11 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
419 blk_queue_logical_block_size(new->rq, tr->blksize); 419 blk_queue_logical_block_size(new->rq, tr->blksize);
420 420
421 blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH); 421 blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
422 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq); 422 blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
423 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq); 423 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
424 424
425 if (tr->discard) { 425 if (tr->discard) {
426 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq); 426 blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
427 blk_queue_max_discard_sectors(new->rq, UINT_MAX); 427 blk_queue_max_discard_sectors(new->rq, UINT_MAX);
428 } 428 }
429 429
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 1bd7b3734751..62e9cb167aad 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -266,7 +266,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
266 blk_queue_make_request(q, nd_blk_make_request); 266 blk_queue_make_request(q, nd_blk_make_request);
267 blk_queue_max_hw_sectors(q, UINT_MAX); 267 blk_queue_max_hw_sectors(q, UINT_MAX);
268 blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); 268 blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
269 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 269 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
270 q->queuedata = nsblk; 270 q->queuedata = nsblk;
271 271
272 disk = alloc_disk(0); 272 disk = alloc_disk(0);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 4b95ac513de2..85de8053aa34 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1542,7 +1542,7 @@ static int btt_blk_init(struct btt *btt)
1542 blk_queue_make_request(btt->btt_queue, btt_make_request); 1542 blk_queue_make_request(btt->btt_queue, btt_make_request);
1543 blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); 1543 blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
1544 blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); 1544 blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
1545 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); 1545 blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
1546 btt->btt_queue->queuedata = btt; 1546 btt->btt_queue->queuedata = btt;
1547 1547
1548 if (btt_meta_size(btt)) { 1548 if (btt_meta_size(btt)) {
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 8d6375ee0fda..184e070d50a2 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -29,7 +29,6 @@ enum {
29 * BTT instance 29 * BTT instance
30 */ 30 */
31 ND_MAX_LANES = 256, 31 ND_MAX_LANES = 256,
32 SECTOR_SHIFT = 9,
33 INT_LBASIZE_ALIGNMENT = 64, 32 INT_LBASIZE_ALIGNMENT = 64,
34 NVDIMM_IO_ATOMIC = 1, 33 NVDIMM_IO_ATOMIC = 1,
35}; 34};
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 06f8dcc52ca6..5a96d30c294a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -343,7 +343,7 @@ static int pmem_attach_disk(struct device *dev,
343 return -EBUSY; 343 return -EBUSY;
344 } 344 }
345 345
346 q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); 346 q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL);
347 if (!q) 347 if (!q)
348 return -ENOMEM; 348 return -ENOMEM;
349 349
@@ -387,8 +387,8 @@ static int pmem_attach_disk(struct device *dev,
387 blk_queue_physical_block_size(q, PAGE_SIZE); 387 blk_queue_physical_block_size(q, PAGE_SIZE);
388 blk_queue_logical_block_size(q, pmem_sector_size(ndns)); 388 blk_queue_logical_block_size(q, pmem_sector_size(ndns));
389 blk_queue_max_hw_sectors(q, UINT_MAX); 389 blk_queue_max_hw_sectors(q, UINT_MAX);
390 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 390 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
391 queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); 391 blk_queue_flag_set(QUEUE_FLAG_DAX, q);
392 q->queuedata = pmem; 392 q->queuedata = pmem;
393 393
394 disk = alloc_disk_node(0, nid); 394 disk = alloc_disk_node(0, nid);
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 441e67e3a9d7..aea459c65ae1 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -12,6 +12,7 @@ nvme-core-y := core.o
12nvme-core-$(CONFIG_TRACING) += trace.o 12nvme-core-$(CONFIG_TRACING) += trace.o
13nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o 13nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
14nvme-core-$(CONFIG_NVM) += lightnvm.o 14nvme-core-$(CONFIG_NVM) += lightnvm.o
15nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
15 16
16nvme-y += pci.o 17nvme-y += pci.o
17 18
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7aeca5db7916..197a6ba9700f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -100,11 +100,6 @@ static struct class *nvme_subsys_class;
100static void nvme_ns_remove(struct nvme_ns *ns); 100static void nvme_ns_remove(struct nvme_ns *ns);
101static int nvme_revalidate_disk(struct gendisk *disk); 101static int nvme_revalidate_disk(struct gendisk *disk);
102 102
103static __le32 nvme_get_log_dw10(u8 lid, size_t size)
104{
105 return cpu_to_le32((((size / 4) - 1) << 16) | lid);
106}
107
108int nvme_reset_ctrl(struct nvme_ctrl *ctrl) 103int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
109{ 104{
110 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) 105 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -135,6 +130,9 @@ static void nvme_delete_ctrl_work(struct work_struct *work)
135 struct nvme_ctrl *ctrl = 130 struct nvme_ctrl *ctrl =
136 container_of(work, struct nvme_ctrl, delete_work); 131 container_of(work, struct nvme_ctrl, delete_work);
137 132
133 dev_info(ctrl->device,
134 "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn);
135
138 flush_work(&ctrl->reset_work); 136 flush_work(&ctrl->reset_work);
139 nvme_stop_ctrl(ctrl); 137 nvme_stop_ctrl(ctrl);
140 nvme_remove_namespaces(ctrl); 138 nvme_remove_namespaces(ctrl);
@@ -948,7 +946,8 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
948 c.identify.opcode = nvme_admin_identify; 946 c.identify.opcode = nvme_admin_identify;
949 c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST; 947 c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
950 c.identify.nsid = cpu_to_le32(nsid); 948 c.identify.nsid = cpu_to_le32(nsid);
951 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); 949 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list,
950 NVME_IDENTIFY_DATA_SIZE);
952} 951}
953 952
954static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, 953static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
@@ -1124,13 +1123,13 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
1124 struct nvme_ns *ns, *next; 1123 struct nvme_ns *ns, *next;
1125 LIST_HEAD(rm_list); 1124 LIST_HEAD(rm_list);
1126 1125
1127 mutex_lock(&ctrl->namespaces_mutex); 1126 down_write(&ctrl->namespaces_rwsem);
1128 list_for_each_entry(ns, &ctrl->namespaces, list) { 1127 list_for_each_entry(ns, &ctrl->namespaces, list) {
1129 if (ns->disk && nvme_revalidate_disk(ns->disk)) { 1128 if (ns->disk && nvme_revalidate_disk(ns->disk)) {
1130 list_move_tail(&ns->list, &rm_list); 1129 list_move_tail(&ns->list, &rm_list);
1131 } 1130 }
1132 } 1131 }
1133 mutex_unlock(&ctrl->namespaces_mutex); 1132 up_write(&ctrl->namespaces_rwsem);
1134 1133
1135 list_for_each_entry_safe(ns, next, &rm_list, list) 1134 list_for_each_entry_safe(ns, next, &rm_list, list)
1136 nvme_ns_remove(ns); 1135 nvme_ns_remove(ns);
@@ -1358,7 +1357,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
1358 1357
1359 blk_queue_max_discard_sectors(queue, UINT_MAX); 1358 blk_queue_max_discard_sectors(queue, UINT_MAX);
1360 blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); 1359 blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
1361 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue); 1360 blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue);
1362 1361
1363 if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) 1362 if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
1364 blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); 1363 blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -1449,6 +1448,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1449 if (ns->noiob) 1448 if (ns->noiob)
1450 nvme_set_chunk_size(ns); 1449 nvme_set_chunk_size(ns);
1451 nvme_update_disk_info(disk, ns, id); 1450 nvme_update_disk_info(disk, ns, id);
1451 if (ns->ndev)
1452 nvme_nvm_update_nvm_info(ns);
1452#ifdef CONFIG_NVME_MULTIPATH 1453#ifdef CONFIG_NVME_MULTIPATH
1453 if (ns->head->disk) 1454 if (ns->head->disk)
1454 nvme_update_disk_info(ns->head->disk, ns, id); 1455 nvme_update_disk_info(ns->head->disk, ns, id);
@@ -2217,18 +2218,35 @@ out_unlock:
2217 return ret; 2218 return ret;
2218} 2219}
2219 2220
2220static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log, 2221int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
2221 size_t size) 2222 u8 log_page, void *log,
2223 size_t size, size_t offset)
2222{ 2224{
2223 struct nvme_command c = { }; 2225 struct nvme_command c = { };
2226 unsigned long dwlen = size / 4 - 1;
2227
2228 c.get_log_page.opcode = nvme_admin_get_log_page;
2229
2230 if (ns)
2231 c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
2232 else
2233 c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
2224 2234
2225 c.common.opcode = nvme_admin_get_log_page; 2235 c.get_log_page.lid = log_page;
2226 c.common.nsid = cpu_to_le32(NVME_NSID_ALL); 2236 c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
2227 c.common.cdw10[0] = nvme_get_log_dw10(log_page, size); 2237 c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
2238 c.get_log_page.lpol = cpu_to_le32(offset & ((1ULL << 32) - 1));
2239 c.get_log_page.lpou = cpu_to_le32(offset >> 32ULL);
2228 2240
2229 return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); 2241 return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
2230} 2242}
2231 2243
2244static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
2245 size_t size)
2246{
2247 return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
2248}
2249
2232static int nvme_get_effects_log(struct nvme_ctrl *ctrl) 2250static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
2233{ 2251{
2234 int ret; 2252 int ret;
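The nvme_get_log_ext() hunk above packs the transfer length and offset into the Get Log Page command as a zero-based dword count split across NUMDL/NUMDU and a 64-bit byte offset split across LPOL/LPOU. The standalone snippet below just re-derives those four fields for a concrete size and offset so the shifts and masks are easy to check; the struct and function names are local to the example, not part of the driver.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ex_get_log_fields {
	uint16_t numdl, numdu;	/* zero-based dword count, low/high 16 bits */
	uint32_t lpol, lpou;	/* byte offset, low/high 32 bits */
};

static struct ex_get_log_fields ex_encode(size_t size, uint64_t offset)
{
	unsigned long dwlen = size / 4 - 1;	/* bytes -> zero-based dwords */
	struct ex_get_log_fields f = {
		.numdl = dwlen & 0xffff,
		.numdu = dwlen >> 16,
		.lpol  = offset & 0xffffffffULL,
		.lpou  = offset >> 32,
	};
	return f;
}

int main(void)
{
	/* e.g. a 4096-byte read starting at byte offset 8192 of the log */
	struct ex_get_log_fields f = ex_encode(4096, 8192);

	/* 4096 bytes = 1024 dwords, zero-based 1023 = 0x3ff, so this prints
	 * numdl=0x3ff numdu=0x0 lpol=0x2000 lpou=0x0 */
	printf("numdl=0x%x numdu=0x%x lpol=0x%x lpou=0x%x\n",
	       (unsigned)f.numdl, (unsigned)f.numdu,
	       (unsigned)f.lpol, (unsigned)f.lpou);
	return 0;
}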
@@ -2440,7 +2458,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
2440 struct nvme_ns *ns; 2458 struct nvme_ns *ns;
2441 int ret; 2459 int ret;
2442 2460
2443 mutex_lock(&ctrl->namespaces_mutex); 2461 down_read(&ctrl->namespaces_rwsem);
2444 if (list_empty(&ctrl->namespaces)) { 2462 if (list_empty(&ctrl->namespaces)) {
2445 ret = -ENOTTY; 2463 ret = -ENOTTY;
2446 goto out_unlock; 2464 goto out_unlock;
@@ -2457,14 +2475,14 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
2457 dev_warn(ctrl->device, 2475 dev_warn(ctrl->device,
2458 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 2476 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
2459 kref_get(&ns->kref); 2477 kref_get(&ns->kref);
2460 mutex_unlock(&ctrl->namespaces_mutex); 2478 up_read(&ctrl->namespaces_rwsem);
2461 2479
2462 ret = nvme_user_cmd(ctrl, ns, argp); 2480 ret = nvme_user_cmd(ctrl, ns, argp);
2463 nvme_put_ns(ns); 2481 nvme_put_ns(ns);
2464 return ret; 2482 return ret;
2465 2483
2466out_unlock: 2484out_unlock:
2467 mutex_unlock(&ctrl->namespaces_mutex); 2485 up_read(&ctrl->namespaces_rwsem);
2468 return ret; 2486 return ret;
2469} 2487}
2470 2488
@@ -2793,6 +2811,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
2793 2811
2794 list_for_each_entry(h, &subsys->nsheads, entry) { 2812 list_for_each_entry(h, &subsys->nsheads, entry) {
2795 if (nvme_ns_ids_valid(&new->ids) && 2813 if (nvme_ns_ids_valid(&new->ids) &&
2814 !list_empty(&h->list) &&
2796 nvme_ns_ids_equal(&new->ids, &h->ids)) 2815 nvme_ns_ids_equal(&new->ids, &h->ids))
2797 return -EINVAL; 2816 return -EINVAL;
2798 } 2817 }
@@ -2893,7 +2912,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2893{ 2912{
2894 struct nvme_ns *ns, *ret = NULL; 2913 struct nvme_ns *ns, *ret = NULL;
2895 2914
2896 mutex_lock(&ctrl->namespaces_mutex); 2915 down_read(&ctrl->namespaces_rwsem);
2897 list_for_each_entry(ns, &ctrl->namespaces, list) { 2916 list_for_each_entry(ns, &ctrl->namespaces, list) {
2898 if (ns->head->ns_id == nsid) { 2917 if (ns->head->ns_id == nsid) {
2899 if (!kref_get_unless_zero(&ns->kref)) 2918 if (!kref_get_unless_zero(&ns->kref))
@@ -2904,7 +2923,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2904 if (ns->head->ns_id > nsid) 2923 if (ns->head->ns_id > nsid)
2905 break; 2924 break;
2906 } 2925 }
2907 mutex_unlock(&ctrl->namespaces_mutex); 2926 up_read(&ctrl->namespaces_rwsem);
2908 return ret; 2927 return ret;
2909} 2928}
2910 2929
@@ -2949,7 +2968,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2949 ns->queue = blk_mq_init_queue(ctrl->tagset); 2968 ns->queue = blk_mq_init_queue(ctrl->tagset);
2950 if (IS_ERR(ns->queue)) 2969 if (IS_ERR(ns->queue))
2951 goto out_free_ns; 2970 goto out_free_ns;
2952 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); 2971 blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
2953 ns->queue->queuedata = ns; 2972 ns->queue->queuedata = ns;
2954 ns->ctrl = ctrl; 2973 ns->ctrl = ctrl;
2955 2974
@@ -3015,9 +3034,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
3015 3034
3016 __nvme_revalidate_disk(disk, id); 3035 __nvme_revalidate_disk(disk, id);
3017 3036
3018 mutex_lock(&ctrl->namespaces_mutex); 3037 down_write(&ctrl->namespaces_rwsem);
3019 list_add_tail(&ns->list, &ctrl->namespaces); 3038 list_add_tail(&ns->list, &ctrl->namespaces);
3020 mutex_unlock(&ctrl->namespaces_mutex); 3039 up_write(&ctrl->namespaces_rwsem);
3021 3040
3022 nvme_get_ctrl(ctrl); 3041 nvme_get_ctrl(ctrl);
3023 3042
@@ -3033,6 +3052,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
3033 ns->disk->disk_name); 3052 ns->disk->disk_name);
3034 3053
3035 nvme_mpath_add_disk(ns->head); 3054 nvme_mpath_add_disk(ns->head);
3055 nvme_fault_inject_init(ns);
3036 return; 3056 return;
3037 out_unlink_ns: 3057 out_unlink_ns:
3038 mutex_lock(&ctrl->subsys->lock); 3058 mutex_lock(&ctrl->subsys->lock);
@@ -3051,6 +3071,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
3051 if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) 3071 if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
3052 return; 3072 return;
3053 3073
3074 nvme_fault_inject_fini(ns);
3054 if (ns->disk && ns->disk->flags & GENHD_FL_UP) { 3075 if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
3055 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, 3076 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
3056 &nvme_ns_id_attr_group); 3077 &nvme_ns_id_attr_group);
@@ -3067,9 +3088,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
3067 list_del_rcu(&ns->siblings); 3088 list_del_rcu(&ns->siblings);
3068 mutex_unlock(&ns->ctrl->subsys->lock); 3089 mutex_unlock(&ns->ctrl->subsys->lock);
3069 3090
3070 mutex_lock(&ns->ctrl->namespaces_mutex); 3091 down_write(&ns->ctrl->namespaces_rwsem);
3071 list_del_init(&ns->list); 3092 list_del_init(&ns->list);
3072 mutex_unlock(&ns->ctrl->namespaces_mutex); 3093 up_write(&ns->ctrl->namespaces_rwsem);
3073 3094
3074 synchronize_srcu(&ns->head->srcu); 3095 synchronize_srcu(&ns->head->srcu);
3075 nvme_mpath_check_last_path(ns); 3096 nvme_mpath_check_last_path(ns);
@@ -3093,11 +3114,18 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
3093 unsigned nsid) 3114 unsigned nsid)
3094{ 3115{
3095 struct nvme_ns *ns, *next; 3116 struct nvme_ns *ns, *next;
3117 LIST_HEAD(rm_list);
3096 3118
3119 down_write(&ctrl->namespaces_rwsem);
3097 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { 3120 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
3098 if (ns->head->ns_id > nsid) 3121 if (ns->head->ns_id > nsid)
3099 nvme_ns_remove(ns); 3122 list_move_tail(&ns->list, &rm_list);
3100 } 3123 }
3124 up_write(&ctrl->namespaces_rwsem);
3125
3126 list_for_each_entry_safe(ns, next, &rm_list, list)
3127 nvme_ns_remove(ns);
3128
3101} 3129}
3102 3130
3103static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) 3131static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
@@ -3107,7 +3135,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
3107 unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024); 3135 unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
3108 int ret = 0; 3136 int ret = 0;
3109 3137
3110 ns_list = kzalloc(0x1000, GFP_KERNEL); 3138 ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
3111 if (!ns_list) 3139 if (!ns_list)
3112 return -ENOMEM; 3140 return -ENOMEM;
3113 3141
@@ -3173,9 +3201,9 @@ static void nvme_scan_work(struct work_struct *work)
3173 } 3201 }
3174 nvme_scan_ns_sequential(ctrl, nn); 3202 nvme_scan_ns_sequential(ctrl, nn);
3175 done: 3203 done:
3176 mutex_lock(&ctrl->namespaces_mutex); 3204 down_write(&ctrl->namespaces_rwsem);
3177 list_sort(NULL, &ctrl->namespaces, ns_cmp); 3205 list_sort(NULL, &ctrl->namespaces, ns_cmp);
3178 mutex_unlock(&ctrl->namespaces_mutex); 3206 up_write(&ctrl->namespaces_rwsem);
3179 kfree(id); 3207 kfree(id);
3180} 3208}
3181 3209
@@ -3197,6 +3225,7 @@ EXPORT_SYMBOL_GPL(nvme_queue_scan);
3197void nvme_remove_namespaces(struct nvme_ctrl *ctrl) 3225void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
3198{ 3226{
3199 struct nvme_ns *ns, *next; 3227 struct nvme_ns *ns, *next;
3228 LIST_HEAD(ns_list);
3200 3229
3201 /* 3230 /*
3202 * The dead states indicates the controller was not gracefully 3231 * The dead states indicates the controller was not gracefully
@@ -3207,7 +3236,11 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
3207 if (ctrl->state == NVME_CTRL_DEAD) 3236 if (ctrl->state == NVME_CTRL_DEAD)
3208 nvme_kill_queues(ctrl); 3237 nvme_kill_queues(ctrl);
3209 3238
3210 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) 3239 down_write(&ctrl->namespaces_rwsem);
3240 list_splice_init(&ctrl->namespaces, &ns_list);
3241 up_write(&ctrl->namespaces_rwsem);
3242
3243 list_for_each_entry_safe(ns, next, &ns_list, list)
3211 nvme_ns_remove(ns); 3244 nvme_ns_remove(ns);
3212} 3245}
3213EXPORT_SYMBOL_GPL(nvme_remove_namespaces); 3246EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
@@ -3337,6 +3370,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
3337 flush_work(&ctrl->async_event_work); 3370 flush_work(&ctrl->async_event_work);
3338 flush_work(&ctrl->scan_work); 3371 flush_work(&ctrl->scan_work);
3339 cancel_work_sync(&ctrl->fw_act_work); 3372 cancel_work_sync(&ctrl->fw_act_work);
3373 if (ctrl->ops->stop_ctrl)
3374 ctrl->ops->stop_ctrl(ctrl);
3340} 3375}
3341EXPORT_SYMBOL_GPL(nvme_stop_ctrl); 3376EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
3342 3377
@@ -3394,7 +3429,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
3394 ctrl->state = NVME_CTRL_NEW; 3429 ctrl->state = NVME_CTRL_NEW;
3395 spin_lock_init(&ctrl->lock); 3430 spin_lock_init(&ctrl->lock);
3396 INIT_LIST_HEAD(&ctrl->namespaces); 3431 INIT_LIST_HEAD(&ctrl->namespaces);
3397 mutex_init(&ctrl->namespaces_mutex); 3432 init_rwsem(&ctrl->namespaces_rwsem);
3398 ctrl->dev = dev; 3433 ctrl->dev = dev;
3399 ctrl->ops = ops; 3434 ctrl->ops = ops;
3400 ctrl->quirks = quirks; 3435 ctrl->quirks = quirks;
@@ -3455,7 +3490,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
3455{ 3490{
3456 struct nvme_ns *ns; 3491 struct nvme_ns *ns;
3457 3492
3458 mutex_lock(&ctrl->namespaces_mutex); 3493 down_read(&ctrl->namespaces_rwsem);
3459 3494
3460 /* Forcibly unquiesce queues to avoid blocking dispatch */ 3495 /* Forcibly unquiesce queues to avoid blocking dispatch */
3461 if (ctrl->admin_q) 3496 if (ctrl->admin_q)
@@ -3474,7 +3509,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
3474 /* Forcibly unquiesce queues to avoid blocking dispatch */ 3509 /* Forcibly unquiesce queues to avoid blocking dispatch */
3475 blk_mq_unquiesce_queue(ns->queue); 3510 blk_mq_unquiesce_queue(ns->queue);
3476 } 3511 }
3477 mutex_unlock(&ctrl->namespaces_mutex); 3512 up_read(&ctrl->namespaces_rwsem);
3478} 3513}
3479EXPORT_SYMBOL_GPL(nvme_kill_queues); 3514EXPORT_SYMBOL_GPL(nvme_kill_queues);
3480 3515
@@ -3482,10 +3517,10 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
3482{ 3517{
3483 struct nvme_ns *ns; 3518 struct nvme_ns *ns;
3484 3519
3485 mutex_lock(&ctrl->namespaces_mutex); 3520 down_read(&ctrl->namespaces_rwsem);
3486 list_for_each_entry(ns, &ctrl->namespaces, list) 3521 list_for_each_entry(ns, &ctrl->namespaces, list)
3487 blk_mq_unfreeze_queue(ns->queue); 3522 blk_mq_unfreeze_queue(ns->queue);
3488 mutex_unlock(&ctrl->namespaces_mutex); 3523 up_read(&ctrl->namespaces_rwsem);
3489} 3524}
3490EXPORT_SYMBOL_GPL(nvme_unfreeze); 3525EXPORT_SYMBOL_GPL(nvme_unfreeze);
3491 3526
@@ -3493,13 +3528,13 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
3493{ 3528{
3494 struct nvme_ns *ns; 3529 struct nvme_ns *ns;
3495 3530
3496 mutex_lock(&ctrl->namespaces_mutex); 3531 down_read(&ctrl->namespaces_rwsem);
3497 list_for_each_entry(ns, &ctrl->namespaces, list) { 3532 list_for_each_entry(ns, &ctrl->namespaces, list) {
3498 timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); 3533 timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
3499 if (timeout <= 0) 3534 if (timeout <= 0)
3500 break; 3535 break;
3501 } 3536 }
3502 mutex_unlock(&ctrl->namespaces_mutex); 3537 up_read(&ctrl->namespaces_rwsem);
3503} 3538}
3504EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); 3539EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
3505 3540
@@ -3507,10 +3542,10 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl)
3507{ 3542{
3508 struct nvme_ns *ns; 3543 struct nvme_ns *ns;
3509 3544
3510 mutex_lock(&ctrl->namespaces_mutex); 3545 down_read(&ctrl->namespaces_rwsem);
3511 list_for_each_entry(ns, &ctrl->namespaces, list) 3546 list_for_each_entry(ns, &ctrl->namespaces, list)
3512 blk_mq_freeze_queue_wait(ns->queue); 3547 blk_mq_freeze_queue_wait(ns->queue);
3513 mutex_unlock(&ctrl->namespaces_mutex); 3548 up_read(&ctrl->namespaces_rwsem);
3514} 3549}
3515EXPORT_SYMBOL_GPL(nvme_wait_freeze); 3550EXPORT_SYMBOL_GPL(nvme_wait_freeze);
3516 3551
@@ -3518,10 +3553,10 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
3518{ 3553{
3519 struct nvme_ns *ns; 3554 struct nvme_ns *ns;
3520 3555
3521 mutex_lock(&ctrl->namespaces_mutex); 3556 down_read(&ctrl->namespaces_rwsem);
3522 list_for_each_entry(ns, &ctrl->namespaces, list) 3557 list_for_each_entry(ns, &ctrl->namespaces, list)
3523 blk_freeze_queue_start(ns->queue); 3558 blk_freeze_queue_start(ns->queue);
3524 mutex_unlock(&ctrl->namespaces_mutex); 3559 up_read(&ctrl->namespaces_rwsem);
3525} 3560}
3526EXPORT_SYMBOL_GPL(nvme_start_freeze); 3561EXPORT_SYMBOL_GPL(nvme_start_freeze);
3527 3562
@@ -3529,10 +3564,10 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
3529{ 3564{
3530 struct nvme_ns *ns; 3565 struct nvme_ns *ns;
3531 3566
3532 mutex_lock(&ctrl->namespaces_mutex); 3567 down_read(&ctrl->namespaces_rwsem);
3533 list_for_each_entry(ns, &ctrl->namespaces, list) 3568 list_for_each_entry(ns, &ctrl->namespaces, list)
3534 blk_mq_quiesce_queue(ns->queue); 3569 blk_mq_quiesce_queue(ns->queue);
3535 mutex_unlock(&ctrl->namespaces_mutex); 3570 up_read(&ctrl->namespaces_rwsem);
3536} 3571}
3537EXPORT_SYMBOL_GPL(nvme_stop_queues); 3572EXPORT_SYMBOL_GPL(nvme_stop_queues);
3538 3573
@@ -3540,10 +3575,10 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
3540{ 3575{
3541 struct nvme_ns *ns; 3576 struct nvme_ns *ns;
3542 3577
3543 mutex_lock(&ctrl->namespaces_mutex); 3578 down_read(&ctrl->namespaces_rwsem);
3544 list_for_each_entry(ns, &ctrl->namespaces, list) 3579 list_for_each_entry(ns, &ctrl->namespaces, list)
3545 blk_mq_unquiesce_queue(ns->queue); 3580 blk_mq_unquiesce_queue(ns->queue);
3546 mutex_unlock(&ctrl->namespaces_mutex); 3581 up_read(&ctrl->namespaces_rwsem);
3547} 3582}
3548EXPORT_SYMBOL_GPL(nvme_start_queues); 3583EXPORT_SYMBOL_GPL(nvme_start_queues);
3549 3584
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
new file mode 100644
index 000000000000..02632266ac06
--- /dev/null
+++ b/drivers/nvme/host/fault_inject.c
@@ -0,0 +1,79 @@
1/*
2 * fault injection support for nvme.
3 *
4 * Copyright (c) 2018, Oracle and/or its affiliates
5 *
6 */
7
8#include <linux/moduleparam.h>
9#include "nvme.h"
10
11static DECLARE_FAULT_ATTR(fail_default_attr);
12/* optional fault injection attributes boot time option:
13 * nvme_core.fail_request=<interval>,<probability>,<space>,<times>
14 */
15static char *fail_request;
16module_param(fail_request, charp, 0000);
17
18void nvme_fault_inject_init(struct nvme_ns *ns)
19{
20 struct dentry *dir, *parent;
21 char *name = ns->disk->disk_name;
22 struct nvme_fault_inject *fault_inj = &ns->fault_inject;
23 struct fault_attr *attr = &fault_inj->attr;
24
25 /* set default fault injection attribute */
26 if (fail_request)
27 setup_fault_attr(&fail_default_attr, fail_request);
28
29 /* create debugfs directory and attribute */
30 parent = debugfs_create_dir(name, NULL);
31 if (!parent) {
32 pr_warn("%s: failed to create debugfs directory\n", name);
33 return;
34 }
35
36 *attr = fail_default_attr;
37 dir = fault_create_debugfs_attr("fault_inject", parent, attr);
38 if (IS_ERR(dir)) {
39 pr_warn("%s: failed to create debugfs attr\n", name);
40 debugfs_remove_recursive(parent);
41 return;
42 }
43 ns->fault_inject.parent = parent;
44
45 /* create debugfs for status code and dont_retry */
46 fault_inj->status = NVME_SC_INVALID_OPCODE;
47 fault_inj->dont_retry = true;
48 debugfs_create_x16("status", 0600, dir, &fault_inj->status);
49 debugfs_create_bool("dont_retry", 0600, dir, &fault_inj->dont_retry);
50}
51
52void nvme_fault_inject_fini(struct nvme_ns *ns)
53{
54 /* remove debugfs directories */
55 debugfs_remove_recursive(ns->fault_inject.parent);
56}
57
58void nvme_should_fail(struct request *req)
59{
60 struct gendisk *disk = req->rq_disk;
61 struct nvme_ns *ns = NULL;
62 u16 status;
63
64 /*
65 * make sure this request is coming from a valid namespace
66 */
67 if (!disk)
68 return;
69
70 ns = disk->private_data;
71 if (ns && should_fail(&ns->fault_inject.attr, 1)) {
72 /* inject status code and DNR bit */
73 status = ns->fault_inject.status;
74 if (ns->fault_inject.dont_retry)
75 status |= NVME_SC_DNR;
76 nvme_req(req)->status = status;
77 }
78}
79EXPORT_SYMBOL_GPL(nvme_should_fail);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 1dc1387b7134..c6e719b2f3ca 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -588,6 +588,8 @@ nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
588 return ERR_PTR(-ESTALE); 588 return ERR_PTR(-ESTALE);
589 } 589 }
590 590
591 rport->remoteport.port_role = pinfo->port_role;
592 rport->remoteport.port_id = pinfo->port_id;
591 rport->remoteport.port_state = FC_OBJSTATE_ONLINE; 593 rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
592 rport->dev_loss_end = 0; 594 rport->dev_loss_end = 0;
593 595
@@ -768,8 +770,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
768 */ 770 */
769 if (nvme_reset_ctrl(&ctrl->ctrl)) { 771 if (nvme_reset_ctrl(&ctrl->ctrl)) {
770 dev_warn(ctrl->ctrl.device, 772 dev_warn(ctrl->ctrl.device,
771 "NVME-FC{%d}: Couldn't schedule reset. " 773 "NVME-FC{%d}: Couldn't schedule reset.\n",
772 "Deleting controller.\n",
773 ctrl->cnum); 774 ctrl->cnum);
774 nvme_delete_ctrl(&ctrl->ctrl); 775 nvme_delete_ctrl(&ctrl->ctrl);
775 } 776 }
@@ -836,8 +837,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
836 /* if dev_loss_tmo==0, dev loss is immediate */ 837 /* if dev_loss_tmo==0, dev loss is immediate */
837 if (!portptr->dev_loss_tmo) { 838 if (!portptr->dev_loss_tmo) {
838 dev_warn(ctrl->ctrl.device, 839 dev_warn(ctrl->ctrl.device,
839 "NVME-FC{%d}: controller connectivity lost. " 840 "NVME-FC{%d}: controller connectivity lost.\n",
840 "Deleting controller.\n",
841 ctrl->cnum); 841 ctrl->cnum);
842 nvme_delete_ctrl(&ctrl->ctrl); 842 nvme_delete_ctrl(&ctrl->ctrl);
843 } else 843 } else
@@ -2076,20 +2076,10 @@ nvme_fc_timeout(struct request *rq, bool reserved)
2076{ 2076{
2077 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2077 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
2078 struct nvme_fc_ctrl *ctrl = op->ctrl; 2078 struct nvme_fc_ctrl *ctrl = op->ctrl;
2079 int ret;
2080
2081 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
2082 atomic_read(&op->state) == FCPOP_STATE_ABORTED)
2083 return BLK_EH_RESET_TIMER;
2084
2085 ret = __nvme_fc_abort_op(ctrl, op);
2086 if (ret)
2087 /* io wasn't active to abort */
2088 return BLK_EH_NOT_HANDLED;
2089 2079
2090 /* 2080 /*
2091 * we can't individually ABTS an io without affecting the queue, 2081 * we can't individually ABTS an io without affecting the queue,
2092 * thus killing the queue, adn thus the association. 2082 * thus killing the queue, and thus the association.
2093 * So resolve by performing a controller reset, which will stop 2083 * So resolve by performing a controller reset, which will stop
2094 * the host/io stack, terminate the association on the link, 2084 * the host/io stack, terminate the association on the link,
2095 * and recreate an association on the link. 2085 * and recreate an association on the link.
@@ -2191,7 +2181,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
2191 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2181 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
2192 struct nvme_command *sqe = &cmdiu->sqe; 2182 struct nvme_command *sqe = &cmdiu->sqe;
2193 u32 csn; 2183 u32 csn;
2194 int ret; 2184 int ret, opstate;
2195 2185
2196 /* 2186 /*
2197 * before attempting to send the io, check to see if we believe 2187 * before attempting to send the io, check to see if we believe
@@ -2269,6 +2259,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
2269 queue->lldd_handle, &op->fcp_req); 2259 queue->lldd_handle, &op->fcp_req);
2270 2260
2271 if (ret) { 2261 if (ret) {
2262 opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
2263 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
2264
2272 if (!(op->flags & FCOP_FLAGS_AEN)) 2265 if (!(op->flags & FCOP_FLAGS_AEN))
2273 nvme_fc_unmap_data(ctrl, op->rq, op); 2266 nvme_fc_unmap_data(ctrl, op->rq, op);
2274 2267
@@ -2889,14 +2882,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
2889 if (portptr->port_state == FC_OBJSTATE_ONLINE) 2882 if (portptr->port_state == FC_OBJSTATE_ONLINE)
2890 dev_warn(ctrl->ctrl.device, 2883 dev_warn(ctrl->ctrl.device,
2891 "NVME-FC{%d}: Max reconnect attempts (%d) " 2884 "NVME-FC{%d}: Max reconnect attempts (%d) "
2892 "reached. Removing controller\n", 2885 "reached.\n",
2893 ctrl->cnum, ctrl->ctrl.nr_reconnects); 2886 ctrl->cnum, ctrl->ctrl.nr_reconnects);
2894 else 2887 else
2895 dev_warn(ctrl->ctrl.device, 2888 dev_warn(ctrl->ctrl.device,
2896 "NVME-FC{%d}: dev_loss_tmo (%d) expired " 2889 "NVME-FC{%d}: dev_loss_tmo (%d) expired "
2897 "while waiting for remoteport connectivity. " 2890 "while waiting for remoteport connectivity.\n",
2898 "Removing controller\n", ctrl->cnum, 2891 ctrl->cnum, portptr->dev_loss_tmo);
2899 portptr->dev_loss_tmo);
2900 WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); 2892 WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
2901 } 2893 }
2902} 2894}
@@ -3133,6 +3125,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
3133 } 3125 }
3134 3126
3135 if (ret) { 3127 if (ret) {
3128 nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
3129 cancel_work_sync(&ctrl->ctrl.reset_work);
3130 cancel_delayed_work_sync(&ctrl->connect_work);
3131
3136 /* couldn't schedule retry - fail out */ 3132 /* couldn't schedule retry - fail out */
3137 dev_err(ctrl->ctrl.device, 3133 dev_err(ctrl->ctrl.device,
3138 "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); 3134 "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 50ef71ee3d86..41279da799ed 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -35,6 +35,10 @@ enum nvme_nvm_admin_opcode {
35 nvme_nvm_admin_set_bb_tbl = 0xf1, 35 nvme_nvm_admin_set_bb_tbl = 0xf1,
36}; 36};
37 37
38enum nvme_nvm_log_page {
39 NVME_NVM_LOG_REPORT_CHUNK = 0xca,
40};
41
38struct nvme_nvm_ph_rw { 42struct nvme_nvm_ph_rw {
39 __u8 opcode; 43 __u8 opcode;
40 __u8 flags; 44 __u8 flags;
@@ -51,6 +55,21 @@ struct nvme_nvm_ph_rw {
51 __le64 resv; 55 __le64 resv;
52}; 56};
53 57
58struct nvme_nvm_erase_blk {
59 __u8 opcode;
60 __u8 flags;
61 __u16 command_id;
62 __le32 nsid;
63 __u64 rsvd[2];
64 __le64 prp1;
65 __le64 prp2;
66 __le64 spba;
67 __le16 length;
68 __le16 control;
69 __le32 dsmgmt;
70 __le64 resv;
71};
72
54struct nvme_nvm_identity { 73struct nvme_nvm_identity {
55 __u8 opcode; 74 __u8 opcode;
56 __u8 flags; 75 __u8 flags;
@@ -59,8 +78,7 @@ struct nvme_nvm_identity {
59 __u64 rsvd[2]; 78 __u64 rsvd[2];
60 __le64 prp1; 79 __le64 prp1;
61 __le64 prp2; 80 __le64 prp2;
62 __le32 chnl_off; 81 __u32 rsvd11[6];
63 __u32 rsvd11[5];
64}; 82};
65 83
66struct nvme_nvm_getbbtbl { 84struct nvme_nvm_getbbtbl {
@@ -90,44 +108,18 @@ struct nvme_nvm_setbbtbl {
90 __u32 rsvd4[3]; 108 __u32 rsvd4[3];
91}; 109};
92 110
93struct nvme_nvm_erase_blk {
94 __u8 opcode;
95 __u8 flags;
96 __u16 command_id;
97 __le32 nsid;
98 __u64 rsvd[2];
99 __le64 prp1;
100 __le64 prp2;
101 __le64 spba;
102 __le16 length;
103 __le16 control;
104 __le32 dsmgmt;
105 __le64 resv;
106};
107
108struct nvme_nvm_command { 111struct nvme_nvm_command {
109 union { 112 union {
110 struct nvme_common_command common; 113 struct nvme_common_command common;
111 struct nvme_nvm_identity identity;
112 struct nvme_nvm_ph_rw ph_rw; 114 struct nvme_nvm_ph_rw ph_rw;
115 struct nvme_nvm_erase_blk erase;
116 struct nvme_nvm_identity identity;
113 struct nvme_nvm_getbbtbl get_bb; 117 struct nvme_nvm_getbbtbl get_bb;
114 struct nvme_nvm_setbbtbl set_bb; 118 struct nvme_nvm_setbbtbl set_bb;
115 struct nvme_nvm_erase_blk erase;
116 }; 119 };
117}; 120};
118 121
119#define NVME_NVM_LP_MLC_PAIRS 886 122struct nvme_nvm_id12_grp {
120struct nvme_nvm_lp_mlc {
121 __le16 num_pairs;
122 __u8 pairs[NVME_NVM_LP_MLC_PAIRS];
123};
124
125struct nvme_nvm_lp_tbl {
126 __u8 id[8];
127 struct nvme_nvm_lp_mlc mlc;
128};
129
130struct nvme_nvm_id_group {
131 __u8 mtype; 123 __u8 mtype;
132 __u8 fmtype; 124 __u8 fmtype;
133 __le16 res16; 125 __le16 res16;
@@ -150,11 +142,10 @@ struct nvme_nvm_id_group {
150 __le32 mpos; 142 __le32 mpos;
151 __le32 mccap; 143 __le32 mccap;
152 __le16 cpar; 144 __le16 cpar;
153 __u8 reserved[10]; 145 __u8 reserved[906];
154 struct nvme_nvm_lp_tbl lptbl;
155} __packed; 146} __packed;
156 147
157struct nvme_nvm_addr_format { 148struct nvme_nvm_id12_addrf {
158 __u8 ch_offset; 149 __u8 ch_offset;
159 __u8 ch_len; 150 __u8 ch_len;
160 __u8 lun_offset; 151 __u8 lun_offset;
@@ -165,21 +156,22 @@ struct nvme_nvm_addr_format {
165 __u8 blk_len; 156 __u8 blk_len;
166 __u8 pg_offset; 157 __u8 pg_offset;
167 __u8 pg_len; 158 __u8 pg_len;
168 __u8 sect_offset; 159 __u8 sec_offset;
169 __u8 sect_len; 160 __u8 sec_len;
170 __u8 res[4]; 161 __u8 res[4];
171} __packed; 162} __packed;
172 163
173struct nvme_nvm_id { 164struct nvme_nvm_id12 {
174 __u8 ver_id; 165 __u8 ver_id;
175 __u8 vmnt; 166 __u8 vmnt;
176 __u8 cgrps; 167 __u8 cgrps;
177 __u8 res; 168 __u8 res;
178 __le32 cap; 169 __le32 cap;
179 __le32 dom; 170 __le32 dom;
180 struct nvme_nvm_addr_format ppaf; 171 struct nvme_nvm_id12_addrf ppaf;
181 __u8 resv[228]; 172 __u8 resv[228];
182 struct nvme_nvm_id_group groups[4]; 173 struct nvme_nvm_id12_grp grp;
174 __u8 resv2[2880];
183} __packed; 175} __packed;
184 176
185struct nvme_nvm_bb_tbl { 177struct nvme_nvm_bb_tbl {
@@ -196,6 +188,68 @@ struct nvme_nvm_bb_tbl {
196 __u8 blk[0]; 188 __u8 blk[0];
197}; 189};
198 190
191struct nvme_nvm_id20_addrf {
192 __u8 grp_len;
193 __u8 pu_len;
194 __u8 chk_len;
195 __u8 lba_len;
196 __u8 resv[4];
197};
198
199struct nvme_nvm_id20 {
200 __u8 mjr;
201 __u8 mnr;
202 __u8 resv[6];
203
204 struct nvme_nvm_id20_addrf lbaf;
205
206 __le32 mccap;
207 __u8 resv2[12];
208
209 __u8 wit;
210 __u8 resv3[31];
211
212 /* Geometry */
213 __le16 num_grp;
214 __le16 num_pu;
215 __le32 num_chk;
216 __le32 clba;
217 __u8 resv4[52];
218
219 /* Write data requirements */
220 __le32 ws_min;
221 __le32 ws_opt;
222 __le32 mw_cunits;
223 __le32 maxoc;
224 __le32 maxocpu;
225 __u8 resv5[44];
226
227 /* Performance related metrics */
228 __le32 trdt;
229 __le32 trdm;
230 __le32 twrt;
231 __le32 twrm;
232 __le32 tcrst;
233 __le32 tcrsm;
234 __u8 resv6[40];
235
236 /* Reserved area */
237 __u8 resv7[2816];
238
239 /* Vendor specific */
240 __u8 vs[1024];
241};
242
243struct nvme_nvm_chk_meta {
244 __u8 state;
245 __u8 type;
246 __u8 wi;
247 __u8 rsvd[5];
248 __le64 slba;
249 __le64 cnlb;
250 __le64 wp;
251};
252
199/* 253/*
200 * Check we didn't inadvertently grow the command struct 254 * Check we didn't inadvertently grow the command struct
201 */ 255 */
@@ -203,105 +257,238 @@ static inline void _nvme_nvm_check_size(void)
203{ 257{
204 BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); 258 BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
205 BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); 259 BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
260 BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
206 BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64); 261 BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
207 BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); 262 BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
208 BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); 263 BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
209 BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); 264 BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
210 BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); 265 BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
211 BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
212 BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); 266 BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
267 BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
268 BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
269 BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
270 BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
271 sizeof(struct nvm_chk_meta));
272}
273
274static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
275 struct nvme_nvm_id12_addrf *src)
276{
277 dst->ch_len = src->ch_len;
278 dst->lun_len = src->lun_len;
279 dst->blk_len = src->blk_len;
280 dst->pg_len = src->pg_len;
281 dst->pln_len = src->pln_len;
282 dst->sec_len = src->sec_len;
283
284 dst->ch_offset = src->ch_offset;
285 dst->lun_offset = src->lun_offset;
286 dst->blk_offset = src->blk_offset;
287 dst->pg_offset = src->pg_offset;
288 dst->pln_offset = src->pln_offset;
289 dst->sec_offset = src->sec_offset;
290
291 dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
292 dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
293 dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
294 dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
295 dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
296 dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
213} 297}
214 298
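The masks built just above follow the standard pack/extract pattern: a field that is len bits wide and starts at bit offset is selected by ((1ULL << len) - 1) << offset. A minimal userspace sketch of the same arithmetic, using hypothetical field widths rather than values from any real device geometry:

#include <stdint.h>
#include <stdio.h>

/* Mask for a bit-field that is 'len' bits wide and starts at bit 'offset'. */
static uint64_t field_mask(unsigned int len, unsigned int offset)
{
	return ((1ULL << len) - 1) << offset;
}

/* Pull one field out of a packed 64-bit device address. */
static uint64_t field_get(uint64_t ppa, unsigned int len, unsigned int offset)
{
	return (ppa & field_mask(len, offset)) >> offset;
}

int main(void)
{
	/* Hypothetical layout: a 4-bit channel field at bit 28. */
	uint64_t ppa = 0x35000000ULL;

	printf("ch_mask = 0x%llx\n", (unsigned long long)field_mask(4, 28));
	printf("channel = %llu\n", (unsigned long long)field_get(ppa, 4, 28));
	return 0;
}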
215static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) 299static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
300 struct nvm_geo *geo)
216{ 301{
217 struct nvme_nvm_id_group *src; 302 struct nvme_nvm_id12_grp *src;
218 struct nvm_id_group *grp;
219 int sec_per_pg, sec_per_pl, pg_per_blk; 303 int sec_per_pg, sec_per_pl, pg_per_blk;
220 304
221 if (nvme_nvm_id->cgrps != 1) 305 if (id->cgrps != 1)
306 return -EINVAL;
307
308 src = &id->grp;
309
310 if (src->mtype != 0) {
311 pr_err("nvm: memory type not supported\n");
222 return -EINVAL; 312 return -EINVAL;
313 }
314
315 /* 1.2 spec. only reports a single version id - unfold */
316 geo->major_ver_id = id->ver_id;
317 geo->minor_ver_id = 2;
223 318
224 src = &nvme_nvm_id->groups[0]; 319 /* Set compacted version for upper layers */
225 grp = &nvm_id->grp; 320 geo->version = NVM_OCSSD_SPEC_12;
226 321
227 grp->mtype = src->mtype; 322 geo->num_ch = src->num_ch;
228 grp->fmtype = src->fmtype; 323 geo->num_lun = src->num_lun;
324 geo->all_luns = geo->num_ch * geo->num_lun;
229 325
230 grp->num_ch = src->num_ch; 326 geo->num_chk = le16_to_cpu(src->num_chk);
231 grp->num_lun = src->num_lun;
232 327
233 grp->num_chk = le16_to_cpu(src->num_chk); 328 geo->csecs = le16_to_cpu(src->csecs);
234 grp->csecs = le16_to_cpu(src->csecs); 329 geo->sos = le16_to_cpu(src->sos);
235 grp->sos = le16_to_cpu(src->sos);
236 330
237 pg_per_blk = le16_to_cpu(src->num_pg); 331 pg_per_blk = le16_to_cpu(src->num_pg);
238 sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs; 332 sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
239 sec_per_pl = sec_per_pg * src->num_pln; 333 sec_per_pl = sec_per_pg * src->num_pln;
240 grp->clba = sec_per_pl * pg_per_blk; 334 geo->clba = sec_per_pl * pg_per_blk;
241 grp->ws_per_chk = pg_per_blk; 335
242 336 geo->all_chunks = geo->all_luns * geo->num_chk;
243 grp->mpos = le32_to_cpu(src->mpos); 337 geo->total_secs = geo->clba * geo->all_chunks;
244 grp->cpar = le16_to_cpu(src->cpar); 338
245 grp->mccap = le32_to_cpu(src->mccap); 339 geo->ws_min = sec_per_pg;
246 340 geo->ws_opt = sec_per_pg;
247 grp->ws_opt = grp->ws_min = sec_per_pg; 341 geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */
248 grp->ws_seq = NVM_IO_SNGL_ACCESS;
249
250 if (grp->mpos & 0x020202) {
251 grp->ws_seq = NVM_IO_DUAL_ACCESS;
252 grp->ws_opt <<= 1;
253 } else if (grp->mpos & 0x040404) {
254 grp->ws_seq = NVM_IO_QUAD_ACCESS;
255 grp->ws_opt <<= 2;
256 }
257 342
258 grp->trdt = le32_to_cpu(src->trdt); 343 /* Do not impose values for maximum number of open blocks as it is
259 grp->trdm = le32_to_cpu(src->trdm); 344 * unspecified in 1.2. Users of 1.2 must be aware of this and eventually
260 grp->tprt = le32_to_cpu(src->tprt); 345 * specify these values through a quirk if restrictions apply.
261 grp->tprm = le32_to_cpu(src->tprm); 346 */
262 grp->tbet = le32_to_cpu(src->tbet); 347 geo->maxoc = geo->all_luns * geo->num_chk;
263 grp->tbem = le32_to_cpu(src->tbem); 348 geo->maxocpu = geo->num_chk;
349
350 geo->mccap = le32_to_cpu(src->mccap);
351
352 geo->trdt = le32_to_cpu(src->trdt);
353 geo->trdm = le32_to_cpu(src->trdm);
354 geo->tprt = le32_to_cpu(src->tprt);
355 geo->tprm = le32_to_cpu(src->tprm);
356 geo->tbet = le32_to_cpu(src->tbet);
357 geo->tbem = le32_to_cpu(src->tbem);
264 358
265 /* 1.2 compatibility */ 359 /* 1.2 compatibility */
266 grp->num_pln = src->num_pln; 360 geo->vmnt = id->vmnt;
267 grp->num_pg = le16_to_cpu(src->num_pg); 361 geo->cap = le32_to_cpu(id->cap);
268 grp->fpg_sz = le16_to_cpu(src->fpg_sz); 362 geo->dom = le32_to_cpu(id->dom);
363
364 geo->mtype = src->mtype;
365 geo->fmtype = src->fmtype;
366
367 geo->cpar = le16_to_cpu(src->cpar);
368 geo->mpos = le32_to_cpu(src->mpos);
369
370 geo->pln_mode = NVM_PLANE_SINGLE;
371
372 if (geo->mpos & 0x020202) {
373 geo->pln_mode = NVM_PLANE_DOUBLE;
374 geo->ws_opt <<= 1;
375 } else if (geo->mpos & 0x040404) {
376 geo->pln_mode = NVM_PLANE_QUAD;
377 geo->ws_opt <<= 2;
378 }
379
380 geo->num_pln = src->num_pln;
381 geo->num_pg = le16_to_cpu(src->num_pg);
382 geo->fpg_sz = le16_to_cpu(src->fpg_sz);
383
384 nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);
269 385
270 return 0; 386 return 0;
271} 387}
272 388
273static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) 389static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
390 struct nvme_nvm_id20_addrf *src)
391{
392 dst->ch_len = src->grp_len;
393 dst->lun_len = src->pu_len;
394 dst->chk_len = src->chk_len;
395 dst->sec_len = src->lba_len;
396
397 dst->sec_offset = 0;
398 dst->chk_offset = dst->sec_len;
399 dst->lun_offset = dst->chk_offset + dst->chk_len;
400 dst->ch_offset = dst->lun_offset + dst->lun_len;
401
402 dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
403 dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
404 dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
405 dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
406}
407
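In contrast to 1.2, the 2.0 identify data reports only field lengths; the helper above reconstructs the offsets by stacking fields from bit 0 upwards (LBA, then chunk, then parallel unit, then group). A brief sketch with hypothetical lengths, showing how the offsets accumulate:

#include <stdio.h>

int main(void)
{
	/* Hypothetical 2.0 field lengths (group/PU/chunk/LBA). */
	unsigned int grp_len = 2, pu_len = 3, chk_len = 12, lba_len = 12;

	/* Offsets accumulate from bit 0, mirroring the conversion above. */
	unsigned int sec_offset = 0;
	unsigned int chk_offset = sec_offset + lba_len;
	unsigned int lun_offset = chk_offset + chk_len;
	unsigned int ch_offset = lun_offset + pu_len;

	printf("sec@%u chk@%u pu@%u grp@%u (uses %u bits)\n",
	       sec_offset, chk_offset, lun_offset, ch_offset,
	       ch_offset + grp_len);
	return 0;
}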
408static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
409 struct nvm_geo *geo)
410{
411 geo->major_ver_id = id->mjr;
412 geo->minor_ver_id = id->mnr;
413
414 /* Set compacted version for upper layers */
415 geo->version = NVM_OCSSD_SPEC_20;
416
417 if (!(geo->major_ver_id == 2 && geo->minor_ver_id == 0)) {
418 pr_err("nvm: OCSSD version not supported (v%d.%d)\n",
419 geo->major_ver_id, geo->minor_ver_id);
420 return -EINVAL;
421 }
422
423 geo->num_ch = le16_to_cpu(id->num_grp);
424 geo->num_lun = le16_to_cpu(id->num_pu);
425 geo->all_luns = geo->num_ch * geo->num_lun;
426
427 geo->num_chk = le32_to_cpu(id->num_chk);
428 geo->clba = le32_to_cpu(id->clba);
429
430 geo->all_chunks = geo->all_luns * geo->num_chk;
431 geo->total_secs = geo->clba * geo->all_chunks;
432
433 geo->ws_min = le32_to_cpu(id->ws_min);
434 geo->ws_opt = le32_to_cpu(id->ws_opt);
435 geo->mw_cunits = le32_to_cpu(id->mw_cunits);
436 geo->maxoc = le32_to_cpu(id->maxoc);
437 geo->maxocpu = le32_to_cpu(id->maxocpu);
438
439 geo->trdt = le32_to_cpu(id->trdt);
440 geo->trdm = le32_to_cpu(id->trdm);
441 geo->tprt = le32_to_cpu(id->twrt);
442 geo->tprm = le32_to_cpu(id->twrm);
443 geo->tbet = le32_to_cpu(id->tcrst);
444 geo->tbem = le32_to_cpu(id->tcrsm);
445
446 nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);
447
448 return 0;
449}
450
451static int nvme_nvm_identity(struct nvm_dev *nvmdev)
274{ 452{
275 struct nvme_ns *ns = nvmdev->q->queuedata; 453 struct nvme_ns *ns = nvmdev->q->queuedata;
276 struct nvme_nvm_id *nvme_nvm_id; 454 struct nvme_nvm_id12 *id;
277 struct nvme_nvm_command c = {}; 455 struct nvme_nvm_command c = {};
278 int ret; 456 int ret;
279 457
280 c.identity.opcode = nvme_nvm_admin_identity; 458 c.identity.opcode = nvme_nvm_admin_identity;
281 c.identity.nsid = cpu_to_le32(ns->head->ns_id); 459 c.identity.nsid = cpu_to_le32(ns->head->ns_id);
282 c.identity.chnl_off = 0;
283 460
284 nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL); 461 id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
285 if (!nvme_nvm_id) 462 if (!id)
286 return -ENOMEM; 463 return -ENOMEM;
287 464
288 ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, 465 ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
289 nvme_nvm_id, sizeof(struct nvme_nvm_id)); 466 id, sizeof(struct nvme_nvm_id12));
290 if (ret) { 467 if (ret) {
291 ret = -EIO; 468 ret = -EIO;
292 goto out; 469 goto out;
293 } 470 }
294 471
295 nvm_id->ver_id = nvme_nvm_id->ver_id; 472 /*
296 nvm_id->vmnt = nvme_nvm_id->vmnt; 473 * The 1.2 and 2.0 specifications share the first byte in their geometry
297 nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); 474 * command to make it possible to know what version a device implements.
298 nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); 475 */
299 memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, 476 switch (id->ver_id) {
300 sizeof(struct nvm_addr_format)); 477 case 1:
478 ret = nvme_nvm_setup_12(id, &nvmdev->geo);
479 break;
480 case 2:
481 ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
482 &nvmdev->geo);
483 break;
484 default:
485 dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
486 id->ver_id);
487 ret = -EINVAL;
488 }
301 489
302 ret = init_grps(nvm_id, nvme_nvm_id);
303out: 490out:
304 kfree(nvme_nvm_id); 491 kfree(id);
305 return ret; 492 return ret;
306} 493}
307 494
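The dispatch above depends on both identify layouts placing a version byte at offset 0, so the same buffer can be reinterpreted once that byte is known. A stripped-down userspace sketch of the pattern; the struct layouts here are invented stand-ins, not the on-wire formats:

#include <stdint.h>
#include <stdio.h>

/* Invented stand-ins; only the shared version byte at offset 0 matters. */
struct id12 { uint8_t ver_id; uint8_t vmnt; };
struct id20 { uint8_t mjr; uint8_t mnr; };

static int parse_identify(const void *buf)
{
	const uint8_t *raw = buf;

	switch (raw[0]) {	/* byte 0 carries the version in both layouts */
	case 1:
		printf("1.2 device, vmnt=%u\n",
		       ((const struct id12 *)buf)->vmnt);
		return 0;
	case 2:
		printf("2.0 device, minor=%u\n",
		       ((const struct id20 *)buf)->mnr);
		return 0;
	default:
		return -1;	/* unsupported revision */
	}
}

int main(void)
{
	uint8_t buf[4096] = { 2, 0 };	/* pretend a 2.0 device answered */

	return parse_identify(buf) ? 1 : 0;
}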
@@ -314,7 +501,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
314 struct nvme_ctrl *ctrl = ns->ctrl; 501 struct nvme_ctrl *ctrl = ns->ctrl;
315 struct nvme_nvm_command c = {}; 502 struct nvme_nvm_command c = {};
316 struct nvme_nvm_bb_tbl *bb_tbl; 503 struct nvme_nvm_bb_tbl *bb_tbl;
317 int nr_blks = geo->nr_chks * geo->plane_mode; 504 int nr_blks = geo->num_chk * geo->num_pln;
318 int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; 505 int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
319 int ret = 0; 506 int ret = 0;
320 507
@@ -355,7 +542,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
355 goto out; 542 goto out;
356 } 543 }
357 544
358 memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode); 545 memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
359out: 546out:
360 kfree(bb_tbl); 547 kfree(bb_tbl);
361 return ret; 548 return ret;
@@ -382,6 +569,61 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
382 return ret; 569 return ret;
383} 570}
384 571
572/*
573 * Expect the lba in device format
574 */
575static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
576 struct nvm_chk_meta *meta,
577 sector_t slba, int nchks)
578{
579 struct nvm_geo *geo = &ndev->geo;
580 struct nvme_ns *ns = ndev->q->queuedata;
581 struct nvme_ctrl *ctrl = ns->ctrl;
582 struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta;
583 struct ppa_addr ppa;
584 size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
585 size_t log_pos, offset, len;
586 int ret, i;
587
588 /* Normalize lba address space to obtain log offset */
589 ppa.ppa = slba;
590 ppa = dev_to_generic_addr(ndev, ppa);
591
592 log_pos = ppa.m.chk;
593 log_pos += ppa.m.pu * geo->num_chk;
594 log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;
595
596 offset = log_pos * sizeof(struct nvme_nvm_chk_meta);
597
598 while (left) {
599 len = min_t(unsigned int, left, ctrl->max_hw_sectors << 9);
600
601 ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
602 dev_meta, len, offset);
603 if (ret) {
604 dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
605 break;
606 }
607
608 for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
609 meta->state = dev_meta->state;
610 meta->type = dev_meta->type;
611 meta->wi = dev_meta->wi;
612 meta->slba = le64_to_cpu(dev_meta->slba);
613 meta->cnlb = le64_to_cpu(dev_meta->cnlb);
614 meta->wp = le64_to_cpu(dev_meta->wp);
615
616 meta++;
617 dev_meta++;
618 }
619
620 offset += len;
621 left -= len;
622 }
623
624 return ret;
625}
626
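The offset computed above linearizes (group, parallel unit, chunk) into a record index: log_pos = chk + pu * num_chk + grp * num_lun * num_chk, and the byte offset into the report-chunk log is log_pos times the 32-byte record. A worked sketch with hypothetical geometry values:

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical geometry and target address, illustration only. */
	unsigned int num_lun = 4, num_chk = 1024;
	unsigned int grp = 1, pu = 2, chk = 10;
	size_t rec_size = 32;	/* one report-chunk record */

	size_t log_pos = chk;
	log_pos += (size_t)pu * num_chk;
	log_pos += (size_t)grp * num_lun * num_chk;

	printf("log_pos=%zu, byte offset=%zu\n", log_pos, log_pos * rec_size);
	return 0;
}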
385static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, 627static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
386 struct nvme_nvm_command *c) 628 struct nvme_nvm_command *c)
387{ 629{
@@ -513,6 +755,8 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
513 .get_bb_tbl = nvme_nvm_get_bb_tbl, 755 .get_bb_tbl = nvme_nvm_get_bb_tbl,
514 .set_bb_tbl = nvme_nvm_set_bb_tbl, 756 .set_bb_tbl = nvme_nvm_set_bb_tbl,
515 757
758 .get_chk_meta = nvme_nvm_get_chk_meta,
759
516 .submit_io = nvme_nvm_submit_io, 760 .submit_io = nvme_nvm_submit_io,
517 .submit_io_sync = nvme_nvm_submit_io_sync, 761 .submit_io_sync = nvme_nvm_submit_io_sync,
518 762
@@ -520,8 +764,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
520 .destroy_dma_pool = nvme_nvm_destroy_dma_pool, 764 .destroy_dma_pool = nvme_nvm_destroy_dma_pool,
521 .dev_dma_alloc = nvme_nvm_dev_dma_alloc, 765 .dev_dma_alloc = nvme_nvm_dev_dma_alloc,
522 .dev_dma_free = nvme_nvm_dev_dma_free, 766 .dev_dma_free = nvme_nvm_dev_dma_free,
523
524 .max_phys_sect = 64,
525}; 767};
526 768
527static int nvme_nvm_submit_user_cmd(struct request_queue *q, 769static int nvme_nvm_submit_user_cmd(struct request_queue *q,
@@ -722,6 +964,15 @@ int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
722 } 964 }
723} 965}
724 966
967void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
968{
969 struct nvm_dev *ndev = ns->ndev;
970 struct nvm_geo *geo = &ndev->geo;
971
972 geo->csecs = 1 << ns->lba_shift;
973 geo->sos = ns->ms;
974}
975
725int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) 976int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
726{ 977{
727 struct request_queue *q = ns->queue; 978 struct request_queue *q = ns->queue;
@@ -748,125 +999,205 @@ void nvme_nvm_unregister(struct nvme_ns *ns)
748} 999}
749 1000
750static ssize_t nvm_dev_attr_show(struct device *dev, 1001static ssize_t nvm_dev_attr_show(struct device *dev,
751 struct device_attribute *dattr, char *page) 1002 struct device_attribute *dattr, char *page)
752{ 1003{
753 struct nvme_ns *ns = nvme_get_ns_from_dev(dev); 1004 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
754 struct nvm_dev *ndev = ns->ndev; 1005 struct nvm_dev *ndev = ns->ndev;
755 struct nvm_id *id; 1006 struct nvm_geo *geo = &ndev->geo;
756 struct nvm_id_group *grp;
757 struct attribute *attr; 1007 struct attribute *attr;
758 1008
759 if (!ndev) 1009 if (!ndev)
760 return 0; 1010 return 0;
761 1011
762 id = &ndev->identity;
763 grp = &id->grp;
764 attr = &dattr->attr; 1012 attr = &dattr->attr;
765 1013
766 if (strcmp(attr->name, "version") == 0) { 1014 if (strcmp(attr->name, "version") == 0) {
767 return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id); 1015 if (geo->major_ver_id == 1)
768 } else if (strcmp(attr->name, "vendor_opcode") == 0) { 1016 return scnprintf(page, PAGE_SIZE, "%u\n",
769 return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt); 1017 geo->major_ver_id);
1018 else
1019 return scnprintf(page, PAGE_SIZE, "%u.%u\n",
1020 geo->major_ver_id,
1021 geo->minor_ver_id);
770 } else if (strcmp(attr->name, "capabilities") == 0) { 1022 } else if (strcmp(attr->name, "capabilities") == 0) {
771 return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); 1023 return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
1024 } else if (strcmp(attr->name, "read_typ") == 0) {
1025 return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
1026 } else if (strcmp(attr->name, "read_max") == 0) {
1027 return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
1028 } else {
1029 return scnprintf(page,
1030 PAGE_SIZE,
1031 "Unhandled attr(%s) in `%s`\n",
1032 attr->name, __func__);
1033 }
1034}
1035
1036static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page)
1037{
1038 return scnprintf(page, PAGE_SIZE,
1039 "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
1040 ppaf->ch_offset, ppaf->ch_len,
1041 ppaf->lun_offset, ppaf->lun_len,
1042 ppaf->pln_offset, ppaf->pln_len,
1043 ppaf->blk_offset, ppaf->blk_len,
1044 ppaf->pg_offset, ppaf->pg_len,
1045 ppaf->sec_offset, ppaf->sec_len);
1046}
1047
1048static ssize_t nvm_dev_attr_show_12(struct device *dev,
1049 struct device_attribute *dattr, char *page)
1050{
1051 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
1052 struct nvm_dev *ndev = ns->ndev;
1053 struct nvm_geo *geo = &ndev->geo;
1054 struct attribute *attr;
1055
1056 if (!ndev)
1057 return 0;
1058
1059 attr = &dattr->attr;
1060
1061 if (strcmp(attr->name, "vendor_opcode") == 0) {
1062 return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
772 } else if (strcmp(attr->name, "device_mode") == 0) { 1063 } else if (strcmp(attr->name, "device_mode") == 0) {
773 return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); 1064 return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
774 /* kept for compatibility */ 1065 /* kept for compatibility */
775 } else if (strcmp(attr->name, "media_manager") == 0) { 1066 } else if (strcmp(attr->name, "media_manager") == 0) {
776 return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); 1067 return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
777 } else if (strcmp(attr->name, "ppa_format") == 0) { 1068 } else if (strcmp(attr->name, "ppa_format") == 0) {
778 return scnprintf(page, PAGE_SIZE, 1069 return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
779 "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
780 id->ppaf.ch_offset, id->ppaf.ch_len,
781 id->ppaf.lun_offset, id->ppaf.lun_len,
782 id->ppaf.pln_offset, id->ppaf.pln_len,
783 id->ppaf.blk_offset, id->ppaf.blk_len,
784 id->ppaf.pg_offset, id->ppaf.pg_len,
785 id->ppaf.sect_offset, id->ppaf.sect_len);
786 } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */ 1070 } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */
787 return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype); 1071 return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
788 } else if (strcmp(attr->name, "flash_media_type") == 0) { 1072 } else if (strcmp(attr->name, "flash_media_type") == 0) {
789 return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype); 1073 return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
790 } else if (strcmp(attr->name, "num_channels") == 0) { 1074 } else if (strcmp(attr->name, "num_channels") == 0) {
791 return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch); 1075 return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
792 } else if (strcmp(attr->name, "num_luns") == 0) { 1076 } else if (strcmp(attr->name, "num_luns") == 0) {
793 return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun); 1077 return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
794 } else if (strcmp(attr->name, "num_planes") == 0) { 1078 } else if (strcmp(attr->name, "num_planes") == 0) {
795 return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln); 1079 return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
796 } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ 1080 } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */
797 return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk); 1081 return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
798 } else if (strcmp(attr->name, "num_pages") == 0) { 1082 } else if (strcmp(attr->name, "num_pages") == 0) {
799 return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg); 1083 return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
800 } else if (strcmp(attr->name, "page_size") == 0) { 1084 } else if (strcmp(attr->name, "page_size") == 0) {
801 return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz); 1085 return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
802 } else if (strcmp(attr->name, "hw_sector_size") == 0) { 1086 } else if (strcmp(attr->name, "hw_sector_size") == 0) {
803 return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs); 1087 return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
804 } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */ 1088 } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
805 return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos); 1089 return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
806 } else if (strcmp(attr->name, "read_typ") == 0) {
807 return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt);
808 } else if (strcmp(attr->name, "read_max") == 0) {
809 return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm);
810 } else if (strcmp(attr->name, "prog_typ") == 0) { 1090 } else if (strcmp(attr->name, "prog_typ") == 0) {
811 return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt); 1091 return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
812 } else if (strcmp(attr->name, "prog_max") == 0) { 1092 } else if (strcmp(attr->name, "prog_max") == 0) {
813 return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm); 1093 return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
814 } else if (strcmp(attr->name, "erase_typ") == 0) { 1094 } else if (strcmp(attr->name, "erase_typ") == 0) {
815 return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet); 1095 return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
816 } else if (strcmp(attr->name, "erase_max") == 0) { 1096 } else if (strcmp(attr->name, "erase_max") == 0) {
817 return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem); 1097 return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
818 } else if (strcmp(attr->name, "multiplane_modes") == 0) { 1098 } else if (strcmp(attr->name, "multiplane_modes") == 0) {
819 return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos); 1099 return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
820 } else if (strcmp(attr->name, "media_capabilities") == 0) { 1100 } else if (strcmp(attr->name, "media_capabilities") == 0) {
821 return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap); 1101 return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
822 } else if (strcmp(attr->name, "max_phys_secs") == 0) { 1102 } else if (strcmp(attr->name, "max_phys_secs") == 0) {
823 return scnprintf(page, PAGE_SIZE, "%u\n", 1103 return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
824 ndev->ops->max_phys_sect);
825 } else { 1104 } else {
826 return scnprintf(page, 1105 return scnprintf(page, PAGE_SIZE,
827 PAGE_SIZE, 1106 "Unhandled attr(%s) in `%s`\n",
828 "Unhandled attr(%s) in `nvm_dev_attr_show`\n", 1107 attr->name, __func__);
829 attr->name); 1108 }
1109}
1110
1111static ssize_t nvm_dev_attr_show_20(struct device *dev,
1112 struct device_attribute *dattr, char *page)
1113{
1114 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
1115 struct nvm_dev *ndev = ns->ndev;
1116 struct nvm_geo *geo = &ndev->geo;
1117 struct attribute *attr;
1118
1119 if (!ndev)
1120 return 0;
1121
1122 attr = &dattr->attr;
1123
1124 if (strcmp(attr->name, "groups") == 0) {
1125 return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
1126 } else if (strcmp(attr->name, "punits") == 0) {
1127 return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
1128 } else if (strcmp(attr->name, "chunks") == 0) {
1129 return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
1130 } else if (strcmp(attr->name, "clba") == 0) {
1131 return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
1132 } else if (strcmp(attr->name, "ws_min") == 0) {
1133 return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
1134 } else if (strcmp(attr->name, "ws_opt") == 0) {
1135 return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
1136 } else if (strcmp(attr->name, "maxoc") == 0) {
1137 return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
1138 } else if (strcmp(attr->name, "maxocpu") == 0) {
1139 return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
1140 } else if (strcmp(attr->name, "mw_cunits") == 0) {
1141 return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
1142 } else if (strcmp(attr->name, "write_typ") == 0) {
1143 return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
1144 } else if (strcmp(attr->name, "write_max") == 0) {
1145 return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
1146 } else if (strcmp(attr->name, "reset_typ") == 0) {
1147 return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
1148 } else if (strcmp(attr->name, "reset_max") == 0) {
1149 return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
1150 } else {
1151 return scnprintf(page, PAGE_SIZE,
1152 "Unhandled attr(%s) in `%s`\n",
1153 attr->name, __func__);
830 } 1154 }
831} 1155}
832 1156
833#define NVM_DEV_ATTR_RO(_name) \ 1157#define NVM_DEV_ATTR_RO(_name) \
834 DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL) 1158 DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
1159#define NVM_DEV_ATTR_12_RO(_name) \
1160 DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
1161#define NVM_DEV_ATTR_20_RO(_name) \
1162 DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)
835 1163
1164/* general attributes */
836static NVM_DEV_ATTR_RO(version); 1165static NVM_DEV_ATTR_RO(version);
837static NVM_DEV_ATTR_RO(vendor_opcode);
838static NVM_DEV_ATTR_RO(capabilities); 1166static NVM_DEV_ATTR_RO(capabilities);
839static NVM_DEV_ATTR_RO(device_mode); 1167
840static NVM_DEV_ATTR_RO(ppa_format);
841static NVM_DEV_ATTR_RO(media_manager);
842
843static NVM_DEV_ATTR_RO(media_type);
844static NVM_DEV_ATTR_RO(flash_media_type);
845static NVM_DEV_ATTR_RO(num_channels);
846static NVM_DEV_ATTR_RO(num_luns);
847static NVM_DEV_ATTR_RO(num_planes);
848static NVM_DEV_ATTR_RO(num_blocks);
849static NVM_DEV_ATTR_RO(num_pages);
850static NVM_DEV_ATTR_RO(page_size);
851static NVM_DEV_ATTR_RO(hw_sector_size);
852static NVM_DEV_ATTR_RO(oob_sector_size);
853static NVM_DEV_ATTR_RO(read_typ); 1168static NVM_DEV_ATTR_RO(read_typ);
854static NVM_DEV_ATTR_RO(read_max); 1169static NVM_DEV_ATTR_RO(read_max);
855static NVM_DEV_ATTR_RO(prog_typ); 1170
856static NVM_DEV_ATTR_RO(prog_max); 1171/* 1.2 values */
857static NVM_DEV_ATTR_RO(erase_typ); 1172static NVM_DEV_ATTR_12_RO(vendor_opcode);
858static NVM_DEV_ATTR_RO(erase_max); 1173static NVM_DEV_ATTR_12_RO(device_mode);
859static NVM_DEV_ATTR_RO(multiplane_modes); 1174static NVM_DEV_ATTR_12_RO(ppa_format);
860static NVM_DEV_ATTR_RO(media_capabilities); 1175static NVM_DEV_ATTR_12_RO(media_manager);
861static NVM_DEV_ATTR_RO(max_phys_secs); 1176static NVM_DEV_ATTR_12_RO(media_type);
862 1177static NVM_DEV_ATTR_12_RO(flash_media_type);
863static struct attribute *nvm_dev_attrs[] = { 1178static NVM_DEV_ATTR_12_RO(num_channels);
1179static NVM_DEV_ATTR_12_RO(num_luns);
1180static NVM_DEV_ATTR_12_RO(num_planes);
1181static NVM_DEV_ATTR_12_RO(num_blocks);
1182static NVM_DEV_ATTR_12_RO(num_pages);
1183static NVM_DEV_ATTR_12_RO(page_size);
1184static NVM_DEV_ATTR_12_RO(hw_sector_size);
1185static NVM_DEV_ATTR_12_RO(oob_sector_size);
1186static NVM_DEV_ATTR_12_RO(prog_typ);
1187static NVM_DEV_ATTR_12_RO(prog_max);
1188static NVM_DEV_ATTR_12_RO(erase_typ);
1189static NVM_DEV_ATTR_12_RO(erase_max);
1190static NVM_DEV_ATTR_12_RO(multiplane_modes);
1191static NVM_DEV_ATTR_12_RO(media_capabilities);
1192static NVM_DEV_ATTR_12_RO(max_phys_secs);
1193
1194static struct attribute *nvm_dev_attrs_12[] = {
864 &dev_attr_version.attr, 1195 &dev_attr_version.attr,
865 &dev_attr_vendor_opcode.attr,
866 &dev_attr_capabilities.attr, 1196 &dev_attr_capabilities.attr,
1197
1198 &dev_attr_vendor_opcode.attr,
867 &dev_attr_device_mode.attr, 1199 &dev_attr_device_mode.attr,
868 &dev_attr_media_manager.attr, 1200 &dev_attr_media_manager.attr,
869
870 &dev_attr_ppa_format.attr, 1201 &dev_attr_ppa_format.attr,
871 &dev_attr_media_type.attr, 1202 &dev_attr_media_type.attr,
872 &dev_attr_flash_media_type.attr, 1203 &dev_attr_flash_media_type.attr,
@@ -887,22 +1218,92 @@ static struct attribute *nvm_dev_attrs[] = {
887 &dev_attr_multiplane_modes.attr, 1218 &dev_attr_multiplane_modes.attr,
888 &dev_attr_media_capabilities.attr, 1219 &dev_attr_media_capabilities.attr,
889 &dev_attr_max_phys_secs.attr, 1220 &dev_attr_max_phys_secs.attr,
1221
890 NULL, 1222 NULL,
891}; 1223};
892 1224
893static const struct attribute_group nvm_dev_attr_group = { 1225static const struct attribute_group nvm_dev_attr_group_12 = {
894 .name = "lightnvm", 1226 .name = "lightnvm",
895 .attrs = nvm_dev_attrs, 1227 .attrs = nvm_dev_attrs_12,
1228};
1229
1230/* 2.0 values */
1231static NVM_DEV_ATTR_20_RO(groups);
1232static NVM_DEV_ATTR_20_RO(punits);
1233static NVM_DEV_ATTR_20_RO(chunks);
1234static NVM_DEV_ATTR_20_RO(clba);
1235static NVM_DEV_ATTR_20_RO(ws_min);
1236static NVM_DEV_ATTR_20_RO(ws_opt);
1237static NVM_DEV_ATTR_20_RO(maxoc);
1238static NVM_DEV_ATTR_20_RO(maxocpu);
1239static NVM_DEV_ATTR_20_RO(mw_cunits);
1240static NVM_DEV_ATTR_20_RO(write_typ);
1241static NVM_DEV_ATTR_20_RO(write_max);
1242static NVM_DEV_ATTR_20_RO(reset_typ);
1243static NVM_DEV_ATTR_20_RO(reset_max);
1244
1245static struct attribute *nvm_dev_attrs_20[] = {
1246 &dev_attr_version.attr,
1247 &dev_attr_capabilities.attr,
1248
1249 &dev_attr_groups.attr,
1250 &dev_attr_punits.attr,
1251 &dev_attr_chunks.attr,
1252 &dev_attr_clba.attr,
1253 &dev_attr_ws_min.attr,
1254 &dev_attr_ws_opt.attr,
1255 &dev_attr_maxoc.attr,
1256 &dev_attr_maxocpu.attr,
1257 &dev_attr_mw_cunits.attr,
1258
1259 &dev_attr_read_typ.attr,
1260 &dev_attr_read_max.attr,
1261 &dev_attr_write_typ.attr,
1262 &dev_attr_write_max.attr,
1263 &dev_attr_reset_typ.attr,
1264 &dev_attr_reset_max.attr,
1265
1266 NULL,
1267};
1268
1269static const struct attribute_group nvm_dev_attr_group_20 = {
1270 .name = "lightnvm",
1271 .attrs = nvm_dev_attrs_20,
896}; 1272};
897 1273
898int nvme_nvm_register_sysfs(struct nvme_ns *ns) 1274int nvme_nvm_register_sysfs(struct nvme_ns *ns)
899{ 1275{
900 return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, 1276 struct nvm_dev *ndev = ns->ndev;
901 &nvm_dev_attr_group); 1277 struct nvm_geo *geo = &ndev->geo;
1278
1279 if (!ndev)
1280 return -EINVAL;
1281
1282 switch (geo->major_ver_id) {
1283 case 1:
1284 return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
1285 &nvm_dev_attr_group_12);
1286 case 2:
1287 return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
1288 &nvm_dev_attr_group_20);
1289 }
1290
1291 return -EINVAL;
902} 1292}
903 1293
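After the registration above, the attribute group appears under the block device, e.g. /sys/block/nvme0n1/lightnvm/; per the show routine, a 1.2 device reports version as a bare major number while a 2.0 device reports major.minor. A small reader sketch; the device name is an assumption:

#include <stdio.h>

int main(void)
{
	/* Assumes a lightnvm-registered namespace named nvme0n1. */
	const char *path = "/sys/block/nvme0n1/lightnvm/version";
	char buf[32];
	FILE *f = fopen(path, "r");

	if (!f || !fgets(buf, sizeof(buf), f)) {
		perror(path);
		if (f)
			fclose(f);
		return 1;
	}
	printf("OCSSD version: %s", buf);	/* "1" for 1.2, "2.0" for 2.0 */
	fclose(f);
	return 0;
}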
904void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) 1294void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
905{ 1295{
906 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, 1296 struct nvm_dev *ndev = ns->ndev;
907 &nvm_dev_attr_group); 1297 struct nvm_geo *geo = &ndev->geo;
1298
1299 switch (geo->major_ver_id) {
1300 case 1:
1301 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
1302 &nvm_dev_attr_group_12);
1303 break;
1304 case 2:
1305 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
1306 &nvm_dev_attr_group_20);
1307 break;
1308 }
908} 1309}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 060f69e03427..956e0b8e9c4d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -44,12 +44,12 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
44{ 44{
45 struct nvme_ns *ns; 45 struct nvme_ns *ns;
46 46
47 mutex_lock(&ctrl->namespaces_mutex); 47 down_read(&ctrl->namespaces_rwsem);
48 list_for_each_entry(ns, &ctrl->namespaces, list) { 48 list_for_each_entry(ns, &ctrl->namespaces, list) {
49 if (ns->head->disk) 49 if (ns->head->disk)
50 kblockd_schedule_work(&ns->head->requeue_work); 50 kblockd_schedule_work(&ns->head->requeue_work);
51 } 51 }
52 mutex_unlock(&ctrl->namespaces_mutex); 52 up_read(&ctrl->namespaces_rwsem);
53} 53}
54 54
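Switching namespaces_mutex to namespaces_rwsem lets read-only walks such as the requeue kick above proceed concurrently, while list additions and removals still serialize behind the write side. A rough userspace analogy using POSIX rwlocks (not the kernel rwsem API; the "list" here is just an array):

/* Build with: cc -pthread rwdemo.c */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t ns_lock = PTHREAD_RWLOCK_INITIALIZER;
static int namespaces[8];
static int ns_count;

/* Readers may run side by side: they only walk the list. */
static void kick_all(void)
{
	pthread_rwlock_rdlock(&ns_lock);
	for (int i = 0; i < ns_count; i++)
		printf("kick ns %d\n", namespaces[i]);
	pthread_rwlock_unlock(&ns_lock);
}

/* Writers are exclusive: they change the list itself. */
static void add_ns(int id)
{
	pthread_rwlock_wrlock(&ns_lock);
	if (ns_count < 8)
		namespaces[ns_count++] = id;
	pthread_rwlock_unlock(&ns_lock);
}

int main(void)
{
	add_ns(1);
	add_ns(2);
	kick_all();
	return 0;
}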
55static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) 55static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
@@ -162,13 +162,13 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
162 if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) 162 if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
163 return 0; 163 return 0;
164 164
165 q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); 165 q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
166 if (!q) 166 if (!q)
167 goto out; 167 goto out;
168 q->queuedata = head; 168 q->queuedata = head;
169 blk_queue_make_request(q, nvme_ns_head_make_request); 169 blk_queue_make_request(q, nvme_ns_head_make_request);
170 q->poll_fn = nvme_ns_head_poll; 170 q->poll_fn = nvme_ns_head_poll;
171 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 171 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
172 /* set to a default value for 512 until disk is validated */ 172 /* set to a default value for 512 until disk is validated */
173 blk_queue_logical_block_size(q, 512); 173 blk_queue_logical_block_size(q, 512);
174 174
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d733b14ede9d..cf93690b3ffc 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -21,6 +21,7 @@
21#include <linux/blk-mq.h> 21#include <linux/blk-mq.h>
22#include <linux/lightnvm.h> 22#include <linux/lightnvm.h>
23#include <linux/sed-opal.h> 23#include <linux/sed-opal.h>
24#include <linux/fault-inject.h>
24 25
25extern unsigned int nvme_io_timeout; 26extern unsigned int nvme_io_timeout;
26#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) 27#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
@@ -140,7 +141,7 @@ struct nvme_ctrl {
140 struct blk_mq_tag_set *tagset; 141 struct blk_mq_tag_set *tagset;
141 struct blk_mq_tag_set *admin_tagset; 142 struct blk_mq_tag_set *admin_tagset;
142 struct list_head namespaces; 143 struct list_head namespaces;
143 struct mutex namespaces_mutex; 144 struct rw_semaphore namespaces_rwsem;
144 struct device ctrl_device; 145 struct device ctrl_device;
145 struct device *device; /* char device */ 146 struct device *device; /* char device */
146 struct cdev cdev; 147 struct cdev cdev;
@@ -261,6 +262,15 @@ struct nvme_ns_head {
261 int instance; 262 int instance;
262}; 263};
263 264
265#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
266struct nvme_fault_inject {
267 struct fault_attr attr;
268 struct dentry *parent;
269 bool dont_retry; /* DNR, do not retry */
270 u16 status; /* status code */
271};
272#endif
273
264struct nvme_ns { 274struct nvme_ns {
265 struct list_head list; 275 struct list_head list;
266 276
@@ -282,6 +292,11 @@ struct nvme_ns {
282#define NVME_NS_REMOVING 0 292#define NVME_NS_REMOVING 0
283#define NVME_NS_DEAD 1 293#define NVME_NS_DEAD 1
284 u16 noiob; 294 u16 noiob;
295
296#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
297 struct nvme_fault_inject fault_inject;
298#endif
299
285}; 300};
286 301
287struct nvme_ctrl_ops { 302struct nvme_ctrl_ops {
@@ -298,8 +313,19 @@ struct nvme_ctrl_ops {
298 void (*delete_ctrl)(struct nvme_ctrl *ctrl); 313 void (*delete_ctrl)(struct nvme_ctrl *ctrl);
299 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); 314 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
300 int (*reinit_request)(void *data, struct request *rq); 315 int (*reinit_request)(void *data, struct request *rq);
316 void (*stop_ctrl)(struct nvme_ctrl *ctrl);
301}; 317};
302 318
319#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
320void nvme_fault_inject_init(struct nvme_ns *ns);
321void nvme_fault_inject_fini(struct nvme_ns *ns);
322void nvme_should_fail(struct request *req);
323#else
324static inline void nvme_fault_inject_init(struct nvme_ns *ns) {}
325static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {}
326static inline void nvme_should_fail(struct request *req) {}
327#endif
328
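The #else branch above provides empty static inlines so callers such as nvme_end_request() can invoke nvme_should_fail() unconditionally; with the config option off the hook compiles away. A generic sketch of that stub idiom, where FEATURE_X is a made-up option name:

#include <stdio.h>

/* #define FEATURE_X 1 */	/* made-up option; define it to enable the hook */

#ifdef FEATURE_X
static void maybe_fail(int *status)
{
	*status = -5;		/* the "real" hook injects an error */
}
#else
static inline void maybe_fail(int *status)
{
	(void)status;		/* no-op stub when the feature is off */
}
#endif

int main(void)
{
	int status = 0;

	maybe_fail(&status);	/* callers never need an #ifdef */
	printf("status=%d\n", status);
	return 0;
}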
303static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) 329static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
304{ 330{
305 u32 val = 0; 331 u32 val = 0;
@@ -336,6 +362,8 @@ static inline void nvme_end_request(struct request *req, __le16 status,
336 362
337 rq->status = le16_to_cpu(status) >> 1; 363 rq->status = le16_to_cpu(status) >> 1;
338 rq->result = result; 364 rq->result = result;
365 /* inject error when permitted by fault injection framework */
366 nvme_should_fail(req);
339 blk_mq_complete_request(req); 367 blk_mq_complete_request(req);
340} 368}
341 369
@@ -401,6 +429,9 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
401int nvme_delete_ctrl(struct nvme_ctrl *ctrl); 429int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
402int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); 430int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
403 431
432int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
433 u8 log_page, void *log, size_t size, size_t offset);
434
404extern const struct attribute_group nvme_ns_id_attr_group; 435extern const struct attribute_group nvme_ns_id_attr_group;
405extern const struct block_device_operations nvme_ns_head_ops; 436extern const struct block_device_operations nvme_ns_head_ops;
406 437
@@ -461,12 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
461#endif /* CONFIG_NVME_MULTIPATH */ 492#endif /* CONFIG_NVME_MULTIPATH */
462 493
463#ifdef CONFIG_NVM 494#ifdef CONFIG_NVM
495void nvme_nvm_update_nvm_info(struct nvme_ns *ns);
464int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); 496int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
465void nvme_nvm_unregister(struct nvme_ns *ns); 497void nvme_nvm_unregister(struct nvme_ns *ns);
466int nvme_nvm_register_sysfs(struct nvme_ns *ns); 498int nvme_nvm_register_sysfs(struct nvme_ns *ns);
467void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); 499void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
468int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); 500int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
469#else 501#else
502static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {};
470static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, 503static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
471 int node) 504 int node)
472{ 505{
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b6f43b738f03..295fbec1e5f2 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -414,7 +414,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
414{ 414{
415 struct nvme_dev *dev = set->driver_data; 415 struct nvme_dev *dev = set->driver_data;
416 416
417 return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev)); 417 return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0);
418} 418}
419 419
420/** 420/**
@@ -2197,7 +2197,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
2197 if (!dead) { 2197 if (!dead) {
2198 if (shutdown) 2198 if (shutdown)
2199 nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); 2199 nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
2200 }
2201
2202 nvme_stop_queues(&dev->ctrl);
2200 2203
2204 if (!dead) {
2201 /* 2205 /*
2202 * If the controller is still alive tell it to stop using the 2206 * If the controller is still alive tell it to stop using the
2203 * host memory buffer. In theory the shutdown / reset should 2207 * host memory buffer. In theory the shutdown / reset should
@@ -2206,11 +2210,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
2206 */ 2210 */
2207 if (dev->host_mem_descs) 2211 if (dev->host_mem_descs)
2208 nvme_set_host_mem(dev, 0); 2212 nvme_set_host_mem(dev, 0);
2209
2210 }
2211 nvme_stop_queues(&dev->ctrl);
2212
2213 if (!dead) {
2214 nvme_disable_io_queues(dev); 2213 nvme_disable_io_queues(dev);
2215 nvme_disable_admin_queue(dev, shutdown); 2214 nvme_disable_admin_queue(dev, shutdown);
2216 } 2215 }
@@ -2416,6 +2415,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
2416 return 0; 2415 return 0;
2417} 2416}
2418 2417
2418static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
2419{
2420 struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
2421
2422 return snprintf(buf, size, "%s", dev_name(&pdev->dev));
2423}
2424
2419static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { 2425static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
2420 .name = "pcie", 2426 .name = "pcie",
2421 .module = THIS_MODULE, 2427 .module = THIS_MODULE,
@@ -2425,6 +2431,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
2425 .reg_read64 = nvme_pci_reg_read64, 2431 .reg_read64 = nvme_pci_reg_read64,
2426 .free_ctrl = nvme_pci_free_ctrl, 2432 .free_ctrl = nvme_pci_free_ctrl,
2427 .submit_async_event = nvme_pci_submit_async_event, 2433 .submit_async_event = nvme_pci_submit_async_event,
2434 .get_address = nvme_pci_get_address,
2428}; 2435};
2429 2436
2430static int nvme_dev_map(struct nvme_dev *dev) 2437static int nvme_dev_map(struct nvme_dev *dev)
@@ -2461,10 +2468,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
2461 } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { 2468 } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) {
2462 /* 2469 /*
2463 * Samsung SSD 960 EVO drops off the PCIe bus after system 2470 * Samsung SSD 960 EVO drops off the PCIe bus after system
2464 * suspend on a Ryzen board, ASUS PRIME B350M-A. 2471 * suspend on a Ryzen board, ASUS PRIME B350M-A, as well as
2472 * within a few minutes after bootup on a Coffee Lake board -
2473 * ASUS PRIME Z370-A
2465 */ 2474 */
2466 if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") && 2475 if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") &&
2467 dmi_match(DMI_BOARD_NAME, "PRIME B350M-A")) 2476 (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") ||
2477 dmi_match(DMI_BOARD_NAME, "PRIME Z370-A")))
2468 return NVME_QUIRK_NO_APST; 2478 return NVME_QUIRK_NO_APST;
2469 } 2479 }
2470 2480
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4d84a73ee12d..758537e9ba07 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -867,6 +867,14 @@ out_free_io_queues:
867 return ret; 867 return ret;
868} 868}
869 869
870static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
871{
872 struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
873
874 cancel_work_sync(&ctrl->err_work);
875 cancel_delayed_work_sync(&ctrl->reconnect_work);
876}
877
870static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) 878static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
871{ 879{
872 struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); 880 struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -899,7 +907,6 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
899 queue_delayed_work(nvme_wq, &ctrl->reconnect_work, 907 queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
900 ctrl->ctrl.opts->reconnect_delay * HZ); 908 ctrl->ctrl.opts->reconnect_delay * HZ);
901 } else { 909 } else {
902 dev_info(ctrl->ctrl.device, "Removing controller...\n");
903 nvme_delete_ctrl(&ctrl->ctrl); 910 nvme_delete_ctrl(&ctrl->ctrl);
904 } 911 }
905} 912}
@@ -974,8 +981,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
974 nvme_start_queues(&ctrl->ctrl); 981 nvme_start_queues(&ctrl->ctrl);
975 982
976 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 983 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
977 /* state change failure should never happen */ 984 /* state change failure is ok if we're in DELETING state */
978 WARN_ON_ONCE(1); 985 WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
979 return; 986 return;
980 } 987 }
981 988
@@ -1719,9 +1726,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
1719 1726
1720static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) 1727static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
1721{ 1728{
1722 cancel_work_sync(&ctrl->err_work);
1723 cancel_delayed_work_sync(&ctrl->reconnect_work);
1724
1725 if (ctrl->ctrl.queue_count > 1) { 1729 if (ctrl->ctrl.queue_count > 1) {
1726 nvme_stop_queues(&ctrl->ctrl); 1730 nvme_stop_queues(&ctrl->ctrl);
1727 blk_mq_tagset_busy_iter(&ctrl->tag_set, 1731 blk_mq_tagset_busy_iter(&ctrl->tag_set,
@@ -1799,6 +1803,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
1799 .submit_async_event = nvme_rdma_submit_async_event, 1803 .submit_async_event = nvme_rdma_submit_async_event,
1800 .delete_ctrl = nvme_rdma_delete_ctrl, 1804 .delete_ctrl = nvme_rdma_delete_ctrl,
1801 .get_address = nvmf_get_address, 1805 .get_address = nvmf_get_address,
1806 .stop_ctrl = nvme_rdma_stop_ctrl,
1802}; 1807};
1803 1808
1804static inline bool 1809static inline bool
@@ -2025,15 +2030,26 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
2025static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) 2030static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
2026{ 2031{
2027 struct nvme_rdma_ctrl *ctrl; 2032 struct nvme_rdma_ctrl *ctrl;
2033 struct nvme_rdma_device *ndev;
2034 bool found = false;
2035
2036 mutex_lock(&device_list_mutex);
2037 list_for_each_entry(ndev, &device_list, entry) {
2038 if (ndev->dev == ib_device) {
2039 found = true;
2040 break;
2041 }
2042 }
2043 mutex_unlock(&device_list_mutex);
2044
2045 if (!found)
2046 return;
2028 2047
2029 /* Delete all controllers using this device */ 2048 /* Delete all controllers using this device */
2030 mutex_lock(&nvme_rdma_ctrl_mutex); 2049 mutex_lock(&nvme_rdma_ctrl_mutex);
2031 list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { 2050 list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
2032 if (ctrl->device->dev != ib_device) 2051 if (ctrl->device->dev != ib_device)
2033 continue; 2052 continue;
2034 dev_info(ctrl->ctrl.device,
2035 "Removing ctrl: NQN \"%s\", addr %pISp\n",
2036 ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
2037 nvme_delete_ctrl(&ctrl->ctrl); 2053 nvme_delete_ctrl(&ctrl->ctrl);
2038 } 2054 }
2039 mutex_unlock(&nvme_rdma_ctrl_mutex); 2055 mutex_unlock(&nvme_rdma_ctrl_mutex);
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index e6b2d2af81b6..ad9ff27234b5 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -23,6 +23,15 @@
23static const struct config_item_type nvmet_host_type; 23static const struct config_item_type nvmet_host_type;
24static const struct config_item_type nvmet_subsys_type; 24static const struct config_item_type nvmet_subsys_type;
25 25
26static const struct nvmet_transport_name {
27 u8 type;
28 const char *name;
29} nvmet_transport_names[] = {
30 { NVMF_TRTYPE_RDMA, "rdma" },
31 { NVMF_TRTYPE_FC, "fc" },
32 { NVMF_TRTYPE_LOOP, "loop" },
33};
34
26/* 35/*
27 * nvmet_port Generic ConfigFS definitions. 36 * nvmet_port Generic ConfigFS definitions.
28 * Used in any place in the ConfigFS tree that refers to an address. 37 * Used in any place in the ConfigFS tree that refers to an address.
@@ -208,43 +217,30 @@ CONFIGFS_ATTR(nvmet_, addr_trsvcid);
208static ssize_t nvmet_addr_trtype_show(struct config_item *item, 217static ssize_t nvmet_addr_trtype_show(struct config_item *item,
209 char *page) 218 char *page)
210{ 219{
211 switch (to_nvmet_port(item)->disc_addr.trtype) { 220 struct nvmet_port *port = to_nvmet_port(item);
212 case NVMF_TRTYPE_RDMA: 221 int i;
213 return sprintf(page, "rdma\n"); 222
214 case NVMF_TRTYPE_LOOP: 223 for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
215 return sprintf(page, "loop\n"); 224 if (port->disc_addr.trtype != nvmet_transport_names[i].type)
216 case NVMF_TRTYPE_FC: 225 continue;
217 return sprintf(page, "fc\n"); 226 return sprintf(page, "%s\n", nvmet_transport_names[i].name);
218 default:
219 return sprintf(page, "\n");
220 } 227 }
228
229 return sprintf(page, "\n");
221} 230}
222 231
223static void nvmet_port_init_tsas_rdma(struct nvmet_port *port) 232static void nvmet_port_init_tsas_rdma(struct nvmet_port *port)
224{ 233{
225 port->disc_addr.trtype = NVMF_TRTYPE_RDMA;
226 memset(&port->disc_addr.tsas.rdma, 0, NVMF_TSAS_SIZE);
227 port->disc_addr.tsas.rdma.qptype = NVMF_RDMA_QPTYPE_CONNECTED; 234 port->disc_addr.tsas.rdma.qptype = NVMF_RDMA_QPTYPE_CONNECTED;
228 port->disc_addr.tsas.rdma.prtype = NVMF_RDMA_PRTYPE_NOT_SPECIFIED; 235 port->disc_addr.tsas.rdma.prtype = NVMF_RDMA_PRTYPE_NOT_SPECIFIED;
229 port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM; 236 port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM;
230} 237}
231 238
232static void nvmet_port_init_tsas_loop(struct nvmet_port *port)
233{
234 port->disc_addr.trtype = NVMF_TRTYPE_LOOP;
235 memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
236}
237
238static void nvmet_port_init_tsas_fc(struct nvmet_port *port)
239{
240 port->disc_addr.trtype = NVMF_TRTYPE_FC;
241 memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
242}
243
244static ssize_t nvmet_addr_trtype_store(struct config_item *item, 239static ssize_t nvmet_addr_trtype_store(struct config_item *item,
245 const char *page, size_t count) 240 const char *page, size_t count)
246{ 241{
247 struct nvmet_port *port = to_nvmet_port(item); 242 struct nvmet_port *port = to_nvmet_port(item);
243 int i;
248 244
249 if (port->enabled) { 245 if (port->enabled) {
250 pr_err("Cannot modify address while enabled\n"); 246 pr_err("Cannot modify address while enabled\n");
@@ -252,17 +248,18 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
252 return -EACCES; 248 return -EACCES;
253 } 249 }
254 250
255 if (sysfs_streq(page, "rdma")) { 251 for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
256 nvmet_port_init_tsas_rdma(port); 252 if (sysfs_streq(page, nvmet_transport_names[i].name))
257 } else if (sysfs_streq(page, "loop")) { 253 goto found;
258 nvmet_port_init_tsas_loop(port);
259 } else if (sysfs_streq(page, "fc")) {
260 nvmet_port_init_tsas_fc(port);
261 } else {
262 pr_err("Invalid value '%s' for trtype\n", page);
263 return -EINVAL;
264 } 254 }
265 255
256 pr_err("Invalid value '%s' for trtype\n", page);
257 return -EINVAL;
258found:
259 memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
260 port->disc_addr.trtype = nvmet_transport_names[i].type;
261 if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
262 nvmet_port_init_tsas_rdma(port);
266 return count; 263 return count;
267} 264}
268 265
@@ -333,13 +330,13 @@ out_unlock:
333 return ret ? ret : count; 330 return ret ? ret : count;
334} 331}
335 332
333CONFIGFS_ATTR(nvmet_ns_, device_uuid);
334
336static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page) 335static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
337{ 336{
338 return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid); 337 return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
339} 338}
340 339
341CONFIGFS_ATTR(nvmet_ns_, device_uuid);
342
343static ssize_t nvmet_ns_device_nguid_store(struct config_item *item, 340static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
344 const char *page, size_t count) 341 const char *page, size_t count)
345{ 342{
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a78029e4e5f4..e95424f172fd 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -18,7 +18,7 @@
18 18
19#include "nvmet.h" 19#include "nvmet.h"
20 20
21static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; 21static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
22static DEFINE_IDA(cntlid_ida); 22static DEFINE_IDA(cntlid_ida);
23 23
24/* 24/*
@@ -137,7 +137,7 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
137 schedule_work(&ctrl->async_event_work); 137 schedule_work(&ctrl->async_event_work);
138} 138}
139 139
140int nvmet_register_transport(struct nvmet_fabrics_ops *ops) 140int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
141{ 141{
142 int ret = 0; 142 int ret = 0;
143 143
@@ -152,7 +152,7 @@ int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
152} 152}
153EXPORT_SYMBOL_GPL(nvmet_register_transport); 153EXPORT_SYMBOL_GPL(nvmet_register_transport);
154 154
155void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops) 155void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
156{ 156{
157 down_write(&nvmet_config_sem); 157 down_write(&nvmet_config_sem);
158 nvmet_transports[ops->type] = NULL; 158 nvmet_transports[ops->type] = NULL;
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
162 162
163int nvmet_enable_port(struct nvmet_port *port) 163int nvmet_enable_port(struct nvmet_port *port)
164{ 164{
165 struct nvmet_fabrics_ops *ops; 165 const struct nvmet_fabrics_ops *ops;
166 int ret; 166 int ret;
167 167
168 lockdep_assert_held(&nvmet_config_sem); 168 lockdep_assert_held(&nvmet_config_sem);
@@ -195,7 +195,7 @@ int nvmet_enable_port(struct nvmet_port *port)
195 195
196void nvmet_disable_port(struct nvmet_port *port) 196void nvmet_disable_port(struct nvmet_port *port)
197{ 197{
198 struct nvmet_fabrics_ops *ops; 198 const struct nvmet_fabrics_ops *ops;
199 199
200 lockdep_assert_held(&nvmet_config_sem); 200 lockdep_assert_held(&nvmet_config_sem);
201 201
@@ -500,7 +500,7 @@ int nvmet_sq_init(struct nvmet_sq *sq)
500EXPORT_SYMBOL_GPL(nvmet_sq_init); 500EXPORT_SYMBOL_GPL(nvmet_sq_init);
501 501
502bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, 502bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
503 struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops) 503 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
504{ 504{
505 u8 flags = req->cmd->common.flags; 505 u8 flags = req->cmd->common.flags;
506 u16 status; 506 u16 status;
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 8f3b57b4c97b..a72425d8bce0 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -43,7 +43,8 @@ void nvmet_referral_disable(struct nvmet_port *port)
43} 43}
44 44
45static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr, 45static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
46 struct nvmet_port *port, char *subsys_nqn, u8 type, u32 numrec) 46 struct nvmet_port *port, char *subsys_nqn, char *traddr,
47 u8 type, u32 numrec)
47{ 48{
48 struct nvmf_disc_rsp_page_entry *e = &hdr->entries[numrec]; 49 struct nvmf_disc_rsp_page_entry *e = &hdr->entries[numrec];
49 50
@@ -56,11 +57,30 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
56 e->asqsz = cpu_to_le16(NVME_AQ_DEPTH); 57 e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
57 e->subtype = type; 58 e->subtype = type;
58 memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); 59 memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
59 memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); 60 memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE);
60 memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE); 61 memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE);
61 memcpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE); 62 memcpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE);
62} 63}
63 64
65/*
66 * nvmet_set_disc_traddr - set a correct discovery log entry traddr
67 *
 68 * IP-based transports (e.g. RDMA) can listen on the "any" IPv4/IPv6 address
 69 * (INADDR_ANY or IN6ADDR_ANY_INIT). The discovery log page traddr reply
 70 * must not contain that "any" IP address. If the transport implements
 71 * .disc_traddr, use it: this callback sets the discovery traddr
 72 * from the req->port address when the port in question listens on
 73 * the "any" IP address.
74 */
75static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port,
76 char *traddr)
77{
78 if (req->ops->disc_traddr)
79 req->ops->disc_traddr(req, port, traddr);
80 else
81 memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
82}
83
64static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) 84static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
65{ 85{
66 const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry); 86 const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
@@ -90,8 +110,11 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
90 if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn)) 110 if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn))
91 continue; 111 continue;
92 if (residual_len >= entry_size) { 112 if (residual_len >= entry_size) {
113 char traddr[NVMF_TRADDR_SIZE];
114
115 nvmet_set_disc_traddr(req, req->port, traddr);
93 nvmet_format_discovery_entry(hdr, req->port, 116 nvmet_format_discovery_entry(hdr, req->port,
94 p->subsys->subsysnqn, 117 p->subsys->subsysnqn, traddr,
95 NVME_NQN_NVME, numrec); 118 NVME_NQN_NVME, numrec);
96 residual_len -= entry_size; 119 residual_len -= entry_size;
97 } 120 }
@@ -102,6 +125,7 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
102 if (residual_len >= entry_size) { 125 if (residual_len >= entry_size) {
103 nvmet_format_discovery_entry(hdr, r, 126 nvmet_format_discovery_entry(hdr, r,
104 NVME_DISC_SUBSYS_NAME, 127 NVME_DISC_SUBSYS_NAME,
128 r->disc_addr.traddr,
105 NVME_NQN_DISC, numrec); 129 NVME_NQN_DISC, numrec);
106 residual_len -= entry_size; 130 residual_len -= entry_size;
107 } 131 }
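The comment block added to discovery.c above explains why a port listening on a wildcard address must not advertise that wildcard in the discovery log. The user-space sketch below walks through the same decision: detect the "any" address (the role inet_addr_is_any() plays in the kernel) and, if it matches, fall back to the connection's local address, otherwise copy the configured traddr. The helper names, TRADDR_SIZE, and main() are assumptions for illustration only.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/socket.h>

#define TRADDR_SIZE 256		/* stands in for NVMF_TRADDR_SIZE */

/* True for INADDR_ANY / in6addr_any, like the kernel's inet_addr_is_any(). */
static bool is_any_addr(const struct sockaddr *sa)
{
	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;

		return sin->sin_addr.s_addr == htonl(INADDR_ANY);
	}
	if (sa->sa_family == AF_INET6) {
		const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;

		return IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr);
	}
	return false;
}

/* Pick the traddr to advertise: the connection's local address when the
 * port was configured with a wildcard, otherwise the configured traddr.
 */
static void set_disc_traddr(const struct sockaddr *listen_addr,
			    const struct sockaddr *conn_local_addr,
			    const char *configured_traddr, char *traddr)
{
	if (is_any_addr(listen_addr)) {
		char buf[INET6_ADDRSTRLEN] = "";
		const void *src;

		if (conn_local_addr->sa_family == AF_INET)
			src = &((const struct sockaddr_in *)conn_local_addr)->sin_addr;
		else
			src = &((const struct sockaddr_in6 *)conn_local_addr)->sin6_addr;
		inet_ntop(conn_local_addr->sa_family, src, buf, sizeof(buf));
		snprintf(traddr, TRADDR_SIZE, "%s", buf);
	} else {
		snprintf(traddr, TRADDR_SIZE, "%s", configured_traddr);
	}
}

int main(void)
{
	struct sockaddr_in any = { .sin_family = AF_INET };
	struct sockaddr_in local = { .sin_family = AF_INET };
	char traddr[TRADDR_SIZE];

	any.sin_addr.s_addr = htonl(INADDR_ANY);
	inet_pton(AF_INET, "192.0.2.7", &local.sin_addr);
	set_disc_traddr((struct sockaddr *)&any, (struct sockaddr *)&local,
			"0.0.0.0", traddr);
	printf("advertised traddr: %s\n", traddr);	/* prints 192.0.2.7 */
	return 0;
}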
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 9b39a6cb1935..33ee8d3145f8 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -87,6 +87,7 @@ struct nvmet_fc_fcp_iod {
87 struct nvmet_req req; 87 struct nvmet_req req;
88 struct work_struct work; 88 struct work_struct work;
89 struct work_struct done_work; 89 struct work_struct done_work;
90 struct work_struct defer_work;
90 91
91 struct nvmet_fc_tgtport *tgtport; 92 struct nvmet_fc_tgtport *tgtport;
92 struct nvmet_fc_tgt_queue *queue; 93 struct nvmet_fc_tgt_queue *queue;
@@ -224,6 +225,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt);
224static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); 225static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
225static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); 226static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work);
226static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); 227static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work);
228static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
227static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); 229static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
228static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); 230static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
229static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); 231static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
@@ -429,6 +431,7 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
429 for (i = 0; i < queue->sqsize; fod++, i++) { 431 for (i = 0; i < queue->sqsize; fod++, i++) {
430 INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); 432 INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work);
431 INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); 433 INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work);
434 INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work);
432 fod->tgtport = tgtport; 435 fod->tgtport = tgtport;
433 fod->queue = queue; 436 fod->queue = queue;
434 fod->active = false; 437 fod->active = false;
@@ -512,6 +515,17 @@ nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
512} 515}
513 516
514static void 517static void
518nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work)
519{
520 struct nvmet_fc_fcp_iod *fod =
521 container_of(work, struct nvmet_fc_fcp_iod, defer_work);
522
523 /* Submit deferred IO for processing */
524 nvmet_fc_queue_fcp_req(fod->tgtport, fod->queue, fod->fcpreq);
525
526}
527
528static void
515nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, 529nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
516 struct nvmet_fc_fcp_iod *fod) 530 struct nvmet_fc_fcp_iod *fod)
517{ 531{
@@ -568,13 +582,12 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
568 /* inform LLDD IO is now being processed */ 582 /* inform LLDD IO is now being processed */
569 tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq); 583 tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq);
570 584
571 /* Submit deferred IO for processing */
572 nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
573
574 /* 585 /*
575 * Leave the queue lookup get reference taken when 586 * Leave the queue lookup get reference taken when
576 * fod was originally allocated. 587 * fod was originally allocated.
577 */ 588 */
589
590 queue_work(queue->work_q, &fod->defer_work);
578} 591}
579 592
580static int 593static int
@@ -1550,7 +1563,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
1550 1563
1551static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req); 1564static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req);
1552 1565
1553static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops; 1566static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops;
1554 1567
1555static void 1568static void
1556nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq) 1569nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq)
@@ -2505,7 +2518,7 @@ nvmet_fc_remove_port(struct nvmet_port *port)
2505 /* nothing to do */ 2518 /* nothing to do */
2506} 2519}
2507 2520
2508static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { 2521static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
2509 .owner = THIS_MODULE, 2522 .owner = THIS_MODULE,
2510 .type = NVMF_TRTYPE_FC, 2523 .type = NVMF_TRTYPE_FC,
2511 .msdbd = 1, 2524 .msdbd = 1,
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 861d1509b22b..a350765d2d5c 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -71,7 +71,7 @@ static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
71static void nvme_loop_queue_response(struct nvmet_req *nvme_req); 71static void nvme_loop_queue_response(struct nvmet_req *nvme_req);
72static void nvme_loop_delete_ctrl(struct nvmet_ctrl *ctrl); 72static void nvme_loop_delete_ctrl(struct nvmet_ctrl *ctrl);
73 73
74static struct nvmet_fabrics_ops nvme_loop_ops; 74static const struct nvmet_fabrics_ops nvme_loop_ops;
75 75
76static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue) 76static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
77{ 77{
@@ -675,7 +675,7 @@ static void nvme_loop_remove_port(struct nvmet_port *port)
675 nvmet_loop_port = NULL; 675 nvmet_loop_port = NULL;
676} 676}
677 677
678static struct nvmet_fabrics_ops nvme_loop_ops = { 678static const struct nvmet_fabrics_ops nvme_loop_ops = {
679 .owner = THIS_MODULE, 679 .owner = THIS_MODULE,
680 .type = NVMF_TRTYPE_LOOP, 680 .type = NVMF_TRTYPE_LOOP,
681 .add_port = nvme_loop_add_port, 681 .add_port = nvme_loop_add_port,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 417f6c0331cc..15fd84ab21f8 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -130,7 +130,7 @@ struct nvmet_ctrl {
130 struct delayed_work ka_work; 130 struct delayed_work ka_work;
131 struct work_struct fatal_err_work; 131 struct work_struct fatal_err_work;
132 132
133 struct nvmet_fabrics_ops *ops; 133 const struct nvmet_fabrics_ops *ops;
134 134
135 char subsysnqn[NVMF_NQN_FIELD_LEN]; 135 char subsysnqn[NVMF_NQN_FIELD_LEN];
136 char hostnqn[NVMF_NQN_FIELD_LEN]; 136 char hostnqn[NVMF_NQN_FIELD_LEN];
@@ -209,6 +209,8 @@ struct nvmet_fabrics_ops {
209 int (*add_port)(struct nvmet_port *port); 209 int (*add_port)(struct nvmet_port *port);
210 void (*remove_port)(struct nvmet_port *port); 210 void (*remove_port)(struct nvmet_port *port);
211 void (*delete_ctrl)(struct nvmet_ctrl *ctrl); 211 void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
212 void (*disc_traddr)(struct nvmet_req *req,
213 struct nvmet_port *port, char *traddr);
212}; 214};
213 215
214#define NVMET_MAX_INLINE_BIOVEC 8 216#define NVMET_MAX_INLINE_BIOVEC 8
@@ -231,7 +233,7 @@ struct nvmet_req {
231 struct nvmet_port *port; 233 struct nvmet_port *port;
232 234
233 void (*execute)(struct nvmet_req *req); 235 void (*execute)(struct nvmet_req *req);
234 struct nvmet_fabrics_ops *ops; 236 const struct nvmet_fabrics_ops *ops;
235}; 237};
236 238
237static inline void nvmet_set_status(struct nvmet_req *req, u16 status) 239static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
@@ -267,7 +269,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
267u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); 269u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
268 270
269bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, 271bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
270 struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops); 272 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
271void nvmet_req_uninit(struct nvmet_req *req); 273void nvmet_req_uninit(struct nvmet_req *req);
272void nvmet_req_execute(struct nvmet_req *req); 274void nvmet_req_execute(struct nvmet_req *req);
273void nvmet_req_complete(struct nvmet_req *req, u16 status); 275void nvmet_req_complete(struct nvmet_req *req, u16 status);
@@ -301,8 +303,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns);
301struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid); 303struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
302void nvmet_ns_free(struct nvmet_ns *ns); 304void nvmet_ns_free(struct nvmet_ns *ns);
303 305
304int nvmet_register_transport(struct nvmet_fabrics_ops *ops); 306int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
305void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops); 307void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
306 308
307int nvmet_enable_port(struct nvmet_port *port); 309int nvmet_enable_port(struct nvmet_port *port);
308void nvmet_disable_port(struct nvmet_port *port); 310void nvmet_disable_port(struct nvmet_port *port);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 978e169c11bf..52e0c5d579a7 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,7 +77,6 @@ enum nvmet_rdma_queue_state {
77 NVMET_RDMA_Q_CONNECTING, 77 NVMET_RDMA_Q_CONNECTING,
78 NVMET_RDMA_Q_LIVE, 78 NVMET_RDMA_Q_LIVE,
79 NVMET_RDMA_Q_DISCONNECTING, 79 NVMET_RDMA_Q_DISCONNECTING,
80 NVMET_RDMA_IN_DEVICE_REMOVAL,
81}; 80};
82 81
83struct nvmet_rdma_queue { 82struct nvmet_rdma_queue {
@@ -137,7 +136,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
137static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); 136static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
138static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); 137static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
139 138
140static struct nvmet_fabrics_ops nvmet_rdma_ops; 139static const struct nvmet_fabrics_ops nvmet_rdma_ops;
141 140
142/* XXX: really should move to a generic header sooner or later.. */ 141/* XXX: really should move to a generic header sooner or later.. */
143static inline u32 get_unaligned_le24(const u8 *p) 142static inline u32 get_unaligned_le24(const u8 *p)
@@ -914,8 +913,11 @@ err_destroy_cq:
914 913
915static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) 914static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
916{ 915{
917 ib_drain_qp(queue->cm_id->qp); 916 struct ib_qp *qp = queue->cm_id->qp;
918 rdma_destroy_qp(queue->cm_id); 917
918 ib_drain_qp(qp);
919 rdma_destroy_id(queue->cm_id);
920 ib_destroy_qp(qp);
919 ib_free_cq(queue->cq); 921 ib_free_cq(queue->cq);
920} 922}
921 923
@@ -940,15 +942,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
940{ 942{
941 struct nvmet_rdma_queue *queue = 943 struct nvmet_rdma_queue *queue =
942 container_of(w, struct nvmet_rdma_queue, release_work); 944 container_of(w, struct nvmet_rdma_queue, release_work);
943 struct rdma_cm_id *cm_id = queue->cm_id;
944 struct nvmet_rdma_device *dev = queue->dev; 945 struct nvmet_rdma_device *dev = queue->dev;
945 enum nvmet_rdma_queue_state state = queue->state;
946 946
947 nvmet_rdma_free_queue(queue); 947 nvmet_rdma_free_queue(queue);
948 948
949 if (state != NVMET_RDMA_IN_DEVICE_REMOVAL)
950 rdma_destroy_id(cm_id);
951
952 kref_put(&dev->ref, nvmet_rdma_free_dev); 949 kref_put(&dev->ref, nvmet_rdma_free_dev);
953} 950}
954 951
@@ -1153,8 +1150,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
1153 } 1150 }
1154 1151
1155 ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); 1152 ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
1156 if (ret) 1153 if (ret) {
1157 goto release_queue; 1154 schedule_work(&queue->release_work);
1155 /* Destroying rdma_cm id is not needed here */
1156 return 0;
1157 }
1158 1158
1159 mutex_lock(&nvmet_rdma_queue_mutex); 1159 mutex_lock(&nvmet_rdma_queue_mutex);
1160 list_add_tail(&queue->queue_list, &nvmet_rdma_queue_list); 1160 list_add_tail(&queue->queue_list, &nvmet_rdma_queue_list);
@@ -1162,8 +1162,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
1162 1162
1163 return 0; 1163 return 0;
1164 1164
1165release_queue:
1166 nvmet_rdma_free_queue(queue);
1167put_device: 1165put_device:
1168 kref_put(&ndev->ref, nvmet_rdma_free_dev); 1166 kref_put(&ndev->ref, nvmet_rdma_free_dev);
1169 1167
@@ -1209,7 +1207,6 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
1209 case NVMET_RDMA_Q_CONNECTING: 1207 case NVMET_RDMA_Q_CONNECTING:
1210 case NVMET_RDMA_Q_LIVE: 1208 case NVMET_RDMA_Q_LIVE:
1211 queue->state = NVMET_RDMA_Q_DISCONNECTING; 1209 queue->state = NVMET_RDMA_Q_DISCONNECTING;
1212 case NVMET_RDMA_IN_DEVICE_REMOVAL:
1213 disconnect = true; 1210 disconnect = true;
1214 break; 1211 break;
1215 case NVMET_RDMA_Q_DISCONNECTING: 1212 case NVMET_RDMA_Q_DISCONNECTING:
@@ -1322,13 +1319,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
1322 case RDMA_CM_EVENT_ADDR_CHANGE: 1319 case RDMA_CM_EVENT_ADDR_CHANGE:
1323 case RDMA_CM_EVENT_DISCONNECTED: 1320 case RDMA_CM_EVENT_DISCONNECTED:
1324 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 1321 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1325 /* 1322 nvmet_rdma_queue_disconnect(queue);
1326 * We might end up here when we already freed the qp
1327 * which means queue release sequence is in progress,
1328 * so don't get in the way...
1329 */
1330 if (queue)
1331 nvmet_rdma_queue_disconnect(queue);
1332 break; 1323 break;
1333 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1324 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1334 ret = nvmet_rdma_device_removal(cm_id, queue); 1325 ret = nvmet_rdma_device_removal(cm_id, queue);
@@ -1445,7 +1436,24 @@ static void nvmet_rdma_remove_port(struct nvmet_port *port)
1445 rdma_destroy_id(cm_id); 1436 rdma_destroy_id(cm_id);
1446} 1437}
1447 1438
1448static struct nvmet_fabrics_ops nvmet_rdma_ops = { 1439static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
1440 struct nvmet_port *port, char *traddr)
1441{
1442 struct rdma_cm_id *cm_id = port->priv;
1443
1444 if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
1445 struct nvmet_rdma_rsp *rsp =
1446 container_of(req, struct nvmet_rdma_rsp, req);
1447 struct rdma_cm_id *req_cm_id = rsp->queue->cm_id;
1448 struct sockaddr *addr = (void *)&req_cm_id->route.addr.src_addr;
1449
1450 sprintf(traddr, "%pISc", addr);
1451 } else {
1452 memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
1453 }
1454}
1455
1456static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
1449 .owner = THIS_MODULE, 1457 .owner = THIS_MODULE,
1450 .type = NVMF_TRTYPE_RDMA, 1458 .type = NVMF_TRTYPE_RDMA,
1451 .sqe_inline_size = NVMET_RDMA_INLINE_DATA_SIZE, 1459 .sqe_inline_size = NVMET_RDMA_INLINE_DATA_SIZE,
@@ -1455,13 +1463,31 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
1455 .remove_port = nvmet_rdma_remove_port, 1463 .remove_port = nvmet_rdma_remove_port,
1456 .queue_response = nvmet_rdma_queue_response, 1464 .queue_response = nvmet_rdma_queue_response,
1457 .delete_ctrl = nvmet_rdma_delete_ctrl, 1465 .delete_ctrl = nvmet_rdma_delete_ctrl,
1466 .disc_traddr = nvmet_rdma_disc_port_addr,
1458}; 1467};
1459 1468
1460static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) 1469static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
1461{ 1470{
1462 struct nvmet_rdma_queue *queue, *tmp; 1471 struct nvmet_rdma_queue *queue, *tmp;
1472 struct nvmet_rdma_device *ndev;
1473 bool found = false;
1463 1474
1464 /* Device is being removed, delete all queues using this device */ 1475 mutex_lock(&device_list_mutex);
1476 list_for_each_entry(ndev, &device_list, entry) {
1477 if (ndev->device == ib_device) {
1478 found = true;
1479 break;
1480 }
1481 }
1482 mutex_unlock(&device_list_mutex);
1483
1484 if (!found)
1485 return;
1486
1487 /*
 1488 * The IB device that is used by nvmet controllers is being removed;
 1489 * delete all queues using this device.
1490 */
1465 mutex_lock(&nvmet_rdma_queue_mutex); 1491 mutex_lock(&nvmet_rdma_queue_mutex);
1466 list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list, 1492 list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list,
1467 queue_list) { 1493 queue_list) {
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index ecef8e73d40b..b5692a284bd8 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3208,7 +3208,7 @@ static void dasd_setup_queue(struct dasd_block *block)
3208 } else { 3208 } else {
3209 max = block->base->discipline->max_blocks << block->s2b_shift; 3209 max = block->base->discipline->max_blocks << block->s2b_shift;
3210 } 3210 }
3211 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 3211 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
3212 q->limits.max_dev_sectors = max; 3212 q->limits.max_dev_sectors = max;
3213 blk_queue_logical_block_size(q, logical_block_size); 3213 blk_queue_logical_block_size(q, logical_block_size);
3214 blk_queue_max_hw_sectors(q, max); 3214 blk_queue_max_hw_sectors(q, max);
@@ -3231,7 +3231,7 @@ static void dasd_setup_queue(struct dasd_block *block)
3231 3231
3232 blk_queue_max_discard_sectors(q, max_discard_sectors); 3232 blk_queue_max_discard_sectors(q, max_discard_sectors);
3233 blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); 3233 blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
3234 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 3234 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
3235 } 3235 }
3236} 3236}
3237 3237
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 9cae08b36b80..0a312e450207 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -633,7 +633,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
633 dev_info->gd->private_data = dev_info; 633 dev_info->gd->private_data = dev_info;
634 blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request); 634 blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
635 blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096); 635 blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
636 queue_flag_set_unlocked(QUEUE_FLAG_DAX, dev_info->dcssblk_queue); 636 blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue);
637 637
638 seg_byte_size = (dev_info->end - dev_info->start + 1); 638 seg_byte_size = (dev_info->end - dev_info->start + 1);
639 set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors 639 set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index b4130c7880d8..b1fcb76dd272 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -472,8 +472,8 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
472 blk_queue_logical_block_size(rq, 1 << 12); 472 blk_queue_logical_block_size(rq, 1 << 12);
473 blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */ 473 blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
474 blk_queue_max_segments(rq, nr_max_blk); 474 blk_queue_max_segments(rq, nr_max_blk);
475 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rq); 475 blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
476 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, rq); 476 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);
477 477
478 bdev->gendisk = alloc_disk(SCM_NR_PARTS); 478 bdev->gendisk = alloc_disk(SCM_NR_PARTS);
479 if (!bdev->gendisk) { 479 if (!bdev->gendisk) {
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 2a6334ca750e..3df5d68d09f0 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -348,8 +348,8 @@ static int __init xpram_setup_blkdev(void)
348 put_disk(xpram_disks[i]); 348 put_disk(xpram_disks[i]);
349 goto out; 349 goto out;
350 } 350 }
351 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xpram_queues[i]); 351 blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]);
352 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]); 352 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]);
353 blk_queue_make_request(xpram_queues[i], xpram_make_request); 353 blk_queue_make_request(xpram_queues[i], xpram_make_request);
354 blk_queue_logical_block_size(xpram_queues[i], 4096); 354 blk_queue_logical_block_size(xpram_queues[i], 4096);
355 } 355 }
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index ca218c82321f..6162cf57a20a 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -961,7 +961,7 @@ static int zfcp_fc_exec_els_job(struct bsg_job *job,
961 d_id = ntoh24(bsg_request->rqst_data.h_els.port_id); 961 d_id = ntoh24(bsg_request->rqst_data.h_els.port_id);
962 962
963 els->handler = zfcp_fc_ct_els_job_handler; 963 els->handler = zfcp_fc_ct_els_job_handler;
964 return zfcp_fsf_send_els(adapter, d_id, els, job->req->timeout / HZ); 964 return zfcp_fsf_send_els(adapter, d_id, els, job->timeout / HZ);
965} 965}
966 966
967static int zfcp_fc_exec_ct_job(struct bsg_job *job, 967static int zfcp_fc_exec_ct_job(struct bsg_job *job,
@@ -980,7 +980,7 @@ static int zfcp_fc_exec_ct_job(struct bsg_job *job,
980 return ret; 980 return ret;
981 981
982 ct->handler = zfcp_fc_ct_job_handler; 982 ct->handler = zfcp_fc_ct_job_handler;
983 ret = zfcp_fsf_send_ct(wka_port, ct, NULL, job->req->timeout / HZ); 983 ret = zfcp_fsf_send_ct(wka_port, ct, NULL, job->timeout / HZ);
984 if (ret) 984 if (ret)
985 zfcp_fc_wka_port_put(wka_port); 985 zfcp_fc_wka_port_put(wka_port);
986 986
diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h
index 95fc720c1b30..e6e5ccb1e0f3 100644
--- a/drivers/scsi/gdth.h
+++ b/drivers/scsi/gdth.h
@@ -178,9 +178,6 @@
178#define MSG_SIZE 34 /* size of message structure */ 178#define MSG_SIZE 34 /* size of message structure */
179#define MSG_REQUEST 0 /* async. event: message */ 179#define MSG_REQUEST 0 /* async. event: message */
180 180
181/* cacheservice defines */
182#define SECTOR_SIZE 0x200 /* always 512 bytes per sec. */
183
184/* DPMEM constants */ 181/* DPMEM constants */
185#define DPMEM_MAGIC 0xC0FFEE11 182#define DPMEM_MAGIC 0xC0FFEE11
186#define IC_HEADER_BYTES 48 183#define IC_HEADER_BYTES 48
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 6d886b13dbe9..2ba4b68fdb73 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -949,7 +949,7 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
949 949
950static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev) 950static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev)
951{ 951{
952 set_bit(QUEUE_FLAG_BIDI, &sdev->request_queue->queue_flags); 952 blk_queue_flag_set(QUEUE_FLAG_BIDI, sdev->request_queue);
953 return 0; 953 return 0;
954} 954}
955 955
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index dde0798b8a91..7a37ace4239b 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1864,7 +1864,7 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev, u32 max_io_size)
1864 1864
1865 blk_queue_max_hw_sectors(sdev->request_queue, (max_io_size / 512)); 1865 blk_queue_max_hw_sectors(sdev->request_queue, (max_io_size / 512));
1866 1866
1867 queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue); 1867 blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue);
1868 blk_queue_virt_boundary(sdev->request_queue, mr_nvme_pg_size - 1); 1868 blk_queue_virt_boundary(sdev->request_queue, mr_nvme_pg_size - 1);
1869} 1869}
1870 1870
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 5ec3b74e8aed..ce97cde3b41c 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -1894,7 +1894,7 @@ megasas_is_prp_possible(struct megasas_instance *instance,
1894 * then sending IOs with holes. 1894 * then sending IOs with holes.
1895 * 1895 *
1896 * Though driver can request block layer to disable IO merging by calling- 1896 * Though driver can request block layer to disable IO merging by calling-
1897 * queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue) but 1897 * blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue) but
1898 * user may tune sysfs parameter- nomerges again to 0 or 1. 1898 * user may tune sysfs parameter- nomerges again to 0 or 1.
1899 * 1899 *
1900 * If in future IO scheduling is enabled with SCSI BLK MQ, 1900 * If in future IO scheduling is enabled with SCSI BLK MQ,
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index a1cb0236c550..aee1a0e1c600 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2352,7 +2352,7 @@ scsih_slave_configure(struct scsi_device *sdev)
2352 ** merged and can eliminate holes created during merging 2352 ** merged and can eliminate holes created during merging
2353 ** operation. 2353 ** operation.
2354 **/ 2354 **/
2355 queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, 2355 blk_queue_flag_set(QUEUE_FLAG_NOMERGES,
2356 sdev->request_queue); 2356 sdev->request_queue);
2357 blk_queue_virt_boundary(sdev->request_queue, 2357 blk_queue_virt_boundary(sdev->request_queue,
2358 ioc->page_size - 1); 2358 ioc->page_size - 1);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 5c5dcca4d1da..822d22336e15 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -6830,7 +6830,7 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost)
6830 if (USER_CTRL_IRQ(vha->hw)) 6830 if (USER_CTRL_IRQ(vha->hw))
6831 rc = blk_mq_map_queues(&shost->tag_set); 6831 rc = blk_mq_map_queues(&shost->tag_set);
6832 else 6832 else
6833 rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev); 6833 rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0);
6834 return rc; 6834 return rc;
6835} 6835}
6836 6836
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index a5986dae9020..1cb353f18d08 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -3897,7 +3897,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp)
3897 if (sdebug_verbose) 3897 if (sdebug_verbose)
3898 pr_info("slave_alloc <%u %u %u %llu>\n", 3898 pr_info("slave_alloc <%u %u %u %llu>\n",
3899 sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); 3899 sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
3900 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue); 3900 blk_queue_flag_set(QUEUE_FLAG_BIDI, sdp->request_queue);
3901 return 0; 3901 return 0;
3902} 3902}
3903 3903
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c84f931388f2..ed79d3925860 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2144,8 +2144,6 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
2144{ 2144{
2145 struct device *dev = shost->dma_dev; 2145 struct device *dev = shost->dma_dev;
2146 2146
2147 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
2148
2149 /* 2147 /*
2150 * this limit is imposed by hardware restrictions 2148 * this limit is imposed by hardware restrictions
2151 */ 2149 */
@@ -2227,7 +2225,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev)
2227 struct Scsi_Host *shost = sdev->host; 2225 struct Scsi_Host *shost = sdev->host;
2228 struct request_queue *q; 2226 struct request_queue *q;
2229 2227
2230 q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); 2228 q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
2231 if (!q) 2229 if (!q)
2232 return NULL; 2230 return NULL;
2233 q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; 2231 q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
@@ -2243,6 +2241,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev)
2243 } 2241 }
2244 2242
2245 __scsi_init_queue(shost, q); 2243 __scsi_init_queue(shost, q);
2244 blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
2246 blk_queue_prep_rq(q, scsi_prep_fn); 2245 blk_queue_prep_rq(q, scsi_prep_fn);
2247 blk_queue_unprep_rq(q, scsi_unprep_fn); 2246 blk_queue_unprep_rq(q, scsi_unprep_fn);
2248 blk_queue_softirq_done(q, scsi_softirq_done); 2247 blk_queue_softirq_done(q, scsi_softirq_done);
@@ -2274,6 +2273,7 @@ struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
2274 2273
2275 sdev->request_queue->queuedata = sdev; 2274 sdev->request_queue->queuedata = sdev;
2276 __scsi_init_queue(sdev->host, sdev->request_queue); 2275 __scsi_init_queue(sdev->host, sdev->request_queue);
2276 blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, sdev->request_queue);
2277 return sdev->request_queue; 2277 return sdev->request_queue;
2278} 2278}
2279 2279
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 91b90f672d23..7142c8be1099 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1292,8 +1292,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
1292 transport_add_device(&sdev->sdev_gendev); 1292 transport_add_device(&sdev->sdev_gendev);
1293 sdev->is_visible = 1; 1293 sdev->is_visible = 1;
1294 1294
1295 error = bsg_register_queue(rq, &sdev->sdev_gendev, NULL, NULL); 1295 error = bsg_scsi_register_queue(rq, &sdev->sdev_gendev);
1296
1297 if (error) 1296 if (error)
1298 /* we're treating error on bsg register as non-fatal, 1297 /* we're treating error on bsg register as non-fatal,
1299 * so pretend nothing went wrong */ 1298 * so pretend nothing went wrong */
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 736a1f4f9676..08acbabfae07 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -227,8 +227,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
227 * by default assume old behaviour and bounce for any highmem page 227 * by default assume old behaviour and bounce for any highmem page
228 */ 228 */
229 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 229 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
230 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); 230 blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
231 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
232 return 0; 231 return 0;
233} 232}
234 233
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 1fa84d6a0f8b..a6201e696ab9 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -714,7 +714,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
714 case SD_LBP_FULL: 714 case SD_LBP_FULL:
715 case SD_LBP_DISABLE: 715 case SD_LBP_DISABLE:
716 blk_queue_max_discard_sectors(q, 0); 716 blk_queue_max_discard_sectors(q, 0);
717 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 717 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
718 return; 718 return;
719 719
720 case SD_LBP_UNMAP: 720 case SD_LBP_UNMAP:
@@ -747,7 +747,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
747 } 747 }
748 748
749 blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9)); 749 blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
750 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 750 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
751} 751}
752 752
753static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) 753static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
@@ -2955,8 +2955,8 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp)
2955 rot = get_unaligned_be16(&buffer[4]); 2955 rot = get_unaligned_be16(&buffer[4]);
2956 2956
2957 if (rot == 1) { 2957 if (rot == 1) {
2958 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 2958 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
2959 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); 2959 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
2960 } 2960 }
2961 2961
2962 if (sdkp->device->type == TYPE_ZBC) { 2962 if (sdkp->device->type == TYPE_ZBC) {
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index b2880c7709e6..10c94011c8a8 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -5348,7 +5348,7 @@ static int pqi_map_queues(struct Scsi_Host *shost)
5348{ 5348{
5349 struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); 5349 struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost);
5350 5350
5351 return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev); 5351 return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev, 0);
5352} 5352}
5353 5353
5354static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info, 5354static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info,
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 9be34d37c356..0cf25d789d05 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -525,6 +525,8 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode)
525 struct scsi_cd *cd; 525 struct scsi_cd *cd;
526 int ret = -ENXIO; 526 int ret = -ENXIO;
527 527
528 check_disk_change(bdev);
529
528 mutex_lock(&sr_mutex); 530 mutex_lock(&sr_mutex);
529 cd = scsi_cd_get(bdev->bd_disk); 531 cd = scsi_cd_get(bdev->bd_disk);
530 if (cd) { 532 if (cd) {
diff --git a/drivers/staging/rts5208/rtsx_chip.h b/drivers/staging/rts5208/rtsx_chip.h
index 4f6e3c1c4621..8a8cd5d3cf7e 100644
--- a/drivers/staging/rts5208/rtsx_chip.h
+++ b/drivers/staging/rts5208/rtsx_chip.h
@@ -339,13 +339,13 @@ struct sense_data_t {
339#define CHK_BIT(data, idx) ((data) & (1 << (idx))) 339#define CHK_BIT(data, idx) ((data) & (1 << (idx)))
340 340
341/* SG descriptor */ 341/* SG descriptor */
342#define SG_INT 0x04 342#define RTSX_SG_INT 0x04
343#define SG_END 0x02 343#define RTSX_SG_END 0x02
344#define SG_VALID 0x01 344#define RTSX_SG_VALID 0x01
345 345
346#define SG_NO_OP 0x00 346#define RTSX_SG_NO_OP 0x00
347#define SG_TRANS_DATA (0x02 << 4) 347#define RTSX_SG_TRANS_DATA (0x02 << 4)
348#define SG_LINK_DESC (0x03 << 4) 348#define RTSX_SG_LINK_DESC (0x03 << 4)
349 349
350struct rtsx_chip; 350struct rtsx_chip;
351 351
diff --git a/drivers/staging/rts5208/rtsx_transport.c b/drivers/staging/rts5208/rtsx_transport.c
index 8b57e17ee6d3..716cce2bd7f0 100644
--- a/drivers/staging/rts5208/rtsx_transport.c
+++ b/drivers/staging/rts5208/rtsx_transport.c
@@ -308,7 +308,7 @@ static inline void rtsx_add_sg_tbl(
308 do { 308 do {
309 if (len > 0x80000) { 309 if (len > 0x80000) {
310 temp_len = 0x80000; 310 temp_len = 0x80000;
311 temp_opt = option & (~SG_END); 311 temp_opt = option & (~RTSX_SG_END);
312 } else { 312 } else {
313 temp_len = len; 313 temp_len = len;
314 temp_opt = option; 314 temp_opt = option;
@@ -407,9 +407,9 @@ static int rtsx_transfer_sglist_adma_partial(struct rtsx_chip *chip, u8 card,
407 *index = *index + 1; 407 *index = *index + 1;
408 } 408 }
409 if ((i == (sg_cnt - 1)) || !resid) 409 if ((i == (sg_cnt - 1)) || !resid)
410 option = SG_VALID | SG_END | SG_TRANS_DATA; 410 option = RTSX_SG_VALID | RTSX_SG_END | RTSX_SG_TRANS_DATA;
411 else 411 else
412 option = SG_VALID | SG_TRANS_DATA; 412 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA;
413 413
414 rtsx_add_sg_tbl(chip, (u32)addr, (u32)len, option); 414 rtsx_add_sg_tbl(chip, (u32)addr, (u32)len, option);
415 415
@@ -555,9 +555,9 @@ static int rtsx_transfer_sglist_adma(struct rtsx_chip *chip, u8 card,
555 (unsigned int)addr, len); 555 (unsigned int)addr, len);
556 556
557 if (j == (sg_cnt - 1)) 557 if (j == (sg_cnt - 1))
558 option = SG_VALID | SG_END | SG_TRANS_DATA; 558 option = RTSX_SG_VALID | RTSX_SG_END | RTSX_SG_TRANS_DATA;
559 else 559 else
560 option = SG_VALID | SG_TRANS_DATA; 560 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA;
561 561
562 rtsx_add_sg_tbl(chip, (u32)addr, (u32)len, option); 562 rtsx_add_sg_tbl(chip, (u32)addr, (u32)len, option);
563 563
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 9eb10d34682c..8e223799347a 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -26,6 +26,7 @@
26#include <linux/delay.h> 26#include <linux/delay.h>
27#include <linux/sched/signal.h> 27#include <linux/sched/signal.h>
28#include <asm/unaligned.h> 28#include <asm/unaligned.h>
29#include <linux/inet.h>
29#include <net/ipv6.h> 30#include <net/ipv6.h>
30#include <scsi/scsi_proto.h> 31#include <scsi/scsi_proto.h>
31#include <scsi/iscsi_proto.h> 32#include <scsi/iscsi_proto.h>
@@ -3291,30 +3292,6 @@ iscsit_send_task_mgt_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
3291 return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0); 3292 return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0);
3292} 3293}
3293 3294
3294static bool iscsit_check_inaddr_any(struct iscsi_np *np)
3295{
3296 bool ret = false;
3297
3298 if (np->np_sockaddr.ss_family == AF_INET6) {
3299 const struct sockaddr_in6 sin6 = {
3300 .sin6_addr = IN6ADDR_ANY_INIT };
3301 struct sockaddr_in6 *sock_in6 =
3302 (struct sockaddr_in6 *)&np->np_sockaddr;
3303
3304 if (!memcmp(sock_in6->sin6_addr.s6_addr,
3305 sin6.sin6_addr.s6_addr, 16))
3306 ret = true;
3307 } else {
3308 struct sockaddr_in * sock_in =
3309 (struct sockaddr_in *)&np->np_sockaddr;
3310
3311 if (sock_in->sin_addr.s_addr == htonl(INADDR_ANY))
3312 ret = true;
3313 }
3314
3315 return ret;
3316}
3317
3318#define SENDTARGETS_BUF_LIMIT 32768U 3295#define SENDTARGETS_BUF_LIMIT 32768U
3319 3296
3320static int 3297static int
@@ -3393,7 +3370,6 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
3393 list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, 3370 list_for_each_entry(tpg_np, &tpg->tpg_gnp_list,
3394 tpg_np_list) { 3371 tpg_np_list) {
3395 struct iscsi_np *np = tpg_np->tpg_np; 3372 struct iscsi_np *np = tpg_np->tpg_np;
3396 bool inaddr_any = iscsit_check_inaddr_any(np);
3397 struct sockaddr_storage *sockaddr; 3373 struct sockaddr_storage *sockaddr;
3398 3374
3399 if (np->np_network_transport != network_transport) 3375 if (np->np_network_transport != network_transport)
@@ -3422,7 +3398,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
3422 } 3398 }
3423 } 3399 }
3424 3400
3425 if (inaddr_any) 3401 if (inet_addr_is_any((struct sockaddr *)&np->np_sockaddr))
3426 sockaddr = &conn->local_sockaddr; 3402 sockaddr = &conn->local_sockaddr;
3427 else 3403 else
3428 sockaddr = &np->np_sockaddr; 3404 sockaddr = &np->np_sockaddr;
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 9cd4ffe76c07..60d5b918c4ac 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -309,7 +309,7 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc)
309 309
310static int tcm_loop_slave_alloc(struct scsi_device *sd) 310static int tcm_loop_slave_alloc(struct scsi_device *sd)
311{ 311{
312 set_bit(QUEUE_FLAG_BIDI, &sd->request_queue->queue_flags); 312 blk_queue_flag_set(QUEUE_FLAG_BIDI, sd->request_queue);
313 return 0; 313 return 0;
314} 314}
315 315
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1357ef563893..ba12ee659673 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -315,8 +315,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
315 dio_warn_stale_pagecache(dio->iocb->ki_filp); 315 dio_warn_stale_pagecache(dio->iocb->ki_filp);
316 } 316 }
317 317
318 if (!(dio->flags & DIO_SKIP_DIO_COUNT)) 318 inode_dio_end(dio->inode);
319 inode_dio_end(dio->inode);
320 319
321 if (flags & DIO_COMPLETE_ASYNC) { 320 if (flags & DIO_COMPLETE_ASYNC) {
322 /* 321 /*
@@ -1252,8 +1251,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1252 */ 1251 */
1253 if (is_sync_kiocb(iocb)) 1252 if (is_sync_kiocb(iocb))
1254 dio->is_async = false; 1253 dio->is_async = false;
1255 else if (!(dio->flags & DIO_ASYNC_EXTEND) && 1254 else if (iov_iter_rw(iter) == WRITE && end > i_size_read(inode))
1256 iov_iter_rw(iter) == WRITE && end > i_size_read(inode))
1257 dio->is_async = false; 1255 dio->is_async = false;
1258 else 1256 else
1259 dio->is_async = true; 1257 dio->is_async = true;
@@ -1297,8 +1295,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1297 /* 1295 /*
1298 * Will be decremented at I/O completion time. 1296 * Will be decremented at I/O completion time.
1299 */ 1297 */
1300 if (!(dio->flags & DIO_SKIP_DIO_COUNT)) 1298 inode_dio_begin(inode);
1301 inode_dio_begin(inode);
1302 1299
1303 retval = 0; 1300 retval = 0;
1304 sdio.blkbits = blkbits; 1301 sdio.blkbits = blkbits;
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 69bea82ebeb1..6c666fd7de3c 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -88,6 +88,7 @@ struct blkg_policy_data {
88 /* the blkg and policy id this per-policy data belongs to */ 88 /* the blkg and policy id this per-policy data belongs to */
89 struct blkcg_gq *blkg; 89 struct blkcg_gq *blkg;
90 int plid; 90 int plid;
91 bool offline;
91}; 92};
92 93
93/* 94/*
diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h
index 6338551e0fb9..9f4c17f0d2d8 100644
--- a/include/linux/blk-mq-pci.h
+++ b/include/linux/blk-mq-pci.h
@@ -5,6 +5,7 @@
5struct blk_mq_tag_set; 5struct blk_mq_tag_set;
6struct pci_dev; 6struct pci_dev;
7 7
8int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev); 8int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
9 int offset);
9 10
10#endif /* _LINUX_BLK_MQ_PCI_H */ 11#endif /* _LINUX_BLK_MQ_PCI_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index bf18b95ed92d..17b18b91ebac 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -20,8 +20,13 @@ typedef void (bio_end_io_t) (struct bio *);
20 20
21/* 21/*
22 * Block error status values. See block/blk-core:blk_errors for the details. 22 * Block error status values. See block/blk-core:blk_errors for the details.
 23 * Alpha cannot write a byte atomically, so we need to use a 32-bit value.
23 */ 24 */
25#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
26typedef u32 __bitwise blk_status_t;
27#else
24typedef u8 __bitwise blk_status_t; 28typedef u8 __bitwise blk_status_t;
29#endif
25#define BLK_STS_OK 0 30#define BLK_STS_OK 0
26#define BLK_STS_NOTSUPP ((__force blk_status_t)1) 31#define BLK_STS_NOTSUPP ((__force blk_status_t)1)
27#define BLK_STS_TIMEOUT ((__force blk_status_t)2) 32#define BLK_STS_TIMEOUT ((__force blk_status_t)2)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ed63f3b69c12..9af3e0f430bc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -707,73 +707,10 @@ struct request_queue {
707 (1 << QUEUE_FLAG_SAME_COMP) | \ 707 (1 << QUEUE_FLAG_SAME_COMP) | \
708 (1 << QUEUE_FLAG_POLL)) 708 (1 << QUEUE_FLAG_POLL))
709 709
710/* 710void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
711 * @q->queue_lock is set while a queue is being initialized. Since we know 711void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
712 * that no other threads access the queue object before @q->queue_lock has 712bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
713 * been set, it is safe to manipulate queue flags without holding the 713bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
714 * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
715 * blk_init_allocated_queue().
716 */
717static inline void queue_lockdep_assert_held(struct request_queue *q)
718{
719 if (q->queue_lock)
720 lockdep_assert_held(q->queue_lock);
721}
722
723static inline void queue_flag_set_unlocked(unsigned int flag,
724 struct request_queue *q)
725{
726 __set_bit(flag, &q->queue_flags);
727}
728
729static inline int queue_flag_test_and_clear(unsigned int flag,
730 struct request_queue *q)
731{
732 queue_lockdep_assert_held(q);
733
734 if (test_bit(flag, &q->queue_flags)) {
735 __clear_bit(flag, &q->queue_flags);
736 return 1;
737 }
738
739 return 0;
740}
741
742static inline int queue_flag_test_and_set(unsigned int flag,
743 struct request_queue *q)
744{
745 queue_lockdep_assert_held(q);
746
747 if (!test_bit(flag, &q->queue_flags)) {
748 __set_bit(flag, &q->queue_flags);
749 return 0;
750 }
751
752 return 1;
753}
754
755static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
756{
757 queue_lockdep_assert_held(q);
758 __set_bit(flag, &q->queue_flags);
759}
760
761static inline void queue_flag_clear_unlocked(unsigned int flag,
762 struct request_queue *q)
763{
764 __clear_bit(flag, &q->queue_flags);
765}
766
767static inline int queue_in_flight(struct request_queue *q)
768{
769 return q->in_flight[0] + q->in_flight[1];
770}
771
772static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
773{
774 queue_lockdep_assert_held(q);
775 __clear_bit(flag, &q->queue_flags);
776}
777 714
778#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 715#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
779#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 716#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
@@ -804,6 +741,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
804extern int blk_set_preempt_only(struct request_queue *q); 741extern int blk_set_preempt_only(struct request_queue *q);
805extern void blk_clear_preempt_only(struct request_queue *q); 742extern void blk_clear_preempt_only(struct request_queue *q);
806 743
744static inline int queue_in_flight(struct request_queue *q)
745{
746 return q->in_flight[0] + q->in_flight[1];
747}
748
807static inline bool blk_account_rq(struct request *rq) 749static inline bool blk_account_rq(struct request *rq)
808{ 750{
809 return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); 751 return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
@@ -1080,6 +1022,19 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
1080} 1022}
1081 1023
1082/* 1024/*
1025 * The basic unit of block I/O is a sector. It is used in a number of contexts
1026 * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
1027 * bytes. Variables of type sector_t represent an offset or size that is a
1028 * multiple of 512 bytes. Hence these two constants.
1029 */
1030#ifndef SECTOR_SHIFT
1031#define SECTOR_SHIFT 9
1032#endif
1033#ifndef SECTOR_SIZE
1034#define SECTOR_SIZE (1 << SECTOR_SHIFT)
1035#endif
1036
1037/*
1083 * blk_rq_pos() : the current sector 1038 * blk_rq_pos() : the current sector
1084 * blk_rq_bytes() : bytes left in the entire request 1039 * blk_rq_bytes() : bytes left in the entire request
1085 * blk_rq_cur_bytes() : bytes left in the current segment 1040 * blk_rq_cur_bytes() : bytes left in the current segment
@@ -1106,12 +1061,12 @@ extern unsigned int blk_rq_err_bytes(const struct request *rq);
1106 1061
1107static inline unsigned int blk_rq_sectors(const struct request *rq) 1062static inline unsigned int blk_rq_sectors(const struct request *rq)
1108{ 1063{
1109 return blk_rq_bytes(rq) >> 9; 1064 return blk_rq_bytes(rq) >> SECTOR_SHIFT;
1110} 1065}
1111 1066
1112static inline unsigned int blk_rq_cur_sectors(const struct request *rq) 1067static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
1113{ 1068{
1114 return blk_rq_cur_bytes(rq) >> 9; 1069 return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
1115} 1070}
1116 1071
1117static inline unsigned int blk_rq_zone_no(struct request *rq) 1072static inline unsigned int blk_rq_zone_no(struct request *rq)
@@ -1141,7 +1096,8 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
1141 int op) 1096 int op)
1142{ 1097{
1143 if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) 1098 if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
1144 return min(q->limits.max_discard_sectors, UINT_MAX >> 9); 1099 return min(q->limits.max_discard_sectors,
1100 UINT_MAX >> SECTOR_SHIFT);
1145 1101
1146 if (unlikely(op == REQ_OP_WRITE_SAME)) 1102 if (unlikely(op == REQ_OP_WRITE_SAME))
1147 return q->limits.max_write_same_sectors; 1103 return q->limits.max_write_same_sectors;
@@ -1321,7 +1277,8 @@ extern long nr_blockdev_pages(void);
1321 1277
1322bool __must_check blk_get_queue(struct request_queue *); 1278bool __must_check blk_get_queue(struct request_queue *);
1323struct request_queue *blk_alloc_queue(gfp_t); 1279struct request_queue *blk_alloc_queue(gfp_t);
1324struct request_queue *blk_alloc_queue_node(gfp_t, int); 1280struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
1281 spinlock_t *lock);
1325extern void blk_put_queue(struct request_queue *); 1282extern void blk_put_queue(struct request_queue *);
1326extern void blk_set_queue_dying(struct request_queue *); 1283extern void blk_set_queue_dying(struct request_queue *);
1327 1284
@@ -1452,16 +1409,21 @@ extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
1452static inline int sb_issue_discard(struct super_block *sb, sector_t block, 1409static inline int sb_issue_discard(struct super_block *sb, sector_t block,
1453 sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) 1410 sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
1454{ 1411{
1455 return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), 1412 return blkdev_issue_discard(sb->s_bdev,
1456 nr_blocks << (sb->s_blocksize_bits - 9), 1413 block << (sb->s_blocksize_bits -
1414 SECTOR_SHIFT),
1415 nr_blocks << (sb->s_blocksize_bits -
1416 SECTOR_SHIFT),
1457 gfp_mask, flags); 1417 gfp_mask, flags);
1458} 1418}
1459static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, 1419static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
1460 sector_t nr_blocks, gfp_t gfp_mask) 1420 sector_t nr_blocks, gfp_t gfp_mask)
1461{ 1421{
1462 return blkdev_issue_zeroout(sb->s_bdev, 1422 return blkdev_issue_zeroout(sb->s_bdev,
1463 block << (sb->s_blocksize_bits - 9), 1423 block << (sb->s_blocksize_bits -
1464 nr_blocks << (sb->s_blocksize_bits - 9), 1424 SECTOR_SHIFT),
1425 nr_blocks << (sb->s_blocksize_bits -
1426 SECTOR_SHIFT),
1465 gfp_mask, 0); 1427 gfp_mask, 0);
1466} 1428}
1467 1429
@@ -1568,7 +1530,8 @@ static inline int queue_alignment_offset(struct request_queue *q)
1568static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) 1530static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
1569{ 1531{
1570 unsigned int granularity = max(lim->physical_block_size, lim->io_min); 1532 unsigned int granularity = max(lim->physical_block_size, lim->io_min);
1571 unsigned int alignment = sector_div(sector, granularity >> 9) << 9; 1533 unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
1534 << SECTOR_SHIFT;
1572 1535
1573 return (granularity + lim->alignment_offset - alignment) % granularity; 1536 return (granularity + lim->alignment_offset - alignment) % granularity;
1574} 1537}
@@ -1602,8 +1565,8 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
1602 return 0; 1565 return 0;
1603 1566
1604 /* Why are these in bytes, not sectors? */ 1567 /* Why are these in bytes, not sectors? */
1605 alignment = lim->discard_alignment >> 9; 1568 alignment = lim->discard_alignment >> SECTOR_SHIFT;
1606 granularity = lim->discard_granularity >> 9; 1569 granularity = lim->discard_granularity >> SECTOR_SHIFT;
1607 if (!granularity) 1570 if (!granularity)
1608 return 0; 1571 return 0;
1609 1572
@@ -1614,7 +1577,7 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
1614 offset = (granularity + alignment - offset) % granularity; 1577 offset = (granularity + alignment - offset) % granularity;
1615 1578
1616 /* Turn it back into bytes, gaah */ 1579 /* Turn it back into bytes, gaah */
1617 return offset << 9; 1580 return offset << SECTOR_SHIFT;
1618} 1581}
1619 1582
1620static inline int bdev_discard_alignment(struct block_device *bdev) 1583static inline int bdev_discard_alignment(struct block_device *bdev)
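The blkdev.h hunk above introduces SECTOR_SHIFT and SECTOR_SIZE and replaces the open-coded ">> 9" / "<< 9" shifts with them. A minimal sketch of the byte/sector conversions those constants express follows; only the two constants mirror the header, the function names and main() are illustrative.

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9
#define SECTOR_SIZE  (1 << SECTOR_SHIFT)	/* 512 bytes */

static inline uint64_t bytes_to_sectors(uint64_t bytes)
{
	return bytes >> SECTOR_SHIFT;
}

static inline uint64_t sectors_to_bytes(uint64_t sectors)
{
	return sectors << SECTOR_SHIFT;
}

int main(void)
{
	/* A 4096-byte filesystem block spans 8 sectors of SECTOR_SIZE bytes. */
	printf("%llu sectors, %llu bytes\n",
	       (unsigned long long)bytes_to_sectors(4096),
	       (unsigned long long)sectors_to_bytes(8));
	return 0;
}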
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index b1be0233ce35..28a7ccc55c89 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -38,12 +38,12 @@ struct bsg_buffer {
 };
 
 struct bsg_job {
-	struct scsi_request sreq;
 	struct device *dev;
-	struct request *req;
 
 	struct kref kref;
 
+	unsigned int timeout;
+
 	/* Transport/driver specific request/reply structs */
 	void *request;
 	void *reply;
@@ -63,6 +63,9 @@ struct bsg_job {
 	struct bsg_buffer request_payload;
 	struct bsg_buffer reply_payload;
 
+	int result;
+	unsigned int reply_payload_rcv_len;
+
 	void *dd_data;		/* Used for driver-specific storage */
 };
 
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 2a202e41a3af..0c7dd9ceb139 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -1,34 +1,43 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BSG_H
-#define BSG_H
+#ifndef _LINUX_BSG_H
+#define _LINUX_BSG_H
 
 #include <uapi/linux/bsg.h>
 
+struct request;
+
+#ifdef CONFIG_BLK_DEV_BSG
+struct bsg_ops {
+	int	(*check_proto)(struct sg_io_v4 *hdr);
+	int	(*fill_hdr)(struct request *rq, struct sg_io_v4 *hdr,
+				fmode_t mode);
+	int	(*complete_rq)(struct request *rq, struct sg_io_v4 *hdr);
+	void	(*free_rq)(struct request *rq);
+};
 
-#if defined(CONFIG_BLK_DEV_BSG)
 struct bsg_class_device {
 	struct device *class_dev;
 	struct device *parent;
 	int minor;
 	struct request_queue *queue;
 	struct kref ref;
+	const struct bsg_ops *ops;
 	void (*release)(struct device *);
 };
 
-extern int bsg_register_queue(struct request_queue *q,
-			      struct device *parent, const char *name,
-			      void (*release)(struct device *));
-extern void bsg_unregister_queue(struct request_queue *);
+int bsg_register_queue(struct request_queue *q, struct device *parent,
+		const char *name, const struct bsg_ops *ops,
+		void (*release)(struct device *));
+int bsg_scsi_register_queue(struct request_queue *q, struct device *parent);
+void bsg_unregister_queue(struct request_queue *q);
 #else
-static inline int bsg_register_queue(struct request_queue *q,
-				     struct device *parent, const char *name,
-				     void (*release)(struct device *))
+static inline int bsg_scsi_register_queue(struct request_queue *q,
+		struct device *parent)
 {
 	return 0;
 }
 static inline void bsg_unregister_queue(struct request_queue *q)
 {
 }
-#endif
-
-#endif
+#endif /* CONFIG_BLK_DEV_BSG */
+#endif /* _LINUX_BSG_H */
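What bsg gains here is an indirection layer: the core no longer assumes SCSI but calls whatever check_proto/fill_hdr/complete_rq/free_rq implementations were handed to bsg_register_queue(), with bsg_scsi_register_queue() supplying the SCSI ones. A hedged userspace sketch of the same ops-table dispatch pattern (struct bsg_ops_demo, scsi_like_ops and handle_ioctl are made-up names purely for illustration; the real structures live in the kernel and are not standalone-compilable):

#include <stdio.h>

/* Stand-ins for the kernel types; only the dispatch pattern matters here. */
struct sg_io_v4 { int protocol; };
struct request { int tag; };

struct bsg_ops_demo {
	int (*check_proto)(struct sg_io_v4 *hdr);
	int (*fill_hdr)(struct request *rq, struct sg_io_v4 *hdr);
	int (*complete_rq)(struct request *rq, struct sg_io_v4 *hdr);
	void (*free_rq)(struct request *rq);
};

static int scsi_check_proto(struct sg_io_v4 *hdr) { return hdr->protocol ? -1 : 0; }
static int scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr) { rq->tag = 1; return 0; }
static int scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr) { return 0; }
static void scsi_free_rq(struct request *rq) { rq->tag = 0; }

static const struct bsg_ops_demo scsi_like_ops = {
	.check_proto	= scsi_check_proto,
	.fill_hdr	= scsi_fill_hdr,
	.complete_rq	= scsi_complete_rq,
	.free_rq	= scsi_free_rq,
};

/* The "core" only ever talks to the ops table it was registered with. */
static int handle_ioctl(const struct bsg_ops_demo *ops, struct sg_io_v4 *hdr)
{
	struct request rq = { 0 };

	if (ops->check_proto(hdr))
		return -1;
	ops->fill_hdr(&rq, hdr);
	ops->complete_rq(&rq, hdr);
	ops->free_rq(&rq);
	return 0;
}

int main(void)
{
	struct sg_io_v4 hdr = { .protocol = 0 };

	printf("dispatch result: %d\n", handle_ioctl(&scsi_like_ops, &hdr));
	return 0;
}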
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index da83f64952e7..4384433b50e7 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -542,8 +542,6 @@ do { \
 #define DMEMIT(x...) sz += ((sz >= maxlen) ? \
 			  0 : scnprintf(result + sz, maxlen - sz, x))
 
-#define SECTOR_SHIFT 9
-
 /*
  * Definitions of return values from target end_io function.
  */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c6baf767619e..070807ce3e41 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2977,12 +2977,6 @@ enum {
 
 	/* filesystem does not support filling holes */
 	DIO_SKIP_HOLES	= 0x02,
-
-	/* filesystem can handle aio writes beyond i_size */
-	DIO_ASYNC_EXTEND = 0x04,
-
-	/* inode/fs/bdev does not need truncate protection */
-	DIO_SKIP_DIO_COUNT = 0x08,
 };
 
 void dio_end_io(struct bio *bio);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1d6f16110eae..ca9d34feb572 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -160,7 +160,6 @@ struct ide_io_ports {
  */
 #define PARTN_BITS	6	/* number of minor dev bits for partitions */
 #define MAX_DRIVES	2	/* per interface; 2 assumed by lots of code */
-#define SECTOR_SIZE	512
 
 /*
  * Timeouts for various operations:
diff --git a/include/linux/inet.h b/include/linux/inet.h
index 636ebe87e6f8..97defc1139e9 100644
--- a/include/linux/inet.h
+++ b/include/linux/inet.h
@@ -59,5 +59,6 @@ extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char
 
 extern int inet_pton_with_scope(struct net *net, unsigned short af,
 		const char *src, const char *port, struct sockaddr_storage *addr);
+extern bool inet_addr_is_any(struct sockaddr *addr);
 
 #endif	/* _LINUX_INET_H */
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 7f4b60abdf27..6e0859b9d4d2 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -16,26 +16,58 @@ enum {
 	NVM_IOTYPE_GC = 1,
 };
 
-#define NVM_BLK_BITS (16)
-#define NVM_PG_BITS  (16)
-#define NVM_SEC_BITS (8)
-#define NVM_PL_BITS  (8)
-#define NVM_LUN_BITS (8)
-#define NVM_CH_BITS  (7)
+/* common format */
+#define NVM_GEN_CH_BITS		(8)
+#define NVM_GEN_LUN_BITS	(8)
+#define NVM_GEN_BLK_BITS	(16)
+#define NVM_GEN_RESERVED	(32)
+
+/* 1.2 format */
+#define NVM_12_PG_BITS		(16)
+#define NVM_12_PL_BITS		(4)
+#define NVM_12_SEC_BITS		(4)
+#define NVM_12_RESERVED		(8)
+
+/* 2.0 format */
+#define NVM_20_SEC_BITS		(24)
+#define NVM_20_RESERVED		(8)
+
+enum {
+	NVM_OCSSD_SPEC_12 = 12,
+	NVM_OCSSD_SPEC_20 = 20,
+};
 
 struct ppa_addr {
 	/* Generic structure for all addresses */
 	union {
+		/* generic device format */
+		struct {
+			u64 ch		: NVM_GEN_CH_BITS;
+			u64 lun		: NVM_GEN_LUN_BITS;
+			u64 blk		: NVM_GEN_BLK_BITS;
+			u64 reserved	: NVM_GEN_RESERVED;
+		} a;
+
+		/* 1.2 device format */
 		struct {
-			u64 blk		: NVM_BLK_BITS;
-			u64 pg		: NVM_PG_BITS;
-			u64 sec		: NVM_SEC_BITS;
-			u64 pl		: NVM_PL_BITS;
-			u64 lun		: NVM_LUN_BITS;
-			u64 ch		: NVM_CH_BITS;
-			u64 reserved	: 1;
+			u64 ch		: NVM_GEN_CH_BITS;
+			u64 lun		: NVM_GEN_LUN_BITS;
+			u64 blk		: NVM_GEN_BLK_BITS;
+			u64 pg		: NVM_12_PG_BITS;
+			u64 pl		: NVM_12_PL_BITS;
+			u64 sec		: NVM_12_SEC_BITS;
+			u64 reserved	: NVM_12_RESERVED;
 		} g;
 
+		/* 2.0 device format */
+		struct {
+			u64 grp		: NVM_GEN_CH_BITS;
+			u64 pu		: NVM_GEN_LUN_BITS;
+			u64 chk		: NVM_GEN_BLK_BITS;
+			u64 sec		: NVM_20_SEC_BITS;
+			u64 reserved	: NVM_20_RESERVED;
+		} m;
+
 		struct {
 			u64 line	: 63;
 			u64 is_cached	: 1;
@@ -49,10 +81,13 @@ struct nvm_rq;
 struct nvm_id;
 struct nvm_dev;
 struct nvm_tgt_dev;
+struct nvm_chk_meta;
 
-typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
+typedef int (nvm_id_fn)(struct nvm_dev *);
 typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
 typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
+typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, struct nvm_chk_meta *,
+				  sector_t, int);
 typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
@@ -66,6 +101,8 @@ struct nvm_dev_ops {
 	nvm_op_bb_tbl_fn *get_bb_tbl;
 	nvm_op_set_bb_fn *set_bb_tbl;
 
+	nvm_get_chk_meta_fn *get_chk_meta;
+
 	nvm_submit_io_fn *submit_io;
 	nvm_submit_io_sync_fn *submit_io_sync;
 
@@ -73,8 +110,6 @@ struct nvm_dev_ops {
 	nvm_destroy_dma_pool_fn *destroy_dma_pool;
 	nvm_dev_dma_alloc_fn *dev_dma_alloc;
 	nvm_dev_dma_free_fn *dev_dma_free;
-
-	unsigned int max_phys_sect;
 };
 
 #ifdef CONFIG_NVM
@@ -154,60 +189,75 @@ struct nvm_id_lp_tbl {
 	struct nvm_id_lp_mlc mlc;
 };
 
-struct nvm_id_group {
-	u8	mtype;
-	u8	fmtype;
-	u8	num_ch;
-	u8	num_lun;
-	u16	num_chk;
-	u16	clba;
-	u16	csecs;
-	u16	sos;
-
-	u16	ws_min;
-	u16	ws_opt;
-	u16	ws_seq;
-	u16	ws_per_chk;
-
-	u32	trdt;
-	u32	trdm;
-	u32	tprt;
-	u32	tprm;
-	u32	tbet;
-	u32	tbem;
-	u32	mpos;
-	u32	mccap;
-	u16	cpar;
-
-	/* 1.2 compatibility */
-	u8	num_pln;
-	u16	num_pg;
-	u16	fpg_sz;
-};
-
-struct nvm_addr_format {
-	u8	ch_offset;
-	u8	ch_len;
-	u8	lun_offset;
-	u8	lun_len;
-	u8	pln_offset;
-	u8	pln_len;
-	u8	blk_offset;
-	u8	blk_len;
-	u8	pg_offset;
-	u8	pg_len;
-	u8	sect_offset;
-	u8	sect_len;
-};
-
-struct nvm_id {
-	u8	ver_id;
-	u8	vmnt;
-	u32	cap;
-	u32	dom;
-	struct nvm_addr_format ppaf;
-	struct nvm_id_group grp;
-} __packed;
+struct nvm_addrf_12 {
+	u8	ch_len;
+	u8	lun_len;
+	u8	blk_len;
+	u8	pg_len;
+	u8	pln_len;
+	u8	sec_len;
+
+	u8	ch_offset;
+	u8	lun_offset;
+	u8	blk_offset;
+	u8	pg_offset;
+	u8	pln_offset;
+	u8	sec_offset;
+
+	u64	ch_mask;
+	u64	lun_mask;
+	u64	blk_mask;
+	u64	pg_mask;
+	u64	pln_mask;
+	u64	sec_mask;
+};
+
+struct nvm_addrf {
+	u8	ch_len;
+	u8	lun_len;
+	u8	chk_len;
+	u8	sec_len;
+	u8	rsv_len[2];
+
+	u8	ch_offset;
+	u8	lun_offset;
+	u8	chk_offset;
+	u8	sec_offset;
+	u8	rsv_off[2];
+
+	u64	ch_mask;
+	u64	lun_mask;
+	u64	chk_mask;
+	u64	sec_mask;
+	u64	rsv_mask[2];
+};
+
+enum {
+	/* Chunk states */
+	NVM_CHK_ST_FREE =	1 << 0,
+	NVM_CHK_ST_CLOSED =	1 << 1,
+	NVM_CHK_ST_OPEN =	1 << 2,
+	NVM_CHK_ST_OFFLINE =	1 << 3,
+
+	/* Chunk types */
+	NVM_CHK_TP_W_SEQ =	1 << 0,
+	NVM_CHK_TP_W_RAN =	1 << 1,
+	NVM_CHK_TP_SZ_SPEC =	1 << 4,
+};
+
+/*
+ * Note: The structure size is linked to nvme_nvm_chk_meta such that the same
+ * buffer can be used when converting from little endian to cpu addressing.
+ */
+struct nvm_chk_meta {
+	u8	state;
+	u8	type;
+	u8	wi;
+	u8	rsvd[5];
+	u64	slba;
+	u64	cnlb;
+	u64	wp;
+};
 
 struct nvm_target {
 	struct list_head list;
@@ -226,6 +276,8 @@ struct nvm_target {
 #define NVM_VERSION_MINOR 0
 #define NVM_VERSION_PATCH 0
 
+#define NVM_MAX_VLBA (64) /* max logical blocks in a vector command */
+
 struct nvm_rq;
 typedef void (nvm_end_io_fn)(struct nvm_rq *);
 
@@ -272,38 +324,69 @@ enum {
 	NVM_BLK_ST_BAD =	0x8,	/* Bad block */
 };
 
-
-/* Device generic information */
+/* Instance geometry */
 struct nvm_geo {
-	/* generic geometry */
-	int nr_chnls;
-	int all_luns; /* across channels */
-	int nr_luns; /* per channel */
-	int nr_chks; /* per lun */
-
-	int sec_size;
-	int oob_size;
-	int mccap;
-
-	int sec_per_chk;
-	int sec_per_lun;
-
-	int ws_min;
-	int ws_opt;
-	int ws_seq;
-	int ws_per_chk;
-
-	int max_rq_size;
-
-	int op;
-
-	struct nvm_addr_format ppaf;
-
-	/* Legacy 1.2 specific geometry */
-	int plane_mode; /* drive device in single, double or quad mode */
-	int nr_planes;
-	int sec_per_pg; /* only sectors for a single page */
-	int sec_per_pl; /* all sectors across planes */
+	/* device reported version */
+	u8	major_ver_id;
+	u8	minor_ver_id;
+
+	/* kernel short version */
+	u8	version;
+
+	/* instance specific geometry */
+	int num_ch;
+	int num_lun;		/* per channel */
+
+	/* calculated values */
+	int all_luns;		/* across channels */
+	int all_chunks;		/* across channels */
+
+	int op;			/* over-provision in instance */
+
+	sector_t total_secs;	/* across channels */
+
+	/* chunk geometry */
+	u32	num_chk;	/* chunks per lun */
+	u32	clba;		/* sectors per chunk */
+	u16	csecs;		/* sector size */
+	u16	sos;		/* out-of-band area size */
+
+	/* device write constrains */
+	u32	ws_min;		/* minimum write size */
+	u32	ws_opt;		/* optimal write size */
+	u32	mw_cunits;	/* distance required for successful read */
+	u32	maxoc;		/* maximum open chunks */
+	u32	maxocpu;	/* maximum open chunks per parallel unit */
+
+	/* device capabilities */
+	u32	mccap;
+
+	/* device timings */
+	u32	trdt;		/* Avg. Tread (ns) */
+	u32	trdm;		/* Max Tread (ns) */
+	u32	tprt;		/* Avg. Tprog (ns) */
+	u32	tprm;		/* Max Tprog (ns) */
+	u32	tbet;		/* Avg. Terase (ns) */
+	u32	tbem;		/* Max Terase (ns) */
+
+	/* generic address format */
+	struct nvm_addrf addrf;
+
+	/* 1.2 compatibility */
+	u8	vmnt;
+	u32	cap;
+	u32	dom;
+
+	u8	mtype;
+	u8	fmtype;
+
+	u16	cpar;
+	u32	mpos;
+
+	u8	num_pln;
+	u8	pln_mode;
+	u16	num_pg;
+	u16	fpg_sz;
 };
 
 /* sub-device structure */
@@ -314,9 +397,6 @@ struct nvm_tgt_dev {
 	/* Base ppas for target LUNs */
 	struct ppa_addr *luns;
 
-	sector_t total_secs;
-
-	struct nvm_id identity;
 	struct request_queue *q;
 
 	struct nvm_dev *parent;
@@ -331,13 +411,9 @@ struct nvm_dev {
 	/* Device information */
 	struct nvm_geo geo;
 
-	unsigned long total_secs;
-
 	unsigned long *lun_map;
 	void *dma_pool;
 
-	struct nvm_id identity;
-
 	/* Backend device */
 	struct request_queue *q;
 	char name[DISK_NAME_LEN];
@@ -353,44 +429,58 @@ struct nvm_dev {
 	struct list_head targets;
 };
 
-static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev,
+static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
 						  struct ppa_addr r)
 {
-	struct nvm_geo *geo = &tgt_dev->geo;
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr l;
 
-	l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset;
-	l.ppa |= ((u64)r.g.pg) << geo->ppaf.pg_offset;
-	l.ppa |= ((u64)r.g.sec) << geo->ppaf.sect_offset;
-	l.ppa |= ((u64)r.g.pl) << geo->ppaf.pln_offset;
-	l.ppa |= ((u64)r.g.lun) << geo->ppaf.lun_offset;
-	l.ppa |= ((u64)r.g.ch) << geo->ppaf.ch_offset;
+	if (geo->version == NVM_OCSSD_SPEC_12) {
+		struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf;
+
+		l.ppa = ((u64)r.g.ch) << ppaf->ch_offset;
+		l.ppa |= ((u64)r.g.lun) << ppaf->lun_offset;
+		l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset;
+		l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset;
+		l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset;
+		l.ppa |= ((u64)r.g.sec) << ppaf->sec_offset;
+	} else {
+		struct nvm_addrf *lbaf = &geo->addrf;
+
+		l.ppa = ((u64)r.m.grp) << lbaf->ch_offset;
+		l.ppa |= ((u64)r.m.pu) << lbaf->lun_offset;
+		l.ppa |= ((u64)r.m.chk) << lbaf->chk_offset;
+		l.ppa |= ((u64)r.m.sec) << lbaf->sec_offset;
+	}
 
 	return l;
 }
 
-static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev,
+static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
 						  struct ppa_addr r)
 {
-	struct nvm_geo *geo = &tgt_dev->geo;
+	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr l;
 
 	l.ppa = 0;
-	/*
-	 * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc.
-	 */
-	l.g.blk = (r.ppa >> geo->ppaf.blk_offset) &
-					(((1 << geo->ppaf.blk_len) - 1));
-	l.g.pg |= (r.ppa >> geo->ppaf.pg_offset) &
-					(((1 << geo->ppaf.pg_len) - 1));
-	l.g.sec |= (r.ppa >> geo->ppaf.sect_offset) &
-					(((1 << geo->ppaf.sect_len) - 1));
-	l.g.pl |= (r.ppa >> geo->ppaf.pln_offset) &
-					(((1 << geo->ppaf.pln_len) - 1));
-	l.g.lun |= (r.ppa >> geo->ppaf.lun_offset) &
-					(((1 << geo->ppaf.lun_len) - 1));
-	l.g.ch |= (r.ppa >> geo->ppaf.ch_offset) &
-					(((1 << geo->ppaf.ch_len) - 1));
+
+	if (geo->version == NVM_OCSSD_SPEC_12) {
+		struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf;
+
+		l.g.ch = (r.ppa & ppaf->ch_mask) >> ppaf->ch_offset;
+		l.g.lun = (r.ppa & ppaf->lun_mask) >> ppaf->lun_offset;
+		l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset;
+		l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset;
+		l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset;
+		l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sec_offset;
+	} else {
+		struct nvm_addrf *lbaf = &geo->addrf;
+
+		l.m.grp = (r.ppa & lbaf->ch_mask) >> lbaf->ch_offset;
+		l.m.pu = (r.ppa & lbaf->lun_mask) >> lbaf->lun_offset;
+		l.m.chk = (r.ppa & lbaf->chk_mask) >> lbaf->chk_offset;
+		l.m.sec = (r.ppa & lbaf->sec_mask) >> lbaf->sec_offset;
+	}
 
 	return l;
 }
@@ -434,9 +524,13 @@ extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
 
+
+extern int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev,
+			      struct nvm_chk_meta *meta, struct ppa_addr ppa,
+			      int nchks);
+
 extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
 			      int, int);
-extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
 extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
 extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *);
 extern void nvm_end_io(struct nvm_rq *);
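The conversion helpers now branch on geo->version: the 1.2 path shifts each bitfield of the g view into place, the 2.0 path does the same for the m view, and the unpacking direction masks first and then shifts, using the masks that nvm_addrf/nvm_addrf_12 precompute. A small userspace sketch of that pack/unpack round trip (the 8/8/16-bit widths mirror the NVM_GEN_*_BITS defines, but the offsets below are assumptions for illustration; real devices report their own layout):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Generic-format field widths, matching the NVM_GEN_*_BITS defines. */
#define CH_LEN   8
#define LUN_LEN  8
#define BLK_LEN  16

/* Offsets and masks precomputed once, as nvm_addrf stores them (assumed layout). */
#define CH_OFF   0
#define LUN_OFF  (CH_OFF + CH_LEN)
#define BLK_OFF  (LUN_OFF + LUN_LEN)

#define CH_MASK  (((1ULL << CH_LEN) - 1) << CH_OFF)
#define LUN_MASK (((1ULL << LUN_LEN) - 1) << LUN_OFF)
#define BLK_MASK (((1ULL << BLK_LEN) - 1) << BLK_OFF)

int main(void)
{
	uint64_t ch = 3, lun = 7, blk = 4242;

	/* generic_to_dev_addr style: shift each field to its offset */
	uint64_t ppa = (ch << CH_OFF) | (lun << LUN_OFF) | (blk << BLK_OFF);

	/* dev_to_generic_addr style: mask first, then shift back down */
	uint64_t ch2 = (ppa & CH_MASK) >> CH_OFF;
	uint64_t lun2 = (ppa & LUN_MASK) >> LUN_OFF;
	uint64_t blk2 = (ppa & BLK_MASK) >> BLK_OFF;

	printf("ppa=0x%" PRIx64 " -> ch=%" PRIu64 " lun=%" PRIu64 " blk=%" PRIu64 "\n",
	       ppa, ch2, lun2, blk2);
	return 0;
}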
diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h
index 478acf6efac6..e964bbd03fc2 100644
--- a/include/linux/rtsx_pci.h
+++ b/include/linux/rtsx_pci.h
@@ -36,12 +36,12 @@
 #define   CHECK_REG_CMD		2
 
 #define RTSX_HDBAR			0x08
-#define   SG_INT			0x04
-#define   SG_END			0x02
-#define   SG_VALID			0x01
-#define   SG_NO_OP			0x00
-#define   SG_TRANS_DATA			(0x02 << 4)
-#define   SG_LINK_DESC			(0x03 << 4)
+#define   RTSX_SG_INT			0x04
+#define   RTSX_SG_END			0x02
+#define   RTSX_SG_VALID			0x01
+#define   RTSX_SG_NO_OP			0x00
+#define   RTSX_SG_TRANS_DATA		(0x02 << 4)
+#define   RTSX_SG_LINK_DESC		(0x03 << 4)
 #define RTSX_HDBCTLR			0x0C
 #define   SDMA_MODE			0x00
 #define   ADMA_MODE			(0x02 << 26)
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 0dcc60e820de..841585f6e5f2 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -171,6 +171,8 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth);
  *		  starting from the last allocated bit. This is less efficient
  *		  than the default behavior (false).
  *
+ * This operation provides acquire barrier semantics if it succeeds.
+ *
  * Return: Non-negative allocated bit number if successful, -1 otherwise.
  */
 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin);
@@ -300,6 +302,12 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
 	clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
 }
 
+static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
+					    unsigned int bitnr)
+{
+	clear_bit_unlock(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
+}
+
 static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
 {
 	return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index aa5d4eb725f5..51f52020ad5f 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -65,16 +65,18 @@ struct sg_table {
  */
 
 #define SG_MAGIC	0x87654321
+#define SG_CHAIN	0x01UL
+#define SG_END		0x02UL
 
 /*
  * We overload the LSB of the page pointer to indicate whether it's
  * a valid sg entry, or whether it points to the start of a new scatterlist.
  * Those low bits are there for everyone! (thanks mason :-)
  */
-#define sg_is_chain(sg)		((sg)->page_link & 0x01)
-#define sg_is_last(sg)		((sg)->page_link & 0x02)
+#define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN)
+#define sg_is_last(sg)		((sg)->page_link & SG_END)
 #define sg_chain_ptr(sg)	\
-	((struct scatterlist *) ((sg)->page_link & ~0x03))
+	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
 
 /**
  * sg_assign_page - Assign a given page to an SG entry
@@ -88,13 +90,13 @@ struct sg_table {
  **/
 static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
 {
-	unsigned long page_link = sg->page_link & 0x3;
+	unsigned long page_link = sg->page_link & (SG_CHAIN | SG_END);
 
 	/*
 	 * In order for the low bit stealing approach to work, pages
 	 * must be aligned at a 32-bit boundary as a minimum.
 	 */
-	BUG_ON((unsigned long) page & 0x03);
+	BUG_ON((unsigned long) page & (SG_CHAIN | SG_END));
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 	BUG_ON(sg_is_chain(sg));
@@ -130,7 +132,7 @@ static inline struct page *sg_page(struct scatterlist *sg)
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 	BUG_ON(sg_is_chain(sg));
 #endif
-	return (struct page *)((sg)->page_link & ~0x3);
+	return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END));
 }
 
 /**
@@ -178,7 +180,8 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
 	 * Set lowest bit to indicate a link pointer, and make sure to clear
 	 * the termination bit if it happens to be set.
 	 */
-	prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+	prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN)
+					& ~SG_END;
 }
 
 /**
@@ -198,8 +201,8 @@ static inline void sg_mark_end(struct scatterlist *sg)
 	/*
 	 * Set termination bit, clear potential chain bit
 	 */
-	sg->page_link |= 0x02;
-	sg->page_link &= ~0x01;
+	sg->page_link |= SG_END;
+	sg->page_link &= ~SG_CHAIN;
 }
 
 /**
@@ -215,7 +218,7 @@ static inline void sg_unmark_end(struct scatterlist *sg)
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 #endif
-	sg->page_link &= ~0x02;
+	sg->page_link &= ~SG_END;
 }
 
 /**
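SG_CHAIN and SG_END simply name the two low pointer bits scatterlist has always stolen; page alignment guarantees those bits are otherwise zero, so a tagged value can be masked back to the original pointer. A minimal userspace sketch of the same low-bit tagging (aligned_alloc standing in for a struct page pointer):

#include <stdio.h>
#include <stdlib.h>

#define SG_CHAIN 0x01UL
#define SG_END   0x02UL

int main(void)
{
	/* aligned_alloc guarantees the low bits of the pointer are zero */
	void *page = aligned_alloc(16, 64);
	unsigned long page_link = (unsigned long)page;

	/* sg_mark_end(): set the termination bit, clear a potential chain bit */
	page_link |= SG_END;
	page_link &= ~SG_CHAIN;

	printf("is_last=%lu is_chain=%lu\n",
	       page_link & SG_END, page_link & SG_CHAIN);

	/* sg_page(): strip both tag bits to recover the original pointer */
	printf("pointer survives: %s\n",
	       (void *)(page_link & ~(SG_CHAIN | SG_END)) == page ? "yes" : "no");

	free(page);
	return 0;
}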
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 3c50e07ee833..690621b610e5 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -101,7 +101,7 @@ enum blktrace_notify {
 struct blk_io_trace {
 	__u32 magic;		/* MAGIC << 8 | version */
 	__u32 sequence;		/* event number */
-	__u64 time;		/* in microseconds */
+	__u64 time;		/* in nanoseconds */
 	__u64 sector;		/* disk offset */
 	__u32 bytes;		/* transfer length */
 	__u32 action;		/* what happened */
diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h
index a45d0754102e..fde753735aba 100644
--- a/include/uapi/linux/msdos_fs.h
+++ b/include/uapi/linux/msdos_fs.h
@@ -10,7 +10,9 @@
  * The MS-DOS filesystem constants/structures
  */
 
+#ifndef SECTOR_SIZE
 #define SECTOR_SIZE	512		/* sector size (bytes) */
+#endif
 #define SECTOR_BITS	9		/* log2(SECTOR_SIZE) */
 #define MSDOS_DPB	(MSDOS_DPS)	/* dir entries per block */
 #define MSDOS_DPB_BITS	4		/* log2(MSDOS_DPB) */
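The #ifndef wrapper keeps this UAPI definition from clashing now that the block layer defines SECTOR_SIZE globally: whichever definition is seen first wins and the other becomes a no-op. A tiny standalone illustration of the guard (both defines inlined into one file; the header names in the comments are hypothetical):

/* blk_defs.h (hypothetical central definition) */
#define SECTOR_SIZE 512

/* msdos_defs.h (hypothetical copy of the guarded UAPI pattern) */
#ifndef SECTOR_SIZE
#define SECTOR_SIZE 512	/* only used if nobody defined it first */
#endif

#include <stdio.h>

int main(void)
{
	/* No redefinition warning: the guard made the second define a no-op. */
	printf("SECTOR_SIZE = %d\n", SECTOR_SIZE);
	return 0;
}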
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 42b5ca0acf93..e6a9c06ec70c 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -100,7 +100,7 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
 			return -1;
 		}
 
-		if (!test_and_set_bit(nr, word))
+		if (!test_and_set_bit_lock(nr, word))
 			break;
 
 		hint = nr + 1;
@@ -434,9 +434,9 @@ static void sbq_wake_up(struct sbitmap_queue *sbq)
 	/*
 	 * Pairs with the memory barrier in set_current_state() to ensure the
 	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
-	 * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
-	 * the comment on waitqueue_active(). This is __after_atomic because we
-	 * just did clear_bit() in the caller.
+	 * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+	 * waiter. See the comment on waitqueue_active(). This is __after_atomic
+	 * because we just did clear_bit_unlock() in the caller.
 	 */
 	smp_mb__after_atomic();
 
@@ -469,7 +469,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq)
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
-	sbitmap_clear_bit(&sbq->sb, nr);
+	sbitmap_clear_bit_unlock(&sbq->sb, nr);
 	sbq_wake_up(sbq);
 	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
 		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
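Together with the sbitmap.h change above, this gives each bit lock-like ordering: test_and_set_bit_lock() is an acquire on success and clear_bit_unlock() is a release, so stores made while the tag was held are visible to the next owner. A minimal C11 userspace sketch of that acquire/release pairing on a single bit (get_bit/put_bit are made-up names):

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong word;	/* one machine word of tag bits */
static int payload;		/* data protected by bit 0 */

/* test_and_set_bit_lock() analogue: acquire semantics on success */
static int get_bit(unsigned int nr)
{
	unsigned long mask = 1UL << nr;
	unsigned long old = atomic_fetch_or_explicit(&word, mask,
						     memory_order_acquire);
	return !(old & mask);	/* 1 if we took the bit */
}

/* clear_bit_unlock() analogue: release semantics */
static void put_bit(unsigned int nr)
{
	atomic_fetch_and_explicit(&word, ~(1UL << nr), memory_order_release);
}

int main(void)
{
	if (get_bit(0)) {
		payload = 42;	/* writes done "under" the bit ... */
		put_bit(0);	/* ... are published by the release */
	}
	if (get_bit(0))		/* acquire pairs with the earlier release */
		printf("payload = %d\n", payload);
	return 0;
}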
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index b5f940ce0143..d2984e9fcf08 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -745,7 +745,6 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
  */
 void wb_memcg_offline(struct mem_cgroup *memcg)
 {
-	LIST_HEAD(to_destroy);
 	struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
 	struct bdi_writeback *wb, *next;
 
@@ -764,7 +763,6 @@ void wb_memcg_offline(struct mem_cgroup *memcg)
  */
 void wb_blkcg_offline(struct blkcg *blkcg)
 {
-	LIST_HEAD(to_destroy);
 	struct bdi_writeback *wb, *next;
 
 	spin_lock_irq(&cgwb_lock);
diff --git a/net/core/utils.c b/net/core/utils.c
index 93066bd0305a..d47863b07a60 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -403,6 +403,29 @@ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af,
 }
 EXPORT_SYMBOL(inet_pton_with_scope);
 
+bool inet_addr_is_any(struct sockaddr *addr)
+{
+	if (addr->sa_family == AF_INET6) {
+		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr;
+		const struct sockaddr_in6 in6_any =
+			{ .sin6_addr = IN6ADDR_ANY_INIT };
+
+		if (!memcmp(in6->sin6_addr.s6_addr,
+			    in6_any.sin6_addr.s6_addr, 16))
+			return true;
+	} else if (addr->sa_family == AF_INET) {
+		struct sockaddr_in *in = (struct sockaddr_in *)addr;
+
+		if (in->sin_addr.s_addr == htonl(INADDR_ANY))
+			return true;
+	} else {
+		pr_warn("unexpected address family %u\n", addr->sa_family);
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(inet_addr_is_any);
+
 void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 			      __be32 from, __be32 to, bool pseudohdr)
 {
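inet_addr_is_any() maps almost one-to-one onto the POSIX socket API; only pr_warn() has no userspace equivalent. A hedged standalone sketch of the same wildcard-address check (addr_is_any is a made-up name; behaviour for unknown families mirrors the kernel helper by returning false):

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <arpa/inet.h>
#include <netinet/in.h>

static bool addr_is_any(const struct sockaddr *addr)
{
	if (addr->sa_family == AF_INET6) {
		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)addr;

		return memcmp(&in6->sin6_addr, &in6addr_any,
			      sizeof(struct in6_addr)) == 0;
	} else if (addr->sa_family == AF_INET) {
		const struct sockaddr_in *in = (const struct sockaddr_in *)addr;

		return in->sin_addr.s_addr == htonl(INADDR_ANY);
	}

	fprintf(stderr, "unexpected address family %u\n", addr->sa_family);
	return false;
}

int main(void)
{
	struct sockaddr_in v4 = { .sin_family = AF_INET,
				  .sin_addr.s_addr = htonl(INADDR_ANY) };
	struct sockaddr_in6 v6 = { .sin6_family = AF_INET6,
				   .sin6_addr = IN6ADDR_ANY_INIT };

	printf("v4 any: %d, v6 any: %d\n",
	       addr_is_any((struct sockaddr *)&v4),
	       addr_is_any((struct sockaddr *)&v6));
	return 0;
}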