author     Linus Torvalds <torvalds@linux-foundation.org>   2019-05-07 21:14:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2019-05-07 21:14:36 -0400
commit     67a242223958d628f0ba33283668e3ddd192d057
tree       a39e7039e9a2ef9ab46f8ba561175dbdc6101d11
parent     8b35ad6232c462b02e397e87ce702bcddd4ba543
parent     b8753433fc611e23e31300e1d099001a08955c88
Merge tag 'for-5.2/block-20190507' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"Nothing major in this series, just fixes and improvements all over the
map. This contains:
- Series of fixes for sed-opal (David, Jonas)
- Fixes and performance tweaks for BFQ (via Paolo)
- Set of fixes for bcache (via Coly)
- Set of fixes for md (via Song)
- Enabling multi-page for passthrough requests (Ming)
- Queue release fix series (Ming)
- Device notification improvements (Martin)
- Propagate underlying device rotational status in loop (Holger)
- Removal of mtip32xx trim support, which has been disabled for years
(Christoph)
- Improvement and cleanup of nvme command handling (Christoph)
- Add block SPDX tags (Christoph)
- Cleanup/hardening of bio/bvec iteration (Christoph)
- A few NVMe pull requests (Christoph)
- Removal of CONFIG_LBDAF (Christoph)
- Various little fixes here and there"
* tag 'for-5.2/block-20190507' of git://git.kernel.dk/linux-block: (164 commits)
block: fix mismerge in bvec_advance
block: don't drain in-progress dispatch in blk_cleanup_queue()
blk-mq: move cancel of hctx->run_work into blk_mq_hw_sysfs_release
blk-mq: always free hctx after request queue is freed
blk-mq: split blk_mq_alloc_and_init_hctx into two parts
blk-mq: free hw queue's resource in hctx's release handler
blk-mq: move cancel of requeue_work into blk_mq_release
blk-mq: grab .q_usage_counter when queuing request from plug code path
block: fix function name in comment
nvmet: protect discovery change log event list iteration
nvme: mark nvme_core_init and nvme_core_exit static
nvme: move command size checks to the core
nvme-fabrics: check more command sizes
nvme-pci: check more command sizes
nvme-pci: remove an unneeded variable initialization
nvme-pci: unquiesce admin queue on shutdown
nvme-pci: shutdown on timeout during deletion
nvme-pci: fix psdt field for single segment sgls
nvme-multipath: don't print ANA group state by default
nvme-multipath: split bios with the ns_head bio_set before submitting
...
207 files changed, 2312 insertions, 2257 deletions
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 98a8dd5ee385..1a0f2ac02eb6 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -20,13 +20,26 @@ for that device, by setting low_latency to 0. See Section 3 for | |||
20 | details on how to configure BFQ for the desired tradeoff between | 20 | details on how to configure BFQ for the desired tradeoff between |
21 | latency and throughput, or on how to maximize throughput. | 21 | latency and throughput, or on how to maximize throughput. |
22 | 22 | ||
23 | BFQ has a non-null overhead, which limits the maximum IOPS that a CPU | 23 | As every I/O scheduler, BFQ adds some overhead to per-I/O-request |
24 | can process for a device scheduled with BFQ. To give an idea of the | 24 | processing. To give an idea of this overhead, the total, |
25 | limits on slow or average CPUs, here are, first, the limits of BFQ for | 25 | single-lock-protected, per-request processing time of BFQ---i.e., the |
26 | three different CPUs, on, respectively, an average laptop, an old | 26 | sum of the execution times of the request insertion, dispatch and |
27 | desktop, and a cheap embedded system, in case full hierarchical | 27 | completion hooks---is, e.g., 1.9 us on an Intel Core i7-2760QM@2.40GHz |
28 | support is enabled (i.e., CONFIG_BFQ_GROUP_IOSCHED is set), but | 28 | (dated CPU for notebooks; time measured with simple code |
29 | CONFIG_DEBUG_BLK_CGROUP is not set (Section 4-2): | 29 | instrumentation, and using the throughput-sync.sh script of the S |
30 | suite [1], in performance-profiling mode). To put this result into | ||
31 | context, the total, single-lock-protected, per-request execution time | ||
32 | of the lightest I/O scheduler available in blk-mq, mq-deadline, is 0.7 | ||
33 | us (mq-deadline is ~800 LOC, against ~10500 LOC for BFQ). | ||
34 | |||
35 | Scheduling overhead further limits the maximum IOPS that a CPU can | ||
36 | process (already limited by the execution of the rest of the I/O | ||
37 | stack). To give an idea of the limits with BFQ, on slow or average | ||
38 | CPUs, here are, first, the limits of BFQ for three different CPUs, on, | ||
39 | respectively, an average laptop, an old desktop, and a cheap embedded | ||
40 | system, in case full hierarchical support is enabled (i.e., | ||
41 | CONFIG_BFQ_GROUP_IOSCHED is set), but CONFIG_DEBUG_BLK_CGROUP is not | ||
42 | set (Section 4-2): | ||
30 | - Intel i7-4850HQ: 400 KIOPS | 43 | - Intel i7-4850HQ: 400 KIOPS |
31 | - AMD A8-3850: 250 KIOPS | 44 | - AMD A8-3850: 250 KIOPS |
32 | - ARM CortexTM-A53 Octa-core: 80 KIOPS | 45 | - ARM CortexTM-A53 Octa-core: 80 KIOPS |
@@ -566,3 +579,5 @@ applications. Unset this tunable if you need/want to control weights. | |||
566 | Slightly extended version: | 579 | Slightly extended version: |
567 | http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite- | 580 | http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite- |
568 | results.pdf | 581 | results.pdf |
582 | |||
583 | [3] https://github.com/Algodev-github/S | ||
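A back-of-envelope check of the numbers in the documentation text above: a per-request scheduling cost of 1.9 us bounds a single core at roughly 1 s / 1.9 us ≈ 526 K scheduler invocations per second from BFQ overhead alone, and mq-deadline's 0.7 us corresponds to about 1.4 M, before the rest of the I/O stack is counted. The 400/250/80 KIOPS limits listed for the three example CPUs are therefore plausible totals once non-scheduler per-request work is included; these derived figures are illustrative only and are not measurements from this patch.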
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
index 4cad1024fff7..41f0a3d33bbd 100644
--- a/Documentation/block/null_blk.txt
+++ b/Documentation/block/null_blk.txt
@@ -93,3 +93,7 @@ zoned=[0/1]: Default: 0 | |||
93 | 93 | ||
94 | zone_size=[MB]: Default: 256 | 94 | zone_size=[MB]: Default: 256 |
95 | Per zone size when exposed as a zoned block device. Must be a power of two. | 95 | Per zone size when exposed as a zoned block device. Must be a power of two. |
96 | |||
97 | zone_nr_conv=[nr_conv]: Default: 0 | ||
98 | The number of conventional zones to create when block device is zoned. If | ||
99 | zone_nr_conv >= nr_zones, it will be reduced to nr_zones - 1. | ||
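As a usage illustration of the parameters documented in this file (a hypothetical invocation, not part of the patch): loading the driver with "modprobe null_blk zoned=1 zone_size=256 zone_nr_conv=4" would create a zoned null block device with 256 MB zones whose first four zones are conventional and the remainder sequential; with zone_nr_conv left at its default of 0 all zones are sequential, and a value of nr_zones or more is clamped to nr_zones - 1 as described above.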
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index 367353c54949..c88867b173d9 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -72,47 +72,44 @@ and elsewhere regarding submitting Linux kernel patches. | |||
72 | 13) Has been build- and runtime tested with and without ``CONFIG_SMP`` and | 72 | 13) Has been build- and runtime tested with and without ``CONFIG_SMP`` and |
73 | ``CONFIG_PREEMPT.`` | 73 | ``CONFIG_PREEMPT.`` |
74 | 74 | ||
75 | 14) If the patch affects IO/Disk, etc: has been tested with and without | 75 | 16) All codepaths have been exercised with all lockdep features enabled. |
76 | ``CONFIG_LBDAF.`` | ||
77 | 76 | ||
78 | 15) All codepaths have been exercised with all lockdep features enabled. | 77 | 17) All new ``/proc`` entries are documented under ``Documentation/`` |
79 | 78 | ||
80 | 16) All new ``/proc`` entries are documented under ``Documentation/`` | 79 | 18) All new kernel boot parameters are documented in |
81 | |||
82 | 17) All new kernel boot parameters are documented in | ||
83 | ``Documentation/admin-guide/kernel-parameters.rst``. | 80 | ``Documentation/admin-guide/kernel-parameters.rst``. |
84 | 81 | ||
85 | 18) All new module parameters are documented with ``MODULE_PARM_DESC()`` | 82 | 19) All new module parameters are documented with ``MODULE_PARM_DESC()`` |
86 | 83 | ||
87 | 19) All new userspace interfaces are documented in ``Documentation/ABI/``. | 84 | 20) All new userspace interfaces are documented in ``Documentation/ABI/``. |
88 | See ``Documentation/ABI/README`` for more information. | 85 | See ``Documentation/ABI/README`` for more information. |
89 | Patches that change userspace interfaces should be CCed to | 86 | Patches that change userspace interfaces should be CCed to |
90 | linux-api@vger.kernel.org. | 87 | linux-api@vger.kernel.org. |
91 | 88 | ||
92 | 20) Check that it all passes ``make headers_check``. | 89 | 21) Check that it all passes ``make headers_check``. |
93 | 90 | ||
94 | 21) Has been checked with injection of at least slab and page-allocation | 91 | 22) Has been checked with injection of at least slab and page-allocation |
95 | failures. See ``Documentation/fault-injection/``. | 92 | failures. See ``Documentation/fault-injection/``. |
96 | 93 | ||
97 | If the new code is substantial, addition of subsystem-specific fault | 94 | If the new code is substantial, addition of subsystem-specific fault |
98 | injection might be appropriate. | 95 | injection might be appropriate. |
99 | 96 | ||
100 | 22) Newly-added code has been compiled with ``gcc -W`` (use | 97 | 23) Newly-added code has been compiled with ``gcc -W`` (use |
101 | ``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good | 98 | ``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good |
102 | for finding bugs like "warning: comparison between signed and unsigned". | 99 | for finding bugs like "warning: comparison between signed and unsigned". |
103 | 100 | ||
104 | 23) Tested after it has been merged into the -mm patchset to make sure | 101 | 24) Tested after it has been merged into the -mm patchset to make sure |
105 | that it still works with all of the other queued patches and various | 102 | that it still works with all of the other queued patches and various |
106 | changes in the VM, VFS, and other subsystems. | 103 | changes in the VM, VFS, and other subsystems. |
107 | 104 | ||
108 | 24) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a | 105 | 25) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a |
109 | comment in the source code that explains the logic of what they are doing | 106 | comment in the source code that explains the logic of what they are doing |
110 | and why. | 107 | and why. |
111 | 108 | ||
112 | 25) If any ioctl's are added by the patch, then also update | 109 | 26) If any ioctl's are added by the patch, then also update |
113 | ``Documentation/ioctl/ioctl-number.txt``. | 110 | ``Documentation/ioctl/ioctl-number.txt``. |
114 | 111 | ||
115 | 26) If your modified source code depends on or uses any of the kernel | 112 | 27) If your modified source code depends on or uses any of the kernel |
116 | APIs or features that are related to the following ``Kconfig`` symbols, | 113 | APIs or features that are related to the following ``Kconfig`` symbols, |
117 | then test multiple builds with the related ``Kconfig`` symbols disabled | 114 | then test multiple builds with the related ``Kconfig`` symbols disabled |
118 | and/or ``=m`` (if that option is available) [not all of these at the | 115 | and/or ``=m`` (if that option is available) [not all of these at the |
diff --git a/Documentation/translations/ja_JP/SubmitChecklist b/Documentation/translations/ja_JP/SubmitChecklist
index 60c7c35ac517..b42220d3d46c 100644
--- a/Documentation/translations/ja_JP/SubmitChecklist
+++ b/Documentation/translations/ja_JP/SubmitChecklist
@@ -74,38 +74,34 @@ Linux カーネルパッチ投稿者向けチェックリスト | |||
74 | 13: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で | 74 | 13: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で |
75 | ビルドした上、動作確認を行ってください。 | 75 | ビルドした上、動作確認を行ってください。 |
76 | 76 | ||
77 | 14: もしパッチがディスクのI/O性能などに影響を与えるようであれば、 | 77 | 14: lockdepの機能を全て有効にした上で、全てのコードパスを評価してください。 |
78 | 'CONFIG_LBDAF'オプションを有効にした場合と無効にした場合の両方で | ||
79 | テストを実施してみてください。 | ||
80 | 78 | ||
81 | 15: lockdepの機能を全て有効にした上で、全てのコードパスを評価してください。 | 79 | 15: /proc に新しいエントリを追加した場合には、Documentation/ 配下に |
82 | |||
83 | 16: /proc に新しいエントリを追加した場合には、Documentation/ 配下に | ||
84 | 必ずドキュメントを追加してください。 | 80 | 必ずドキュメントを追加してください。 |
85 | 81 | ||
86 | 17: 新しいブートパラメータを追加した場合には、 | 82 | 16: 新しいブートパラメータを追加した場合には、 |
87 | 必ずDocumentation/admin-guide/kernel-parameters.rst に説明を追加してください。 | 83 | 必ずDocumentation/admin-guide/kernel-parameters.rst に説明を追加してください。 |
88 | 84 | ||
89 | 18: 新しくmoduleにパラメータを追加した場合には、MODULE_PARM_DESC()を | 85 | 17: 新しくmoduleにパラメータを追加した場合には、MODULE_PARM_DESC()を |
90 | 利用して必ずその説明を記述してください。 | 86 | 利用して必ずその説明を記述してください。 |
91 | 87 | ||
92 | 19: 新しいuserspaceインタフェースを作成した場合には、Documentation/ABI/ に | 88 | 18: 新しいuserspaceインタフェースを作成した場合には、Documentation/ABI/ に |
93 | Documentation/ABI/README を参考にして必ずドキュメントを追加してください。 | 89 | Documentation/ABI/README を参考にして必ずドキュメントを追加してください。 |
94 | 90 | ||
95 | 20: 'make headers_check'を実行して全く問題がないことを確認してください。 | 91 | 19: 'make headers_check'を実行して全く問題がないことを確認してください。 |
96 | 92 | ||
97 | 21: 少なくともslabアロケーションとpageアロケーションに失敗した場合の | 93 | 20: 少なくともslabアロケーションとpageアロケーションに失敗した場合の |
98 | 挙動について、fault-injectionを利用して確認してください。 | 94 | 挙動について、fault-injectionを利用して確認してください。 |
99 | Documentation/fault-injection/ を参照してください。 | 95 | Documentation/fault-injection/ を参照してください。 |
100 | 96 | ||
101 | 追加したコードがかなりの量であったならば、サブシステム特有の | 97 | 追加したコードがかなりの量であったならば、サブシステム特有の |
102 | fault-injectionを追加したほうが良いかもしれません。 | 98 | fault-injectionを追加したほうが良いかもしれません。 |
103 | 99 | ||
104 | 22: 新たに追加したコードは、`gcc -W'でコンパイルしてください。 | 100 | 21: 新たに追加したコードは、`gcc -W'でコンパイルしてください。 |
105 | このオプションは大量の不要なメッセージを出力しますが、 | 101 | このオプションは大量の不要なメッセージを出力しますが、 |
106 | "warning: comparison between signed and unsigned" のようなメッセージは、 | 102 | "warning: comparison between signed and unsigned" のようなメッセージは、 |
107 | バグを見つけるのに役に立ちます。 | 103 | バグを見つけるのに役に立ちます。 |
108 | 104 | ||
109 | 23: 投稿したパッチが -mm パッチセットにマージされた後、全ての既存のパッチや | 105 | 22: 投稿したパッチが -mm パッチセットにマージされた後、全ての既存のパッチや |
110 | VM, VFS およびその他のサブシステムに関する様々な変更と、現時点でも共存 | 106 | VM, VFS およびその他のサブシステムに関する様々な変更と、現時点でも共存 |
111 | できることを確認するテストを行ってください。 | 107 | できることを確認するテストを行ってください。 |
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index f56cc2070c11..b117e6c16d41 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -15,7 +15,6 @@ CONFIG_PERF_EVENTS=y | |||
15 | # CONFIG_COMPAT_BRK is not set | 15 | # CONFIG_COMPAT_BRK is not set |
16 | CONFIG_SLAB=y | 16 | CONFIG_SLAB=y |
17 | CONFIG_MODULES=y | 17 | CONFIG_MODULES=y |
18 | # CONFIG_LBDAF is not set | ||
19 | # CONFIG_BLK_DEV_BSG is not set | 18 | # CONFIG_BLK_DEV_BSG is not set |
20 | # CONFIG_IOSCHED_DEADLINE is not set | 19 | # CONFIG_IOSCHED_DEADLINE is not set |
21 | # CONFIG_IOSCHED_CFQ is not set | 20 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig
index b6f2482c7e74..33a787c375e2 100644
--- a/arch/arc/configs/haps_hs_smp_defconfig
+++ b/arch/arc/configs/haps_hs_smp_defconfig
@@ -17,7 +17,6 @@ CONFIG_PERF_EVENTS=y | |||
17 | CONFIG_SLAB=y | 17 | CONFIG_SLAB=y |
18 | CONFIG_KPROBES=y | 18 | CONFIG_KPROBES=y |
19 | CONFIG_MODULES=y | 19 | CONFIG_MODULES=y |
20 | # CONFIG_LBDAF is not set | ||
21 | # CONFIG_BLK_DEV_BSG is not set | 20 | # CONFIG_BLK_DEV_BSG is not set |
22 | # CONFIG_IOSCHED_DEADLINE is not set | 21 | # CONFIG_IOSCHED_DEADLINE is not set |
23 | # CONFIG_IOSCHED_CFQ is not set | 22 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 318e4cd29629..de398c7b10b3 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -18,7 +18,6 @@ CONFIG_PERF_EVENTS=y | |||
18 | CONFIG_ISA_ARCOMPACT=y | 18 | CONFIG_ISA_ARCOMPACT=y |
19 | CONFIG_KPROBES=y | 19 | CONFIG_KPROBES=y |
20 | CONFIG_MODULES=y | 20 | CONFIG_MODULES=y |
21 | # CONFIG_LBDAF is not set | ||
22 | # CONFIG_BLK_DEV_BSG is not set | 21 | # CONFIG_BLK_DEV_BSG is not set |
23 | # CONFIG_IOSCHED_DEADLINE is not set | 22 | # CONFIG_IOSCHED_DEADLINE is not set |
24 | # CONFIG_IOSCHED_CFQ is not set | 23 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index c15807b0e0c1..2dbd34a9ff07 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -20,7 +20,6 @@ CONFIG_MODULES=y | |||
20 | CONFIG_MODULE_FORCE_LOAD=y | 20 | CONFIG_MODULE_FORCE_LOAD=y |
21 | CONFIG_MODULE_UNLOAD=y | 21 | CONFIG_MODULE_UNLOAD=y |
22 | CONFIG_MODULE_FORCE_UNLOAD=y | 22 | CONFIG_MODULE_FORCE_UNLOAD=y |
23 | # CONFIG_LBDAF is not set | ||
24 | # CONFIG_BLK_DEV_BSG is not set | 23 | # CONFIG_BLK_DEV_BSG is not set |
25 | # CONFIG_IOSCHED_DEADLINE is not set | 24 | # CONFIG_IOSCHED_DEADLINE is not set |
26 | # CONFIG_IOSCHED_CFQ is not set | 25 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 65e983fd942b..c7135f1e2583 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -18,7 +18,6 @@ CONFIG_MODULES=y | |||
18 | CONFIG_MODULE_FORCE_LOAD=y | 18 | CONFIG_MODULE_FORCE_LOAD=y |
19 | CONFIG_MODULE_UNLOAD=y | 19 | CONFIG_MODULE_UNLOAD=y |
20 | CONFIG_MODULE_FORCE_UNLOAD=y | 20 | CONFIG_MODULE_FORCE_UNLOAD=y |
21 | # CONFIG_LBDAF is not set | ||
22 | # CONFIG_BLK_DEV_BSG is not set | 21 | # CONFIG_BLK_DEV_BSG is not set |
23 | # CONFIG_IOSCHED_DEADLINE is not set | 22 | # CONFIG_IOSCHED_DEADLINE is not set |
24 | # CONFIG_IOSCHED_CFQ is not set | 23 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 08c5b99ac341..385a71d3c478 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -18,7 +18,6 @@ CONFIG_PERF_EVENTS=y | |||
18 | CONFIG_ISA_ARCOMPACT=y | 18 | CONFIG_ISA_ARCOMPACT=y |
19 | CONFIG_KPROBES=y | 19 | CONFIG_KPROBES=y |
20 | CONFIG_MODULES=y | 20 | CONFIG_MODULES=y |
21 | # CONFIG_LBDAF is not set | ||
22 | # CONFIG_BLK_DEV_BSG is not set | 21 | # CONFIG_BLK_DEV_BSG is not set |
23 | # CONFIG_IOSCHED_DEADLINE is not set | 22 | # CONFIG_IOSCHED_DEADLINE is not set |
24 | # CONFIG_IOSCHED_CFQ is not set | 23 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 5b5e26d67955..248a2c3bdc12 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -17,7 +17,6 @@ CONFIG_PERF_EVENTS=y | |||
17 | # CONFIG_COMPAT_BRK is not set | 17 | # CONFIG_COMPAT_BRK is not set |
18 | CONFIG_KPROBES=y | 18 | CONFIG_KPROBES=y |
19 | CONFIG_MODULES=y | 19 | CONFIG_MODULES=y |
20 | # CONFIG_LBDAF is not set | ||
21 | # CONFIG_BLK_DEV_BSG is not set | 20 | # CONFIG_BLK_DEV_BSG is not set |
22 | # CONFIG_IOSCHED_DEADLINE is not set | 21 | # CONFIG_IOSCHED_DEADLINE is not set |
23 | # CONFIG_IOSCHED_CFQ is not set | 22 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 26af9b2f7fcb..1a4bc7b660fb 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -12,7 +12,6 @@ CONFIG_PERF_EVENTS=y | |||
12 | # CONFIG_COMPAT_BRK is not set | 12 | # CONFIG_COMPAT_BRK is not set |
13 | CONFIG_KPROBES=y | 13 | CONFIG_KPROBES=y |
14 | CONFIG_MODULES=y | 14 | CONFIG_MODULES=y |
15 | # CONFIG_LBDAF is not set | ||
16 | # CONFIG_BLK_DEV_BSG is not set | 15 | # CONFIG_BLK_DEV_BSG is not set |
17 | # CONFIG_IOSCHED_DEADLINE is not set | 16 | # CONFIG_IOSCHED_DEADLINE is not set |
18 | # CONFIG_IOSCHED_CFQ is not set | 17 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig
index 1446262921b4..bdbade6af9c7 100644
--- a/arch/arm/configs/aspeed_g4_defconfig
+++ b/arch/arm/configs/aspeed_g4_defconfig
@@ -23,7 +23,6 @@ CONFIG_SLAB_FREELIST_RANDOM=y | |||
23 | CONFIG_JUMP_LABEL=y | 23 | CONFIG_JUMP_LABEL=y |
24 | CONFIG_STRICT_KERNEL_RWX=y | 24 | CONFIG_STRICT_KERNEL_RWX=y |
25 | CONFIG_GCC_PLUGINS=y | 25 | CONFIG_GCC_PLUGINS=y |
26 | # CONFIG_LBDAF is not set | ||
27 | # CONFIG_BLK_DEV_BSG is not set | 26 | # CONFIG_BLK_DEV_BSG is not set |
28 | # CONFIG_BLK_DEBUG_FS is not set | 27 | # CONFIG_BLK_DEBUG_FS is not set |
29 | # CONFIG_IOSCHED_DEADLINE is not set | 28 | # CONFIG_IOSCHED_DEADLINE is not set |
diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig
index 02fa3a41add5..4bde84eae4eb 100644
--- a/arch/arm/configs/aspeed_g5_defconfig
+++ b/arch/arm/configs/aspeed_g5_defconfig
@@ -23,7 +23,6 @@ CONFIG_SLAB_FREELIST_RANDOM=y | |||
23 | CONFIG_JUMP_LABEL=y | 23 | CONFIG_JUMP_LABEL=y |
24 | CONFIG_STRICT_KERNEL_RWX=y | 24 | CONFIG_STRICT_KERNEL_RWX=y |
25 | CONFIG_GCC_PLUGINS=y | 25 | CONFIG_GCC_PLUGINS=y |
26 | # CONFIG_LBDAF is not set | ||
27 | # CONFIG_BLK_DEV_BSG is not set | 26 | # CONFIG_BLK_DEV_BSG is not set |
28 | # CONFIG_BLK_DEBUG_FS is not set | 27 | # CONFIG_BLK_DEBUG_FS is not set |
29 | # CONFIG_IOSCHED_DEADLINE is not set | 28 | # CONFIG_IOSCHED_DEADLINE is not set |
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index e4b1be66b3f5..b7752929975c 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -9,7 +9,6 @@ CONFIG_EMBEDDED=y | |||
9 | CONFIG_SLAB=y | 9 | CONFIG_SLAB=y |
10 | CONFIG_MODULES=y | 10 | CONFIG_MODULES=y |
11 | CONFIG_MODULE_UNLOAD=y | 11 | CONFIG_MODULE_UNLOAD=y |
12 | # CONFIG_LBDAF is not set | ||
13 | # CONFIG_BLK_DEV_BSG is not set | 12 | # CONFIG_BLK_DEV_BSG is not set |
14 | # CONFIG_IOSCHED_DEADLINE is not set | 13 | # CONFIG_IOSCHED_DEADLINE is not set |
15 | # CONFIG_IOSCHED_CFQ is not set | 14 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arm/configs/clps711x_defconfig b/arch/arm/configs/clps711x_defconfig
index fc105c9178cc..09ae750164e0 100644
--- a/arch/arm/configs/clps711x_defconfig
+++ b/arch/arm/configs/clps711x_defconfig
@@ -6,7 +6,6 @@ CONFIG_RD_LZMA=y | |||
6 | CONFIG_EMBEDDED=y | 6 | CONFIG_EMBEDDED=y |
7 | CONFIG_SLOB=y | 7 | CONFIG_SLOB=y |
8 | CONFIG_JUMP_LABEL=y | 8 | CONFIG_JUMP_LABEL=y |
9 | # CONFIG_LBDAF is not set | ||
10 | CONFIG_PARTITION_ADVANCED=y | 9 | CONFIG_PARTITION_ADVANCED=y |
11 | # CONFIG_IOSCHED_CFQ is not set | 10 | # CONFIG_IOSCHED_CFQ is not set |
12 | CONFIG_ARCH_CLPS711X=y | 11 | CONFIG_ARCH_CLPS711X=y |
diff --git a/arch/arm/configs/efm32_defconfig b/arch/arm/configs/efm32_defconfig
index ee42158f41ec..10ea92513a69 100644
--- a/arch/arm/configs/efm32_defconfig
+++ b/arch/arm/configs/efm32_defconfig
@@ -11,7 +11,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y | |||
11 | CONFIG_EMBEDDED=y | 11 | CONFIG_EMBEDDED=y |
12 | # CONFIG_VM_EVENT_COUNTERS is not set | 12 | # CONFIG_VM_EVENT_COUNTERS is not set |
13 | # CONFIG_SLUB_DEBUG is not set | 13 | # CONFIG_SLUB_DEBUG is not set |
14 | # CONFIG_LBDAF is not set | ||
15 | # CONFIG_BLK_DEV_BSG is not set | 14 | # CONFIG_BLK_DEV_BSG is not set |
16 | # CONFIG_IOSCHED_DEADLINE is not set | 15 | # CONFIG_IOSCHED_DEADLINE is not set |
17 | # CONFIG_IOSCHED_CFQ is not set | 16 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index 484e51fbd4a6..e3afca5bd9d6 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -13,7 +13,6 @@ CONFIG_MODULES=y | |||
13 | CONFIG_MODULE_UNLOAD=y | 13 | CONFIG_MODULE_UNLOAD=y |
14 | CONFIG_MODULE_FORCE_UNLOAD=y | 14 | CONFIG_MODULE_FORCE_UNLOAD=y |
15 | CONFIG_MODVERSIONS=y | 15 | CONFIG_MODVERSIONS=y |
16 | # CONFIG_LBDAF is not set | ||
17 | # CONFIG_BLK_DEV_BSG is not set | 16 | # CONFIG_BLK_DEV_BSG is not set |
18 | # CONFIG_IOSCHED_CFQ is not set | 17 | # CONFIG_IOSCHED_CFQ is not set |
19 | CONFIG_ARCH_PXA=y | 18 | CONFIG_ARCH_PXA=y |
diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig
index ebeca11faa48..175881b7da7c 100644
--- a/arch/arm/configs/h3600_defconfig
+++ b/arch/arm/configs/h3600_defconfig
@@ -4,7 +4,6 @@ CONFIG_HIGH_RES_TIMERS=y | |||
4 | CONFIG_LOG_BUF_SHIFT=14 | 4 | CONFIG_LOG_BUF_SHIFT=14 |
5 | CONFIG_BLK_DEV_INITRD=y | 5 | CONFIG_BLK_DEV_INITRD=y |
6 | CONFIG_MODULES=y | 6 | CONFIG_MODULES=y |
7 | # CONFIG_LBDAF is not set | ||
8 | # CONFIG_BLK_DEV_BSG is not set | 7 | # CONFIG_BLK_DEV_BSG is not set |
9 | # CONFIG_IOSCHED_DEADLINE is not set | 8 | # CONFIG_IOSCHED_DEADLINE is not set |
10 | # CONFIG_IOSCHED_CFQ is not set | 9 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index f204017c26b9..9b779e13e05d 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -12,7 +12,6 @@ CONFIG_MODULES=y | |||
12 | CONFIG_MODULE_UNLOAD=y | 12 | CONFIG_MODULE_UNLOAD=y |
13 | CONFIG_MODULE_FORCE_UNLOAD=y | 13 | CONFIG_MODULE_FORCE_UNLOAD=y |
14 | CONFIG_MODVERSIONS=y | 14 | CONFIG_MODVERSIONS=y |
15 | # CONFIG_LBDAF is not set | ||
16 | # CONFIG_BLK_DEV_BSG is not set | 15 | # CONFIG_BLK_DEV_BSG is not set |
17 | # CONFIG_IOSCHED_CFQ is not set | 16 | # CONFIG_IOSCHED_CFQ is not set |
18 | CONFIG_ARCH_PXA=y | 17 | CONFIG_ARCH_PXA=y |
diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig
index 078228a19339..6a11669fa536 100644
--- a/arch/arm/configs/moxart_defconfig
+++ b/arch/arm/configs/moxart_defconfig
@@ -15,7 +15,6 @@ CONFIG_EMBEDDED=y | |||
15 | # CONFIG_VM_EVENT_COUNTERS is not set | 15 | # CONFIG_VM_EVENT_COUNTERS is not set |
16 | # CONFIG_SLUB_DEBUG is not set | 16 | # CONFIG_SLUB_DEBUG is not set |
17 | # CONFIG_COMPAT_BRK is not set | 17 | # CONFIG_COMPAT_BRK is not set |
18 | # CONFIG_LBDAF is not set | ||
19 | # CONFIG_BLK_DEV_BSG is not set | 18 | # CONFIG_BLK_DEV_BSG is not set |
20 | # CONFIG_IOSCHED_DEADLINE is not set | 19 | # CONFIG_IOSCHED_DEADLINE is not set |
21 | CONFIG_ARCH_MULTI_V4=y | 20 | CONFIG_ARCH_MULTI_V4=y |
diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig
index 9a6390c172d6..eeea0c41138b 100644
--- a/arch/arm/configs/multi_v4t_defconfig
+++ b/arch/arm/configs/multi_v4t_defconfig
@@ -5,7 +5,6 @@ CONFIG_BLK_DEV_INITRD=y | |||
5 | CONFIG_EMBEDDED=y | 5 | CONFIG_EMBEDDED=y |
6 | CONFIG_SLOB=y | 6 | CONFIG_SLOB=y |
7 | CONFIG_JUMP_LABEL=y | 7 | CONFIG_JUMP_LABEL=y |
8 | # CONFIG_LBDAF is not set | ||
9 | CONFIG_PARTITION_ADVANCED=y | 8 | CONFIG_PARTITION_ADVANCED=y |
10 | # CONFIG_IOSCHED_CFQ is not set | 9 | # CONFIG_IOSCHED_CFQ is not set |
11 | CONFIG_ARCH_MULTI_V4T=y | 10 | CONFIG_ARCH_MULTI_V4T=y |
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index cfc00b0961ec..8448a7f407a4 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -17,7 +17,6 @@ CONFIG_OPROFILE=y | |||
17 | CONFIG_MODULES=y | 17 | CONFIG_MODULES=y |
18 | CONFIG_MODULE_UNLOAD=y | 18 | CONFIG_MODULE_UNLOAD=y |
19 | CONFIG_MODULE_FORCE_UNLOAD=y | 19 | CONFIG_MODULE_FORCE_UNLOAD=y |
20 | # CONFIG_LBDAF is not set | ||
21 | # CONFIG_BLK_DEV_BSG is not set | 20 | # CONFIG_BLK_DEV_BSG is not set |
22 | # CONFIG_IOSCHED_DEADLINE is not set | 21 | # CONFIG_IOSCHED_DEADLINE is not set |
23 | # CONFIG_IOSCHED_CFQ is not set | 22 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 0258ba891376..152321d2893e 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -13,7 +13,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y | |||
13 | CONFIG_EMBEDDED=y | 13 | CONFIG_EMBEDDED=y |
14 | # CONFIG_VM_EVENT_COUNTERS is not set | 14 | # CONFIG_VM_EVENT_COUNTERS is not set |
15 | # CONFIG_SLUB_DEBUG is not set | 15 | # CONFIG_SLUB_DEBUG is not set |
16 | # CONFIG_LBDAF is not set | ||
17 | # CONFIG_BLK_DEV_BSG is not set | 16 | # CONFIG_BLK_DEV_BSG is not set |
18 | # CONFIG_IOSCHED_DEADLINE is not set | 17 | # CONFIG_IOSCHED_DEADLINE is not set |
19 | # CONFIG_IOSCHED_CFQ is not set | 18 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arm/configs/u300_defconfig b/arch/arm/configs/u300_defconfig
index 36d77406e31b..831ba6a9ee8b 100644
--- a/arch/arm/configs/u300_defconfig
+++ b/arch/arm/configs/u300_defconfig
@@ -9,7 +9,6 @@ CONFIG_EXPERT=y | |||
9 | # CONFIG_VM_EVENT_COUNTERS is not set | 9 | # CONFIG_VM_EVENT_COUNTERS is not set |
10 | CONFIG_MODULES=y | 10 | CONFIG_MODULES=y |
11 | CONFIG_MODULE_UNLOAD=y | 11 | CONFIG_MODULE_UNLOAD=y |
12 | # CONFIG_LBDAF is not set | ||
13 | # CONFIG_BLK_DEV_BSG is not set | 12 | # CONFIG_BLK_DEV_BSG is not set |
14 | CONFIG_PARTITION_ADVANCED=y | 13 | CONFIG_PARTITION_ADVANCED=y |
15 | # CONFIG_IOSCHED_CFQ is not set | 14 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig
index 392ed3b3613c..484d77a7f589 100644
--- a/arch/arm/configs/vexpress_defconfig
+++ b/arch/arm/configs/vexpress_defconfig
@@ -14,7 +14,6 @@ CONFIG_PROFILING=y | |||
14 | CONFIG_OPROFILE=y | 14 | CONFIG_OPROFILE=y |
15 | CONFIG_MODULES=y | 15 | CONFIG_MODULES=y |
16 | CONFIG_MODULE_UNLOAD=y | 16 | CONFIG_MODULE_UNLOAD=y |
17 | # CONFIG_LBDAF is not set | ||
18 | # CONFIG_BLK_DEV_BSG is not set | 17 | # CONFIG_BLK_DEV_BSG is not set |
19 | # CONFIG_IOSCHED_DEADLINE is not set | 18 | # CONFIG_IOSCHED_DEADLINE is not set |
20 | # CONFIG_IOSCHED_CFQ is not set | 19 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig
index 0857cdbfde0c..d5e683dd885d 100644
--- a/arch/m68k/configs/amcore_defconfig
+++ b/arch/m68k/configs/amcore_defconfig
@@ -12,7 +12,6 @@ CONFIG_EMBEDDED=y | |||
12 | # CONFIG_VM_EVENT_COUNTERS is not set | 12 | # CONFIG_VM_EVENT_COUNTERS is not set |
13 | # CONFIG_SLUB_DEBUG is not set | 13 | # CONFIG_SLUB_DEBUG is not set |
14 | # CONFIG_COMPAT_BRK is not set | 14 | # CONFIG_COMPAT_BRK is not set |
15 | # CONFIG_LBDAF is not set | ||
16 | # CONFIG_BLK_DEV_BSG is not set | 15 | # CONFIG_BLK_DEV_BSG is not set |
17 | # CONFIG_IOSCHED_CFQ is not set | 16 | # CONFIG_IOSCHED_CFQ is not set |
18 | # CONFIG_MMU is not set | 17 | # CONFIG_MMU is not set |
diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig
index 4f4ccd13c11b..434bd3750966 100644
--- a/arch/m68k/configs/m5475evb_defconfig
+++ b/arch/m68k/configs/m5475evb_defconfig
@@ -11,7 +11,6 @@ CONFIG_SYSCTL_SYSCALL=y | |||
11 | # CONFIG_AIO is not set | 11 | # CONFIG_AIO is not set |
12 | CONFIG_EMBEDDED=y | 12 | CONFIG_EMBEDDED=y |
13 | CONFIG_MODULES=y | 13 | CONFIG_MODULES=y |
14 | # CONFIG_LBDAF is not set | ||
15 | # CONFIG_BLK_DEV_BSG is not set | 14 | # CONFIG_BLK_DEV_BSG is not set |
16 | # CONFIG_IOSCHED_DEADLINE is not set | 15 | # CONFIG_IOSCHED_DEADLINE is not set |
17 | # CONFIG_IOSCHED_CFQ is not set | 16 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig
index 69f23c7b0497..27fa9465d19d 100644
--- a/arch/m68k/configs/stmark2_defconfig
+++ b/arch/m68k/configs/stmark2_defconfig
@@ -17,7 +17,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y | |||
17 | CONFIG_EMBEDDED=y | 17 | CONFIG_EMBEDDED=y |
18 | # CONFIG_VM_EVENT_COUNTERS is not set | 18 | # CONFIG_VM_EVENT_COUNTERS is not set |
19 | # CONFIG_COMPAT_BRK is not set | 19 | # CONFIG_COMPAT_BRK is not set |
20 | # CONFIG_LBDAF is not set | ||
21 | # CONFIG_BLK_DEV_BSG is not set | 20 | # CONFIG_BLK_DEV_BSG is not set |
22 | CONFIG_BLK_CMDLINE_PARSER=y | 21 | CONFIG_BLK_CMDLINE_PARSER=y |
23 | # CONFIG_MMU is not set | 22 | # CONFIG_MMU is not set |
diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig
index 9fbfb6e5c7d2..c83fdf649327 100644
--- a/arch/mips/configs/ar7_defconfig
+++ b/arch/mips/configs/ar7_defconfig
@@ -18,7 +18,6 @@ CONFIG_KEXEC=y | |||
18 | # CONFIG_SECCOMP is not set | 18 | # CONFIG_SECCOMP is not set |
19 | CONFIG_MODULES=y | 19 | CONFIG_MODULES=y |
20 | CONFIG_MODULE_UNLOAD=y | 20 | CONFIG_MODULE_UNLOAD=y |
21 | # CONFIG_LBDAF is not set | ||
22 | # CONFIG_BLK_DEV_BSG is not set | 21 | # CONFIG_BLK_DEV_BSG is not set |
23 | CONFIG_PARTITION_ADVANCED=y | 22 | CONFIG_PARTITION_ADVANCED=y |
24 | CONFIG_BSD_DISKLABEL=y | 23 | CONFIG_BSD_DISKLABEL=y |
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 0c86ed86266a..30a6eafdb1d0 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -17,7 +17,6 @@ CONFIG_TC=y | |||
17 | CONFIG_MODULES=y | 17 | CONFIG_MODULES=y |
18 | CONFIG_MODULE_UNLOAD=y | 18 | CONFIG_MODULE_UNLOAD=y |
19 | CONFIG_MODULE_SRCVERSION_ALL=y | 19 | CONFIG_MODULE_SRCVERSION_ALL=y |
20 | # CONFIG_LBDAF is not set | ||
21 | CONFIG_PARTITION_ADVANCED=y | 20 | CONFIG_PARTITION_ADVANCED=y |
22 | CONFIG_OSF_PARTITION=y | 21 | CONFIG_OSF_PARTITION=y |
23 | # CONFIG_EFI_PARTITION is not set | 22 | # CONFIG_EFI_PARTITION is not set |
diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
index 0e54ab2680ce..e2b58dbf4aa9 100644
--- a/arch/mips/configs/decstation_r4k_defconfig
+++ b/arch/mips/configs/decstation_r4k_defconfig
@@ -16,7 +16,6 @@ CONFIG_TC=y | |||
16 | CONFIG_MODULES=y | 16 | CONFIG_MODULES=y |
17 | CONFIG_MODULE_UNLOAD=y | 17 | CONFIG_MODULE_UNLOAD=y |
18 | CONFIG_MODULE_SRCVERSION_ALL=y | 18 | CONFIG_MODULE_SRCVERSION_ALL=y |
19 | # CONFIG_LBDAF is not set | ||
20 | CONFIG_PARTITION_ADVANCED=y | 19 | CONFIG_PARTITION_ADVANCED=y |
21 | CONFIG_OSF_PARTITION=y | 20 | CONFIG_OSF_PARTITION=y |
22 | # CONFIG_EFI_PARTITION is not set | 21 | # CONFIG_EFI_PARTITION is not set |
diff --git a/arch/mips/configs/loongson1b_defconfig b/arch/mips/configs/loongson1b_defconfig
index b064d68a5424..aa7e98c5f5fc 100644
--- a/arch/mips/configs/loongson1b_defconfig
+++ b/arch/mips/configs/loongson1b_defconfig
@@ -19,7 +19,6 @@ CONFIG_MACH_LOONGSON32=y | |||
19 | CONFIG_MODULES=y | 19 | CONFIG_MODULES=y |
20 | CONFIG_MODULE_UNLOAD=y | 20 | CONFIG_MODULE_UNLOAD=y |
21 | CONFIG_MODVERSIONS=y | 21 | CONFIG_MODVERSIONS=y |
22 | # CONFIG_LBDAF is not set | ||
23 | # CONFIG_BLK_DEV_BSG is not set | 22 | # CONFIG_BLK_DEV_BSG is not set |
24 | # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set | 23 | # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set |
25 | CONFIG_NET=y | 24 | CONFIG_NET=y |
diff --git a/arch/mips/configs/loongson1c_defconfig b/arch/mips/configs/loongson1c_defconfig
index 5d76559b56cd..520e7ef35383 100644
--- a/arch/mips/configs/loongson1c_defconfig
+++ b/arch/mips/configs/loongson1c_defconfig
@@ -20,7 +20,6 @@ CONFIG_LOONGSON1_LS1C=y | |||
20 | CONFIG_MODULES=y | 20 | CONFIG_MODULES=y |
21 | CONFIG_MODULE_UNLOAD=y | 21 | CONFIG_MODULE_UNLOAD=y |
22 | CONFIG_MODVERSIONS=y | 22 | CONFIG_MODVERSIONS=y |
23 | # CONFIG_LBDAF is not set | ||
24 | # CONFIG_BLK_DEV_BSG is not set | 23 | # CONFIG_BLK_DEV_BSG is not set |
25 | # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set | 24 | # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set |
26 | CONFIG_NET=y | 25 | CONFIG_NET=y |
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index 7befe05fd813..ed1038f62a2c 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -19,7 +19,6 @@ CONFIG_PCI=y | |||
19 | # CONFIG_PCI_QUIRKS is not set | 19 | # CONFIG_PCI_QUIRKS is not set |
20 | CONFIG_MODULES=y | 20 | CONFIG_MODULES=y |
21 | CONFIG_MODULE_UNLOAD=y | 21 | CONFIG_MODULE_UNLOAD=y |
22 | # CONFIG_LBDAF is not set | ||
23 | # CONFIG_BLK_DEV_BSG is not set | 22 | # CONFIG_BLK_DEV_BSG is not set |
24 | CONFIG_PARTITION_ADVANCED=y | 23 | CONFIG_PARTITION_ADVANCED=y |
25 | CONFIG_MAC_PARTITION=y | 24 | CONFIG_MAC_PARTITION=y |
diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig
index 50a2c9ad583f..b0f0c5f9ad9d 100644
--- a/arch/mips/configs/rbtx49xx_defconfig
+++ b/arch/mips/configs/rbtx49xx_defconfig
@@ -17,7 +17,6 @@ CONFIG_TOSHIBA_RBTX4938_MPLEX_KEEP=y | |||
17 | CONFIG_PCI=y | 17 | CONFIG_PCI=y |
18 | CONFIG_MODULES=y | 18 | CONFIG_MODULES=y |
19 | CONFIG_MODULE_UNLOAD=y | 19 | CONFIG_MODULE_UNLOAD=y |
20 | # CONFIG_LBDAF is not set | ||
21 | # CONFIG_BLK_DEV_BSG is not set | 20 | # CONFIG_BLK_DEV_BSG is not set |
22 | CONFIG_NET=y | 21 | CONFIG_NET=y |
23 | CONFIG_PACKET=y | 22 | CONFIG_PACKET=y |
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 37ae4b57c001..a8f9bbef0975 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -14,7 +14,6 @@ CONFIG_SLAB=y | |||
14 | CONFIG_MODULES=y | 14 | CONFIG_MODULES=y |
15 | CONFIG_MODULE_UNLOAD=y | 15 | CONFIG_MODULE_UNLOAD=y |
16 | CONFIG_MODULE_FORCE_UNLOAD=y | 16 | CONFIG_MODULE_FORCE_UNLOAD=y |
17 | # CONFIG_LBDAF is not set | ||
18 | # CONFIG_BLK_DEV_BSG is not set | 17 | # CONFIG_BLK_DEV_BSG is not set |
19 | CONFIG_PA7100LC=y | 18 | CONFIG_PA7100LC=y |
20 | CONFIG_SMP=y | 19 | CONFIG_SMP=y |
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index 825c641726c4..d0d9ebc7165b 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -19,7 +19,6 @@ CONFIG_SLAB=y | |||
19 | CONFIG_PROFILING=y | 19 | CONFIG_PROFILING=y |
20 | CONFIG_MODULES=y | 20 | CONFIG_MODULES=y |
21 | CONFIG_MODULE_UNLOAD=y | 21 | CONFIG_MODULE_UNLOAD=y |
22 | # CONFIG_LBDAF is not set | ||
23 | # CONFIG_BLK_DEV_BSG is not set | 22 | # CONFIG_BLK_DEV_BSG is not set |
24 | CONFIG_CFQ_GROUP_IOSCHED=y | 23 | CONFIG_CFQ_GROUP_IOSCHED=y |
25 | CONFIG_CPU_SUBTYPE_SH7786=y | 24 | CONFIG_CPU_SUBTYPE_SH7786=y |
diff --git a/arch/sh/configs/ecovec24-romimage_defconfig b/arch/sh/configs/ecovec24-romimage_defconfig
index 0c5dfccbfe37..bdb61d1d0127 100644
--- a/arch/sh/configs/ecovec24-romimage_defconfig
+++ b/arch/sh/configs/ecovec24-romimage_defconfig
@@ -7,7 +7,6 @@ CONFIG_LOG_BUF_SHIFT=14 | |||
7 | CONFIG_BLK_DEV_INITRD=y | 7 | CONFIG_BLK_DEV_INITRD=y |
8 | # CONFIG_KALLSYMS is not set | 8 | # CONFIG_KALLSYMS is not set |
9 | CONFIG_SLAB=y | 9 | CONFIG_SLAB=y |
10 | # CONFIG_LBDAF is not set | ||
11 | # CONFIG_BLK_DEV_BSG is not set | 10 | # CONFIG_BLK_DEV_BSG is not set |
12 | CONFIG_CPU_SUBTYPE_SH7724=y | 11 | CONFIG_CPU_SUBTYPE_SH7724=y |
13 | CONFIG_MEMORY_SIZE=0x10000000 | 12 | CONFIG_MEMORY_SIZE=0x10000000 |
diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig
index 2b9b731fc86b..ad003ee469ea 100644
--- a/arch/sh/configs/rsk7264_defconfig
+++ b/arch/sh/configs/rsk7264_defconfig
@@ -16,7 +16,6 @@ CONFIG_PERF_COUNTERS=y | |||
16 | CONFIG_SLAB=y | 16 | CONFIG_SLAB=y |
17 | CONFIG_MMAP_ALLOW_UNINITIALIZED=y | 17 | CONFIG_MMAP_ALLOW_UNINITIALIZED=y |
18 | CONFIG_PROFILING=y | 18 | CONFIG_PROFILING=y |
19 | # CONFIG_LBDAF is not set | ||
20 | # CONFIG_BLK_DEV_BSG is not set | 19 | # CONFIG_BLK_DEV_BSG is not set |
21 | CONFIG_PARTITION_ADVANCED=y | 20 | CONFIG_PARTITION_ADVANCED=y |
22 | # CONFIG_IOSCHED_DEADLINE is not set | 21 | # CONFIG_IOSCHED_DEADLINE is not set |
diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig
index d041f7bcb84c..27fc01d58cf8 100644
--- a/arch/sh/configs/rsk7269_defconfig
+++ b/arch/sh/configs/rsk7269_defconfig
@@ -3,7 +3,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y | |||
3 | CONFIG_EMBEDDED=y | 3 | CONFIG_EMBEDDED=y |
4 | # CONFIG_VM_EVENT_COUNTERS is not set | 4 | # CONFIG_VM_EVENT_COUNTERS is not set |
5 | CONFIG_SLAB=y | 5 | CONFIG_SLAB=y |
6 | # CONFIG_LBDAF is not set | ||
7 | # CONFIG_BLK_DEV_BSG is not set | 6 | # CONFIG_BLK_DEV_BSG is not set |
8 | # CONFIG_IOSCHED_DEADLINE is not set | 7 | # CONFIG_IOSCHED_DEADLINE is not set |
9 | # CONFIG_IOSCHED_CFQ is not set | 8 | # CONFIG_IOSCHED_CFQ is not set |
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 2ddf5ca7094e..a89ccc15af23 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -11,7 +11,6 @@ CONFIG_PROFILING=y | |||
11 | CONFIG_GCOV_KERNEL=y | 11 | CONFIG_GCOV_KERNEL=y |
12 | CONFIG_MODULES=y | 12 | CONFIG_MODULES=y |
13 | CONFIG_MODULE_UNLOAD=y | 13 | CONFIG_MODULE_UNLOAD=y |
14 | # CONFIG_LBDAF is not set | ||
15 | # CONFIG_BLK_DEV_BSG is not set | 14 | # CONFIG_BLK_DEV_BSG is not set |
16 | CONFIG_CPU_SUBTYPE_SH7785=y | 15 | CONFIG_CPU_SUBTYPE_SH7785=y |
17 | CONFIG_MEMORY_START=0x40000000 | 16 | CONFIG_MEMORY_START=0x40000000 |
diff --git a/block/Kconfig b/block/Kconfig
index 028bc085dac8..1b220101a9cb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,30 +26,6 @@ menuconfig BLOCK | |||
26 | 26 | ||
27 | if BLOCK | 27 | if BLOCK |
28 | 28 | ||
29 | config LBDAF | ||
30 | bool "Support for large (2TB+) block devices and files" | ||
31 | depends on !64BIT | ||
32 | default y | ||
33 | help | ||
34 | Enable block devices or files of size 2TB and larger. | ||
35 | |||
36 | This option is required to support the full capacity of large | ||
37 | (2TB+) block devices, including RAID, disk, Network Block Device, | ||
38 | Logical Volume Manager (LVM) and loopback. | ||
39 | |||
40 | This option also enables support for single files larger than | ||
41 | 2TB. | ||
42 | |||
43 | The ext4 filesystem requires that this feature be enabled in | ||
44 | order to support filesystems that have the huge_file feature | ||
45 | enabled. Otherwise, it will refuse to mount in the read-write | ||
46 | mode any filesystems that use the huge_file feature, which is | ||
47 | enabled by default by mke2fs.ext4. | ||
48 | |||
49 | The GFS2 filesystem also requires this feature. | ||
50 | |||
51 | If unsure, say Y. | ||
52 | |||
53 | config BLK_SCSI_REQUEST | 29 | config BLK_SCSI_REQUEST |
54 | bool | 30 | bool |
55 | 31 | ||
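For context on the option being removed: on 32-bit kernels, CONFIG_LBDAF selected whether the block layer's sector and block counters were 64 bits wide; without it, a 32-bit sector_t caps addressable capacity at 2^32 * 512 bytes = 2 TiB. The following is a simplified, approximate reconstruction of the pre-5.2 include/linux/types.h logic, for illustration only; after this series the 64-bit types are used unconditionally:

    /* Approximate pre-5.2 logic; CONFIG_LBDAF was only offered on !64BIT builds. */
    #ifdef CONFIG_LBDAF
    typedef u64 sector_t;             /* 512-byte sector index, allows > 2 TiB devices */
    typedef u64 blkcnt_t;             /* per-file block count */
    #else
    typedef unsigned long sector_t;   /* 32 bits on 32-bit builds: capacity limited to 2 TiB */
    typedef unsigned long blkcnt_t;
    #endif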
diff --git a/block/badblocks.c b/block/badblocks.c
index 91f7bcf979d3..2e5f5697db35 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -1,18 +1,10 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Bad block management | 3 | * Bad block management |
3 | * | 4 | * |
4 | * - Heavily based on MD badblocks code from Neil Brown | 5 | * - Heavily based on MD badblocks code from Neil Brown |
5 | * | 6 | * |
6 | * Copyright (c) 2015, Intel Corporation. | 7 | * Copyright (c) 2015, Intel Corporation. |
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | 8 | */ |
17 | 9 | ||
18 | #include <linux/badblocks.h> | 10 | #include <linux/badblocks.h> |
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index c6113af31960..b3796a40a61a 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1,15 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
1 | /* | 2 | /* |
2 | * cgroups support for the BFQ I/O scheduler. | 3 | * cgroups support for the BFQ I/O scheduler. |
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation; either version 2 of the | ||
7 | * License, or (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | */ | 4 | */ |
14 | #include <linux/module.h> | 5 | #include <linux/module.h> |
15 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
@@ -578,7 +569,8 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
578 | bfqg_and_blkg_get(bfqg); | 569 | bfqg_and_blkg_get(bfqg); |
579 | 570 | ||
580 | if (bfq_bfqq_busy(bfqq)) { | 571 | if (bfq_bfqq_busy(bfqq)) { |
581 | bfq_pos_tree_add_move(bfqd, bfqq); | 572 | if (unlikely(!bfqd->nonrot_with_queueing)) |
573 | bfq_pos_tree_add_move(bfqd, bfqq); | ||
582 | bfq_activate_bfqq(bfqd, bfqq); | 574 | bfq_activate_bfqq(bfqd, bfqq); |
583 | } | 575 | } |
584 | 576 | ||
@@ -1102,7 +1094,7 @@ struct cftype bfq_blkcg_legacy_files[] = { | |||
1102 | }, | 1094 | }, |
1103 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 1095 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
1104 | 1096 | ||
1105 | /* the same statictics which cover the bfqg and its descendants */ | 1097 | /* the same statistics which cover the bfqg and its descendants */ |
1106 | { | 1098 | { |
1107 | .name = "bfq.io_service_bytes_recursive", | 1099 | .name = "bfq.io_service_bytes_recursive", |
1108 | .private = (unsigned long)&blkcg_policy_bfq, | 1100 | .private = (unsigned long)&blkcg_policy_bfq, |
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 5ba1e0d841b4..f8d430f88d25 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
1 | /* | 2 | /* |
2 | * Budget Fair Queueing (BFQ) I/O scheduler. | 3 | * Budget Fair Queueing (BFQ) I/O scheduler. |
3 | * | 4 | * |
@@ -12,16 +13,6 @@ | |||
12 | * | 13 | * |
13 | * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org> | 14 | * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org> |
14 | * | 15 | * |
15 | * This program is free software; you can redistribute it and/or | ||
16 | * modify it under the terms of the GNU General Public License as | ||
17 | * published by the Free Software Foundation; either version 2 of the | ||
18 | * License, or (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
23 | * General Public License for more details. | ||
24 | * | ||
25 | * BFQ is a proportional-share I/O scheduler, with some extra | 16 | * BFQ is a proportional-share I/O scheduler, with some extra |
26 | * low-latency capabilities. BFQ also supports full hierarchical | 17 | * low-latency capabilities. BFQ also supports full hierarchical |
27 | * scheduling through cgroups. Next paragraphs provide an introduction | 18 | * scheduling through cgroups. Next paragraphs provide an introduction |
@@ -189,7 +180,7 @@ static const int bfq_default_max_budget = 16 * 1024; | |||
189 | /* | 180 | /* |
190 | * When a sync request is dispatched, the queue that contains that | 181 | * When a sync request is dispatched, the queue that contains that |
191 | * request, and all the ancestor entities of that queue, are charged | 182 | * request, and all the ancestor entities of that queue, are charged |
192 | * with the number of sectors of the request. In constrast, if the | 183 | * with the number of sectors of the request. In contrast, if the |
193 | * request is async, then the queue and its ancestor entities are | 184 | * request is async, then the queue and its ancestor entities are |
194 | * charged with the number of sectors of the request, multiplied by | 185 | * charged with the number of sectors of the request, multiplied by |
195 | * the factor below. This throttles the bandwidth for async I/O, | 186 | * the factor below. This throttles the bandwidth for async I/O, |
@@ -217,7 +208,7 @@ const int bfq_timeout = HZ / 8; | |||
217 | * queue merging. | 208 | * queue merging. |
218 | * | 209 | * |
219 | * As can be deduced from the low time limit below, queue merging, if | 210 | * As can be deduced from the low time limit below, queue merging, if |
220 | * successful, happens at the very beggining of the I/O of the involved | 211 | * successful, happens at the very beginning of the I/O of the involved |
221 | * cooperating processes, as a consequence of the arrival of the very | 212 | * cooperating processes, as a consequence of the arrival of the very |
222 | * first requests from each cooperator. After that, there is very | 213 | * first requests from each cooperator. After that, there is very |
223 | * little chance to find cooperators. | 214 | * little chance to find cooperators. |
@@ -242,6 +233,14 @@ static struct kmem_cache *bfq_pool; | |||
242 | blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT)) | 233 | blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT)) |
243 | #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) | 234 | #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) |
244 | #define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19) | 235 | #define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19) |
236 | /* | ||
237 | * Sync random I/O is likely to be confused with soft real-time I/O, | ||
238 | * because it is characterized by limited throughput and apparently | ||
239 | * isochronous arrival pattern. To avoid false positives, queues | ||
240 | * containing only random (seeky) I/O are prevented from being tagged | ||
241 | * as soft real-time. | ||
242 | */ | ||
243 | #define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history & -1) | ||
245 | 244 | ||
246 | /* Min number of samples required to perform peak-rate update */ | 245 | /* Min number of samples required to perform peak-rate update */ |
247 | #define BFQ_RATE_MIN_SAMPLES 32 | 246 | #define BFQ_RATE_MIN_SAMPLES 32 |
@@ -433,7 +432,7 @@ void bfq_schedule_dispatch(struct bfq_data *bfqd) | |||
433 | 432 | ||
434 | /* | 433 | /* |
435 | * Lifted from AS - choose which of rq1 and rq2 that is best served now. | 434 | * Lifted from AS - choose which of rq1 and rq2 that is best served now. |
436 | * We choose the request that is closesr to the head right now. Distance | 435 | * We choose the request that is closer to the head right now. Distance |
437 | * behind the head is penalized and only allowed to a certain extent. | 436 | * behind the head is penalized and only allowed to a certain extent. |
438 | */ | 437 | */ |
439 | static struct request *bfq_choose_req(struct bfq_data *bfqd, | 438 | static struct request *bfq_choose_req(struct bfq_data *bfqd, |
@@ -595,7 +594,16 @@ static bool bfq_too_late_for_merging(struct bfq_queue *bfqq) | |||
595 | bfq_merge_time_limit); | 594 | bfq_merge_time_limit); |
596 | } | 595 | } |
597 | 596 | ||
598 | void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) | 597 | /* |
598 | * The following function is not marked as __cold because it is | ||
599 | * actually cold, but for the same performance goal described in the | ||
600 | * comments on the likely() at the beginning of | ||
601 | * bfq_setup_cooperator(). Unexpectedly, to reach an even lower | ||
602 | * execution time for the case where this function is not invoked, we | ||
603 | * had to add an unlikely() in each involved if(). | ||
604 | */ | ||
605 | void __cold | ||
606 | bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) | ||
599 | { | 607 | { |
600 | struct rb_node **p, *parent; | 608 | struct rb_node **p, *parent; |
601 | struct bfq_queue *__bfqq; | 609 | struct bfq_queue *__bfqq; |
@@ -629,12 +637,19 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) | |||
629 | } | 637 | } |
630 | 638 | ||
631 | /* | 639 | /* |
632 | * The following function returns true if every queue must receive the | 640 | * The following function returns false either if every active queue |
633 | * same share of the throughput (this condition is used when deciding | 641 | * must receive the same share of the throughput (symmetric scenario), |
634 | * whether idling may be disabled, see the comments in the function | 642 | * or, as a special case, if bfqq must receive a share of the |
635 | * bfq_better_to_idle()). | 643 | * throughput lower than or equal to the share that every other active |
644 | * queue must receive. If bfqq does sync I/O, then these are the only | ||
645 | * two cases where bfqq happens to be guaranteed its share of the | ||
646 | * throughput even if I/O dispatching is not plugged when bfqq remains | ||
647 | * temporarily empty (for more details, see the comments in the | ||
648 | * function bfq_better_to_idle()). For this reason, the return value | ||
649 | * of this function is used to check whether I/O-dispatch plugging can | ||
650 | * be avoided. | ||
636 | * | 651 | * |
637 | * Such a scenario occurs when: | 652 | * The above first case (symmetric scenario) occurs when: |
638 | * 1) all active queues have the same weight, | 653 | * 1) all active queues have the same weight, |
639 | * 2) all active queues belong to the same I/O-priority class, | 654 | * 2) all active queues belong to the same I/O-priority class, |
640 | * 3) all active groups at the same level in the groups tree have the same | 655 | * 3) all active groups at the same level in the groups tree have the same |
@@ -654,30 +669,36 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) | |||
654 | * support or the cgroups interface are not enabled, thus no state | 669 | * support or the cgroups interface are not enabled, thus no state |
655 | * needs to be maintained in this case. | 670 | * needs to be maintained in this case. |
656 | */ | 671 | */ |
657 | static bool bfq_symmetric_scenario(struct bfq_data *bfqd) | 672 | static bool bfq_asymmetric_scenario(struct bfq_data *bfqd, |
673 | struct bfq_queue *bfqq) | ||
658 | { | 674 | { |
675 | bool smallest_weight = bfqq && | ||
676 | bfqq->weight_counter && | ||
677 | bfqq->weight_counter == | ||
678 | container_of( | ||
679 | rb_first_cached(&bfqd->queue_weights_tree), | ||
680 | struct bfq_weight_counter, | ||
681 | weights_node); | ||
682 | |||
659 | /* | 683 | /* |
660 | * For queue weights to differ, queue_weights_tree must contain | 684 | * For queue weights to differ, queue_weights_tree must contain |
661 | * at least two nodes. | 685 | * at least two nodes. |
662 | */ | 686 | */ |
663 | bool varied_queue_weights = !RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && | 687 | bool varied_queue_weights = !smallest_weight && |
664 | (bfqd->queue_weights_tree.rb_node->rb_left || | 688 | !RB_EMPTY_ROOT(&bfqd->queue_weights_tree.rb_root) && |
665 | bfqd->queue_weights_tree.rb_node->rb_right); | 689 | (bfqd->queue_weights_tree.rb_root.rb_node->rb_left || |
690 | bfqd->queue_weights_tree.rb_root.rb_node->rb_right); | ||
666 | 691 | ||
667 | bool multiple_classes_busy = | 692 | bool multiple_classes_busy = |
668 | (bfqd->busy_queues[0] && bfqd->busy_queues[1]) || | 693 | (bfqd->busy_queues[0] && bfqd->busy_queues[1]) || |
669 | (bfqd->busy_queues[0] && bfqd->busy_queues[2]) || | 694 | (bfqd->busy_queues[0] && bfqd->busy_queues[2]) || |
670 | (bfqd->busy_queues[1] && bfqd->busy_queues[2]); | 695 | (bfqd->busy_queues[1] && bfqd->busy_queues[2]); |
671 | 696 | ||
672 | /* | 697 | return varied_queue_weights || multiple_classes_busy |
673 | * For queue weights to differ, queue_weights_tree must contain | ||
674 | * at least two nodes. | ||
675 | */ | ||
676 | return !(varied_queue_weights || multiple_classes_busy | ||
677 | #ifdef CONFIG_BFQ_GROUP_IOSCHED | 698 | #ifdef CONFIG_BFQ_GROUP_IOSCHED |
678 | || bfqd->num_groups_with_pending_reqs > 0 | 699 | || bfqd->num_groups_with_pending_reqs > 0 |
679 | #endif | 700 | #endif |
680 | ); | 701 | ; |
681 | } | 702 | } |
682 | 703 | ||
683 | /* | 704 | /* |
@@ -694,10 +715,11 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) | |||
694 | * should be low too. | 715 | * should be low too. |
695 | */ | 716 | */ |
696 | void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, | 717 | void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
697 | struct rb_root *root) | 718 | struct rb_root_cached *root) |
698 | { | 719 | { |
699 | struct bfq_entity *entity = &bfqq->entity; | 720 | struct bfq_entity *entity = &bfqq->entity; |
700 | struct rb_node **new = &(root->rb_node), *parent = NULL; | 721 | struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; |
722 | bool leftmost = true; | ||
701 | 723 | ||
702 | /* | 724 | /* |
703 | * Do not insert if the queue is already associated with a | 725 | * Do not insert if the queue is already associated with a |
@@ -726,8 +748,10 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
726 | } | 748 | } |
727 | if (entity->weight < __counter->weight) | 749 | if (entity->weight < __counter->weight) |
728 | new = &((*new)->rb_left); | 750 | new = &((*new)->rb_left); |
729 | else | 751 | else { |
730 | new = &((*new)->rb_right); | 752 | new = &((*new)->rb_right); |
753 | leftmost = false; | ||
754 | } | ||
731 | } | 755 | } |
732 | 756 | ||
733 | bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), | 757 | bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), |
@@ -736,7 +760,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
736 | /* | 760 | /* |
737 | * In the unlucky event of an allocation failure, we just | 761 | * In the unlucky event of an allocation failure, we just |
738 | * exit. This will cause the weight of queue to not be | 762 | * exit. This will cause the weight of queue to not be |
739 | * considered in bfq_symmetric_scenario, which, in its turn, | 763 | * considered in bfq_asymmetric_scenario, which, in its turn, |
740 | * causes the scenario to be deemed wrongly symmetric in case | 764 | * causes the scenario to be deemed wrongly symmetric in case |
741 | * bfqq's weight would have been the only weight making the | 765 | * bfqq's weight would have been the only weight making the |
742 | * scenario asymmetric. On the bright side, no unbalance will | 766 | * scenario asymmetric. On the bright side, no unbalance will |
@@ -750,7 +774,8 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
750 | 774 | ||
751 | bfqq->weight_counter->weight = entity->weight; | 775 | bfqq->weight_counter->weight = entity->weight; |
752 | rb_link_node(&bfqq->weight_counter->weights_node, parent, new); | 776 | rb_link_node(&bfqq->weight_counter->weights_node, parent, new); |
753 | rb_insert_color(&bfqq->weight_counter->weights_node, root); | 777 | rb_insert_color_cached(&bfqq->weight_counter->weights_node, root, |
778 | leftmost); | ||
754 | 779 | ||
755 | inc_counter: | 780 | inc_counter: |
756 | bfqq->weight_counter->num_active++; | 781 | bfqq->weight_counter->num_active++; |
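[Editorial note] The hunks above switch queue_weights_tree from a plain rb_root to an rb_root_cached, so the smallest weight can be read in O(1) via rb_first_cached(). For reference, this is the generic cached-rbtree insertion pattern the hunk follows: a sketch assuming <linux/rbtree.h> and a hypothetical weight_node type, not the BFQ code itself.

#include <linux/rbtree.h>

struct weight_node {
	unsigned int weight;
	struct rb_node rb;
};

/* Insert @wn into @root, keeping the cached leftmost pointer valid:
 * the moment the descent goes right even once, the new node cannot be
 * the leftmost, so the flag passed to rb_insert_color_cached() is
 * cleared. */
static void weight_tree_insert(struct rb_root_cached *root,
			       struct weight_node *wn)
{
	struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
	bool leftmost = true;

	while (*new) {
		struct weight_node *cur =
			rb_entry(*new, struct weight_node, rb);

		parent = *new;
		if (wn->weight < cur->weight)
			new = &(*new)->rb_left;
		else {
			new = &(*new)->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&wn->rb, parent, new);
	rb_insert_color_cached(&wn->rb, root, leftmost);
}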
@@ -765,7 +790,7 @@ inc_counter: | |||
765 | */ | 790 | */ |
766 | void __bfq_weights_tree_remove(struct bfq_data *bfqd, | 791 | void __bfq_weights_tree_remove(struct bfq_data *bfqd, |
767 | struct bfq_queue *bfqq, | 792 | struct bfq_queue *bfqq, |
768 | struct rb_root *root) | 793 | struct rb_root_cached *root) |
769 | { | 794 | { |
770 | if (!bfqq->weight_counter) | 795 | if (!bfqq->weight_counter) |
771 | return; | 796 | return; |
@@ -774,7 +799,7 @@ void __bfq_weights_tree_remove(struct bfq_data *bfqd, | |||
774 | if (bfqq->weight_counter->num_active > 0) | 799 | if (bfqq->weight_counter->num_active > 0) |
775 | goto reset_entity_pointer; | 800 | goto reset_entity_pointer; |
776 | 801 | ||
777 | rb_erase(&bfqq->weight_counter->weights_node, root); | 802 | rb_erase_cached(&bfqq->weight_counter->weights_node, root); |
778 | kfree(bfqq->weight_counter); | 803 | kfree(bfqq->weight_counter); |
779 | 804 | ||
780 | reset_entity_pointer: | 805 | reset_entity_pointer: |
@@ -889,7 +914,7 @@ static unsigned long bfq_serv_to_charge(struct request *rq, | |||
889 | struct bfq_queue *bfqq) | 914 | struct bfq_queue *bfqq) |
890 | { | 915 | { |
891 | if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1 || | 916 | if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1 || |
892 | !bfq_symmetric_scenario(bfqq->bfqd)) | 917 | bfq_asymmetric_scenario(bfqq->bfqd, bfqq)) |
893 | return blk_rq_sectors(rq); | 918 | return blk_rq_sectors(rq); |
894 | 919 | ||
895 | return blk_rq_sectors(rq) * bfq_async_charge_factor; | 920 | return blk_rq_sectors(rq) * bfq_async_charge_factor; |
@@ -955,7 +980,7 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) | |||
955 | * of several files | 980 | * of several files |
956 | * mplayer took 23 seconds to start, if constantly weight-raised. | 981 | * mplayer took 23 seconds to start, if constantly weight-raised. |
957 | * | 982 | * |
958 | * As for higher values than that accomodating the above bad | 983 | * As for higher values than that accommodating the above bad |
959 | * scenario, tests show that higher values would often yield | 984 | * scenario, tests show that higher values would often yield |
960 | * the opposite of the desired result, i.e., would worsen | 985 | * the opposite of the desired result, i.e., would worsen |
961 | * responsiveness by allowing non-interactive applications to | 986 | * responsiveness by allowing non-interactive applications to |
@@ -994,6 +1019,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, | |||
994 | else | 1019 | else |
995 | bfq_clear_bfqq_IO_bound(bfqq); | 1020 | bfq_clear_bfqq_IO_bound(bfqq); |
996 | 1021 | ||
1022 | bfqq->entity.new_weight = bic->saved_weight; | ||
997 | bfqq->ttime = bic->saved_ttime; | 1023 | bfqq->ttime = bic->saved_ttime; |
998 | bfqq->wr_coeff = bic->saved_wr_coeff; | 1024 | bfqq->wr_coeff = bic->saved_wr_coeff; |
999 | bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; | 1025 | bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; |
@@ -1041,8 +1067,18 @@ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq) | |||
1041 | 1067 | ||
1042 | hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node) | 1068 | hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node) |
1043 | hlist_del_init(&item->burst_list_node); | 1069 | hlist_del_init(&item->burst_list_node); |
1044 | hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); | 1070 | |
1045 | bfqd->burst_size = 1; | 1071 | /* |
1072 | * Start the creation of a new burst list only if there is no | ||
1073 | * active queue. See comments on the conditional invocation of | ||
1074 | * bfq_handle_burst(). | ||
1075 | */ | ||
1076 | if (bfq_tot_busy_queues(bfqd) == 0) { | ||
1077 | hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); | ||
1078 | bfqd->burst_size = 1; | ||
1079 | } else | ||
1080 | bfqd->burst_size = 0; | ||
1081 | |||
1046 | bfqd->burst_parent_entity = bfqq->entity.parent; | 1082 | bfqd->burst_parent_entity = bfqq->entity.parent; |
1047 | } | 1083 | } |
1048 | 1084 | ||
@@ -1098,7 +1134,8 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) | |||
1098 | * many parallel threads/processes. Examples are systemd during boot, | 1134 | * many parallel threads/processes. Examples are systemd during boot, |
1099 | * or git grep. To help these processes get their job done as soon as | 1135 | * or git grep. To help these processes get their job done as soon as |
1100 | * possible, it is usually better to not grant either weight-raising | 1136 | * possible, it is usually better to not grant either weight-raising |
1101 | * or device idling to their queues. | 1137 | * or device idling to their queues, unless these queues must be |
1138 | * protected from the I/O flowing through other active queues. | ||
1102 | * | 1139 | * |
1103 | * In this comment we describe, firstly, the reasons why this fact | 1140 | * In this comment we describe, firstly, the reasons why this fact |
1104 | * holds, and, secondly, the next function, which implements the main | 1141 | * holds, and, secondly, the next function, which implements the main |
@@ -1110,7 +1147,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) | |||
1110 | * cumulatively served, the sooner the target job of these queues gets | 1147 | * cumulatively served, the sooner the target job of these queues gets |
1111 | * completed. As a consequence, weight-raising any of these queues, | 1148 | * completed. As a consequence, weight-raising any of these queues, |
1112 | * which also implies idling the device for it, is almost always | 1149 | * which also implies idling the device for it, is almost always |
1113 | * counterproductive. In most cases it just lowers throughput. | 1150 | * counterproductive, unless there are other active queues to isolate |
1151 | * these new queues from. If there are no other active queues, then | ||
1152 | * weight-raising these new queues just lowers throughput in most | ||
1153 | * cases. | ||
1114 | * | 1154 | * |
1115 | * On the other hand, a burst of queue creations may be caused also by | 1155 | * On the other hand, a burst of queue creations may be caused also by |
1116 | * the start of an application that does not consist of a lot of | 1156 | * the start of an application that does not consist of a lot of |
@@ -1144,14 +1184,16 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) | |||
1144 | * are very rare. They typically occur if some service happens to | 1184 | * are very rare. They typically occur if some service happens to |
1145 | * start doing I/O exactly when the interactive task starts. | 1185 | * start doing I/O exactly when the interactive task starts. |
1146 | * | 1186 | * |
1147 | * Turning back to the next function, it implements all the steps | 1187 | * Turning back to the next function, it is invoked only if there are |
1148 | * needed to detect the occurrence of a large burst and to properly | 1188 | * no active queues (apart from active queues that would belong to the |
1149 | * mark all the queues belonging to it (so that they can then be | 1189 | * same, possible burst bfqq would belong to), and it implements all |
1150 | * treated in a different way). This goal is achieved by maintaining a | 1190 | * the steps needed to detect the occurrence of a large burst and to |
1151 | * "burst list" that holds, temporarily, the queues that belong to the | 1191 | * properly mark all the queues belonging to it (so that they can then |
1152 | * burst in progress. The list is then used to mark these queues as | 1192 | * be treated in a different way). This goal is achieved by |
1153 | * belonging to a large burst if the burst does become large. The main | 1193 | * maintaining a "burst list" that holds, temporarily, the queues that |
1154 | * steps are the following. | 1194 | * belong to the burst in progress. The list is then used to mark |
1195 | * these queues as belonging to a large burst if the burst does become | ||
1196 | * large. The main steps are the following. | ||
1155 | * | 1197 | * |
1156 | * . when the very first queue is created, the queue is inserted into the | 1198 | * . when the very first queue is created, the queue is inserted into the |
1157 | * list (as it could be the first queue in a possible burst) | 1199 | * list (as it could be the first queue in a possible burst) |
@@ -1596,6 +1638,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, | |||
1596 | */ | 1638 | */ |
1597 | in_burst = bfq_bfqq_in_large_burst(bfqq); | 1639 | in_burst = bfq_bfqq_in_large_burst(bfqq); |
1598 | soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && | 1640 | soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && |
1641 | !BFQQ_TOTALLY_SEEKY(bfqq) && | ||
1599 | !in_burst && | 1642 | !in_burst && |
1600 | time_is_before_jiffies(bfqq->soft_rt_next_start) && | 1643 | time_is_before_jiffies(bfqq->soft_rt_next_start) && |
1601 | bfqq->dispatched == 0; | 1644 | bfqq->dispatched == 0; |
@@ -1704,6 +1747,123 @@ static void bfq_add_request(struct request *rq) | |||
1704 | bfqq->queued[rq_is_sync(rq)]++; | 1747 | bfqq->queued[rq_is_sync(rq)]++; |
1705 | bfqd->queued++; | 1748 | bfqd->queued++; |
1706 | 1749 | ||
1750 | if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) { | ||
1751 | /* | ||
1752 | * Periodically reset inject limit, to make sure that | ||
1753 | * the latter eventually drops in case workload | ||
1754 | * changes, see step (3) in the comments on | ||
1755 | * bfq_update_inject_limit(). | ||
1756 | */ | ||
1757 | if (time_is_before_eq_jiffies(bfqq->decrease_time_jif + | ||
1758 | msecs_to_jiffies(1000))) { | ||
1759 | /* invalidate baseline total service time */ | ||
1760 | bfqq->last_serv_time_ns = 0; | ||
1761 | |||
1762 | /* | ||
1763 | * Reset pointer in case we are waiting for | ||
1764 | * some request completion. | ||
1765 | */ | ||
1766 | bfqd->waited_rq = NULL; | ||
1767 | |||
1768 | /* | ||
1769 | * If bfqq has a short think time, then start | ||
1770 | * by setting the inject limit to 0 | ||
1771 | * prudentially, because the service time of | ||
1772 | * an injected I/O request may be higher than | ||
1773 | * the think time of bfqq, and therefore, if | ||
1774 | * one request was injected when bfqq remains | ||
1775 | * empty, this injected request might delay | ||
1776 | * the service of the next I/O request for | ||
1777 | * bfqq significantly. In case bfqq can | ||
1778 | * actually tolerate some injection, then the | ||
1779 | * adaptive update will however raise the | ||
1780 | * limit soon. This lucky circumstance holds | ||
1781 | * exactly because bfqq has a short think | ||
1782 | * time, and thus, after remaining empty, is | ||
1783 | * likely to get new I/O enqueued---and then | ||
1784 | * completed---before being expired. This is | ||
1785 | * the very pattern that gives the | ||
1786 | * limit-update algorithm the chance to | ||
1787 | * measure the effect of injection on request | ||
1788 | * service times, and then to update the limit | ||
1789 | * accordingly. | ||
1790 | * | ||
1791 | * On the opposite end, if bfqq has a long | ||
1792 | * think time, then start directly by 1, | ||
1793 | * because: | ||
1794 | * a) on the bright side, keeping at most one | ||
1795 | * request in service in the drive is unlikely | ||
1796 | * to cause any harm to the latency of bfqq's | ||
1797 | * requests, as the service time of a single | ||
1798 | * request is likely to be lower than the | ||
1799 | * think time of bfqq; | ||
1800 | * b) on the downside, after becoming empty, | ||
1801 | * bfqq is likely to expire before getting its | ||
1802 | * next request. With this request arrival | ||
1803 | * pattern, it is very hard to sample total | ||
1804 | * service times and update the inject limit | ||
1805 | * accordingly (see comments on | ||
1806 | * bfq_update_inject_limit()). So the limit is | ||
1807 | * likely to be never, or at least seldom, | ||
1808 | * updated. As a consequence, by setting the | ||
1809 | * limit to 1, we avoid that no injection ever | ||
1810 | * occurs with bfqq. On the downside, this | ||
1811 | * proactive step further reduces chances to | ||
1812 | * actually compute the baseline total service | ||
1813 | * time. Thus it reduces chances to execute the | ||
1814 | * limit-update algorithm and possibly raise the | ||
1815 | * limit to more than 1. | ||
1816 | */ | ||
1817 | if (bfq_bfqq_has_short_ttime(bfqq)) | ||
1818 | bfqq->inject_limit = 0; | ||
1819 | else | ||
1820 | bfqq->inject_limit = 1; | ||
1821 | bfqq->decrease_time_jif = jiffies; | ||
1822 | } | ||
1823 | |||
1824 | /* | ||
1825 | * The following conditions must hold to setup a new | ||
1826 | * sampling of total service time, and then a new | ||
1827 | * update of the inject limit: | ||
1828 | * - bfqq is in service, because the total service | ||
1829 | * time is evaluated only for the I/O requests of | ||
1830 | * the queues in service; | ||
1831 | * - this is the right occasion to compute or to | ||
1832 | * lower the baseline total service time, because | ||
1833 | * there are actually no requests in the drive, | ||
1834 | * or | ||
1835 | * the baseline total service time is available, and | ||
1836 | * this is the right occasion to compute the other | ||
1837 | * quantity needed to update the inject limit, i.e., | ||
1838 | * the total service time caused by the amount of | ||
1839 | * injection allowed by the current value of the | ||
1840 | * limit. It is the right occasion because injection | ||
1841 | * has actually been performed during the service | ||
1842 | * hole, and there are still in-flight requests, | ||
1843 | * which are very likely to be exactly the injected | ||
1844 | * requests, or part of them; | ||
1845 | * - the minimum interval for sampling the total | ||
1846 | * service time and updating the inject limit has | ||
1847 | * elapsed. | ||
1848 | */ | ||
1849 | if (bfqq == bfqd->in_service_queue && | ||
1850 | (bfqd->rq_in_driver == 0 || | ||
1851 | (bfqq->last_serv_time_ns > 0 && | ||
1852 | bfqd->rqs_injected && bfqd->rq_in_driver > 0)) && | ||
1853 | time_is_before_eq_jiffies(bfqq->decrease_time_jif + | ||
1854 | msecs_to_jiffies(100))) { | ||
1855 | bfqd->last_empty_occupied_ns = ktime_get_ns(); | ||
1856 | /* | ||
1857 | * Start the state machine for measuring the | ||
1858 | * total service time of rq: setting | ||
1859 | * wait_dispatch will cause bfqd->waited_rq to | ||
1860 | * be set when rq will be dispatched. | ||
1861 | */ | ||
1862 | bfqd->wait_dispatch = true; | ||
1863 | bfqd->rqs_injected = false; | ||
1864 | } | ||
1865 | } | ||
1866 | |||
1707 | elv_rb_add(&bfqq->sort_list, rq); | 1867 | elv_rb_add(&bfqq->sort_list, rq); |
1708 | 1868 | ||
1709 | /* | 1869 | /* |
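[Editorial note] The block added to bfq_add_request() above makes two decisions: (a) every second or so it resets the inject limit, starting from 0 for short-think-time queues and from 1 otherwise, and invalidates the baseline service time; (b) it arms total-service-time sampling only when bfqq is in service and either the drive is empty or a baseline exists and injection actually happened, at most once every 100 ms. A userspace toy of those two decisions follows; names and the plain-milliseconds clock are hypothetical, whereas the kernel code uses jiffies and ktime.

#include <stdbool.h>

struct toy_queue {
	unsigned int inject_limit;
	bool has_short_ttime;
	unsigned long decrease_time_ms;
	unsigned long long last_serv_time_ns;	/* 0 = no baseline yet */
};

/* (a) periodic reset: prudent start for queues that think quickly,
 * bolder start for queues that would otherwise rarely be sampled. */
static void toy_periodic_reset(struct toy_queue *q, unsigned long now_ms)
{
	if (now_ms - q->decrease_time_ms >= 1000) {
		q->last_serv_time_ns = 0;	/* invalidate baseline */
		q->inject_limit = q->has_short_ttime ? 0 : 1;
		q->decrease_time_ms = now_ms;
	}
}

/* (b) decide whether to start measuring the total service time of the
 * request that just made bfqq non-empty. */
static bool toy_should_sample(const struct toy_queue *q, bool in_service,
			      int rq_in_driver, bool rqs_injected,
			      unsigned long now_ms)
{
	bool drive_empty = rq_in_driver == 0;
	bool baseline_and_injection = q->last_serv_time_ns > 0 &&
				      rqs_injected && rq_in_driver > 0;

	return in_service && (drive_empty || baseline_and_injection) &&
	       now_ms - q->decrease_time_ms >= 100;
}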
@@ -1715,8 +1875,9 @@ static void bfq_add_request(struct request *rq) | |||
1715 | 1875 | ||
1716 | /* | 1876 | /* |
1717 | * Adjust priority tree position, if next_rq changes. | 1877 | * Adjust priority tree position, if next_rq changes. |
1878 | * See comments on bfq_pos_tree_add_move() for the unlikely(). | ||
1718 | */ | 1879 | */ |
1719 | if (prev != bfqq->next_rq) | 1880 | if (unlikely(!bfqd->nonrot_with_queueing && prev != bfqq->next_rq)) |
1720 | bfq_pos_tree_add_move(bfqd, bfqq); | 1881 | bfq_pos_tree_add_move(bfqd, bfqq); |
1721 | 1882 | ||
1722 | if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */ | 1883 | if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */ |
@@ -1856,7 +2017,9 @@ static void bfq_remove_request(struct request_queue *q, | |||
1856 | bfqq->pos_root = NULL; | 2017 | bfqq->pos_root = NULL; |
1857 | } | 2018 | } |
1858 | } else { | 2019 | } else { |
1859 | bfq_pos_tree_add_move(bfqd, bfqq); | 2020 | /* see comments on bfq_pos_tree_add_move() for the unlikely() */ |
2021 | if (unlikely(!bfqd->nonrot_with_queueing)) | ||
2022 | bfq_pos_tree_add_move(bfqd, bfqq); | ||
1860 | } | 2023 | } |
1861 | 2024 | ||
1862 | if (rq->cmd_flags & REQ_META) | 2025 | if (rq->cmd_flags & REQ_META) |
@@ -1941,7 +2104,12 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, | |||
1941 | */ | 2104 | */ |
1942 | if (prev != bfqq->next_rq) { | 2105 | if (prev != bfqq->next_rq) { |
1943 | bfq_updated_next_req(bfqd, bfqq); | 2106 | bfq_updated_next_req(bfqd, bfqq); |
1944 | bfq_pos_tree_add_move(bfqd, bfqq); | 2107 | /* |
2108 | * See comments on bfq_pos_tree_add_move() for | ||
2109 | * the unlikely(). | ||
2110 | */ | ||
2111 | if (unlikely(!bfqd->nonrot_with_queueing)) | ||
2112 | bfq_pos_tree_add_move(bfqd, bfqq); | ||
1945 | } | 2113 | } |
1946 | } | 2114 | } |
1947 | } | 2115 | } |
@@ -2224,6 +2392,46 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
2224 | struct bfq_queue *in_service_bfqq, *new_bfqq; | 2392 | struct bfq_queue *in_service_bfqq, *new_bfqq; |
2225 | 2393 | ||
2226 | /* | 2394 | /* |
2395 | * Do not perform queue merging if the device is non | ||
2396 | * rotational and performs internal queueing. In fact, such a | ||
2397 | * device reaches a high speed through internal parallelism | ||
2398 | * and pipelining. This means that, to reach a high | ||
2399 | * throughput, it must have many requests enqueued at the same | ||
2400 | * time. But, in this configuration, the internal scheduling | ||
2401 | * algorithm of the device does exactly the job of queue | ||
2402 | * merging: it reorders requests so as to obtain as much as | ||
2403 | * possible a sequential I/O pattern. As a consequence, with | ||
2404 | * the workload generated by processes doing interleaved I/O, | ||
2405 | * the throughput reached by the device is likely to be the | ||
2406 | * same, with and without queue merging. | ||
2407 | * | ||
2408 | * Disabling merging also provides a remarkable benefit in | ||
2409 | * terms of throughput. Merging tends to make many workloads | ||
2410 | * artificially more uneven, because of shared queues | ||
2411 | * remaining non empty for incomparably more time than | ||
2412 | * non-merged queues. This may accentuate workload | ||
2413 | * asymmetries. For example, if one of the queues in a set of | ||
2414 | * merged queues has a higher weight than a normal queue, then | ||
2415 | * the shared queue may inherit such a high weight and, by | ||
2416 | * staying almost always active, may force BFQ to perform I/O | ||
2417 | * plugging most of the time. This evidently makes it harder | ||
2418 | * for BFQ to let the device reach a high throughput. | ||
2419 | * | ||
2420 | * Finally, the likely() macro below is not used because one | ||
2421 | * of the two branches is more likely than the other, but to | ||
2422 | * have the code path after the following if() executed as | ||
2423 | * fast as possible for the case of a non rotational device | ||
2424 | * with queueing. We want it because this is the fastest kind | ||
2425 | * of device. On the opposite end, the likely() may lengthen | ||
2426 | * the execution time of BFQ for the case of slower devices | ||
2427 | * (rotational or at least without queueing). But in this case | ||
2428 | * the execution time of BFQ matters very little, if not at | ||
2429 | * all. | ||
2430 | */ | ||
2431 | if (likely(bfqd->nonrot_with_queueing)) | ||
2432 | return NULL; | ||
2433 | |||
2434 | /* | ||
2227 | * Prevent bfqq from being merged if it has been created too | 2435 | * Prevent bfqq from being merged if it has been created too |
2228 | * long ago. The idea is that true cooperating processes, and | 2436 | * long ago. The idea is that true cooperating processes, and |
2229 | * thus their associated bfq_queues, are supposed to be | 2437 | * thus their associated bfq_queues, are supposed to be |
@@ -2286,6 +2494,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) | |||
2286 | if (!bic) | 2494 | if (!bic) |
2287 | return; | 2495 | return; |
2288 | 2496 | ||
2497 | bic->saved_weight = bfqq->entity.orig_weight; | ||
2289 | bic->saved_ttime = bfqq->ttime; | 2498 | bic->saved_ttime = bfqq->ttime; |
2290 | bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq); | 2499 | bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq); |
2291 | bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); | 2500 | bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); |
@@ -2374,6 +2583,16 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, | |||
2374 | * assignment causes no harm). | 2583 | * assignment causes no harm). |
2375 | */ | 2584 | */ |
2376 | new_bfqq->bic = NULL; | 2585 | new_bfqq->bic = NULL; |
2586 | /* | ||
2587 | * If the queue is shared, the pid is the pid of one of the associated | ||
2588 | * processes. Which pid depends on the exact sequence of merge events | ||
2589 | * the queue underwent. So printing such a pid is useless and confusing | ||
2590 | * because it reports a random pid between those of the associated | ||
2591 | * processes. | ||
2592 | * We mark such a queue with a pid -1, and then print SHARED instead of | ||
2593 | * a pid in logging messages. | ||
2594 | */ | ||
2595 | new_bfqq->pid = -1; | ||
2377 | bfqq->bic = NULL; | 2596 | bfqq->bic = NULL; |
2378 | /* release process reference to bfqq */ | 2597 | /* release process reference to bfqq */ |
2379 | bfq_put_queue(bfqq); | 2598 | bfq_put_queue(bfqq); |
@@ -2408,8 +2627,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, | |||
2408 | /* | 2627 | /* |
2409 | * bic still points to bfqq, then it has not yet been | 2628 | * bic still points to bfqq, then it has not yet been |
2410 | * redirected to some other bfq_queue, and a queue | 2629 | * redirected to some other bfq_queue, and a queue |
2411 | * merge beween bfqq and new_bfqq can be safely | 2630 | * merge between bfqq and new_bfqq can be safely |
2412 | * fulfillled, i.e., bic can be redirected to new_bfqq | 2631 | * fulfilled, i.e., bic can be redirected to new_bfqq |
2413 | * and bfqq can be put. | 2632 | * and bfqq can be put. |
2414 | */ | 2633 | */ |
2415 | bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq, | 2634 | bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq, |
@@ -2543,10 +2762,14 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) | |||
2543 | * queue). | 2762 | * queue). |
2544 | */ | 2763 | */ |
2545 | if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && | 2764 | if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && |
2546 | bfq_symmetric_scenario(bfqd)) | 2765 | !bfq_asymmetric_scenario(bfqd, bfqq)) |
2547 | sl = min_t(u64, sl, BFQ_MIN_TT); | 2766 | sl = min_t(u64, sl, BFQ_MIN_TT); |
2767 | else if (bfqq->wr_coeff > 1) | ||
2768 | sl = max_t(u32, sl, 20ULL * NSEC_PER_MSEC); | ||
2548 | 2769 | ||
2549 | bfqd->last_idling_start = ktime_get(); | 2770 | bfqd->last_idling_start = ktime_get(); |
2771 | bfqd->last_idling_start_jiffies = jiffies; | ||
2772 | |||
2550 | hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl), | 2773 | hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl), |
2551 | HRTIMER_MODE_REL); | 2774 | HRTIMER_MODE_REL); |
2552 | bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); | 2775 | bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); |
@@ -2848,8 +3071,10 @@ static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) | |||
2848 | bfq_requeue_bfqq(bfqd, bfqq, true); | 3071 | bfq_requeue_bfqq(bfqd, bfqq, true); |
2849 | /* | 3072 | /* |
2850 | * Resort priority tree of potential close cooperators. | 3073 | * Resort priority tree of potential close cooperators. |
3074 | * See comments on bfq_pos_tree_add_move() for the unlikely(). | ||
2851 | */ | 3075 | */ |
2852 | bfq_pos_tree_add_move(bfqd, bfqq); | 3076 | if (unlikely(!bfqd->nonrot_with_queueing)) |
3077 | bfq_pos_tree_add_move(bfqd, bfqq); | ||
2853 | } | 3078 | } |
2854 | 3079 | ||
2855 | /* | 3080 | /* |
@@ -3223,13 +3448,6 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, | |||
3223 | jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); | 3448 | jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); |
3224 | } | 3449 | } |
3225 | 3450 | ||
3226 | static bool bfq_bfqq_injectable(struct bfq_queue *bfqq) | ||
3227 | { | ||
3228 | return BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && | ||
3229 | blk_queue_nonrot(bfqq->bfqd->queue) && | ||
3230 | bfqq->bfqd->hw_tag; | ||
3231 | } | ||
3232 | |||
3233 | /** | 3451 | /** |
3234 | * bfq_bfqq_expire - expire a queue. | 3452 | * bfq_bfqq_expire - expire a queue. |
3235 | * @bfqd: device owning the queue. | 3453 | * @bfqd: device owning the queue. |
@@ -3344,6 +3562,14 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, | |||
3344 | slow, bfqq->dispatched, bfq_bfqq_has_short_ttime(bfqq)); | 3562 | slow, bfqq->dispatched, bfq_bfqq_has_short_ttime(bfqq)); |
3345 | 3563 | ||
3346 | /* | 3564 | /* |
3565 | * bfqq expired, so no total service time needs to be computed | ||
3566 | * any longer: reset state machine for measuring total service | ||
3567 | * times. | ||
3568 | */ | ||
3569 | bfqd->rqs_injected = bfqd->wait_dispatch = false; | ||
3570 | bfqd->waited_rq = NULL; | ||
3571 | |||
3572 | /* | ||
3347 | * Increase, decrease or leave budget unchanged according to | 3573 | * Increase, decrease or leave budget unchanged according to |
3348 | * reason. | 3574 | * reason. |
3349 | */ | 3575 | */ |
@@ -3352,8 +3578,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, | |||
3352 | /* bfqq is gone, no more actions on it */ | 3578 | /* bfqq is gone, no more actions on it */ |
3353 | return; | 3579 | return; |
3354 | 3580 | ||
3355 | bfqq->injected_service = 0; | ||
3356 | |||
3357 | /* mark bfqq as waiting a request only if a bic still points to it */ | 3581 | /* mark bfqq as waiting a request only if a bic still points to it */ |
3358 | if (!bfq_bfqq_busy(bfqq) && | 3582 | if (!bfq_bfqq_busy(bfqq) && |
3359 | reason != BFQQE_BUDGET_TIMEOUT && | 3583 | reason != BFQQE_BUDGET_TIMEOUT && |
@@ -3497,8 +3721,9 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, | |||
3497 | } | 3721 | } |
3498 | 3722 | ||
3499 | /* | 3723 | /* |
3500 | * There is a case where idling must be performed not for | 3724 | * There is a case where idling does not have to be performed for |
3501 | * throughput concerns, but to preserve service guarantees. | 3725 | * throughput concerns, but to preserve the throughput share of |
3726 | * the process associated with bfqq. | ||
3502 | * | 3727 | * |
3503 | * To introduce this case, we can note that allowing the drive | 3728 | * To introduce this case, we can note that allowing the drive |
3504 | * to enqueue more than one request at a time, and hence | 3729 | * to enqueue more than one request at a time, and hence |
@@ -3514,77 +3739,83 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, | |||
3514 | * concern about per-process throughput distribution, and | 3739 | * concern about per-process throughput distribution, and |
3515 | * makes its decisions only on a per-request basis. Therefore, | 3740 | * makes its decisions only on a per-request basis. Therefore, |
3516 | * the service distribution enforced by the drive's internal | 3741 | * the service distribution enforced by the drive's internal |
3517 | * scheduler is likely to coincide with the desired | 3742 | * scheduler is likely to coincide with the desired throughput |
3518 | * device-throughput distribution only in a completely | 3743 | * distribution only in a completely symmetric, or favorably |
3519 | * symmetric scenario where: | 3744 | * skewed scenario where: |
3520 | * (i) each of these processes must get the same throughput as | 3745 | * (i-a) each of these processes must get the same throughput as |
3521 | * the others; | 3746 | * the others, |
3522 | * (ii) the I/O of each process has the same properties, in | 3747 | * (i-b) in case (i-a) does not hold, it holds that the process |
3523 | * terms of locality (sequential or random), direction | 3748 | * associated with bfqq must receive a lower or equal |
3524 | * (reads or writes), request sizes, greediness | 3749 | * throughput than any of the other processes; |
3525 | * (from I/O-bound to sporadic), and so on. | 3750 | * (ii) the I/O of each process has the same properties, in |
3526 | * In fact, in such a scenario, the drive tends to treat | 3751 | * terms of locality (sequential or random), direction |
3527 | * the requests of each of these processes in about the same | 3752 | * (reads or writes), request sizes, greediness |
3528 | * way as the requests of the others, and thus to provide | 3753 | * (from I/O-bound to sporadic), and so on; |
3529 | * each of these processes with about the same throughput | 3754 | |
3530 | * (which is exactly the desired throughput distribution). In | 3755 | * In fact, in such a scenario, the drive tends to treat the requests |
3531 | * contrast, in any asymmetric scenario, device idling is | 3756 | * of each process in about the same way as the requests of the |
3532 | * certainly needed to guarantee that bfqq receives its | 3757 | * others, and thus to provide each of these processes with about the |
3533 | * assigned fraction of the device throughput (see [1] for | 3758 | * same throughput. This is exactly the desired throughput |
3534 | * details). | 3759 | * distribution if (i-a) holds, or, if (i-b) holds instead, this is an |
3535 | * The problem is that idling may significantly reduce | 3760 | * even more convenient distribution for (the process associated with) |
3536 | * throughput with certain combinations of types of I/O and | 3761 | * bfqq. |
3537 | * devices. An important example is sync random I/O, on flash | 3762 | * |
3538 | * storage with command queueing. So, unless bfqq falls in the | 3763 | * In contrast, in any asymmetric or unfavorable scenario, device |
3539 | * above cases where idling also boosts throughput, it would | 3764 | * idling (I/O-dispatch plugging) is certainly needed to guarantee |
3540 | * be important to check conditions (i) and (ii) accurately, | 3765 | * that bfqq receives its assigned fraction of the device throughput |
3541 | * so as to avoid idling when not strictly needed for service | 3766 | * (see [1] for details). |
3542 | * guarantees. | ||
3543 | * | 3767 | * |
3544 | * Unfortunately, it is extremely difficult to thoroughly | 3768 | * The problem is that idling may significantly reduce throughput with |
3545 | * check condition (ii). And, in case there are active groups, | 3769 | * certain combinations of types of I/O and devices. An important |
3546 | * it becomes very difficult to check condition (i) too. In | 3770 | * example is sync random I/O on flash storage with command |
3547 | * fact, if there are active groups, then, for condition (i) | 3771 | * queueing. So, unless bfqq falls in cases where idling also boosts |
3548 | * to become false, it is enough that an active group contains | 3772 | * throughput, it is important to check conditions (i-a), i(-b) and |
3549 | * more active processes or sub-groups than some other active | 3773 | * (ii) accurately, so as to avoid idling when not strictly needed for |
3550 | * group. More precisely, for condition (i) to hold because of | 3774 | * service guarantees. |
3551 | * such a group, it is not even necessary that the group is | 3775 | * |
3552 | * (still) active: it is sufficient that, even if the group | 3776 | * Unfortunately, it is extremely difficult to thoroughly check |
3553 | * has become inactive, some of its descendant processes still | 3777 | * condition (ii). And, in case there are active groups, it becomes |
3554 | * have some request already dispatched but still waiting for | 3778 | * very difficult to check conditions (i-a) and (i-b) too. In fact, |
3555 | * completion. In fact, requests have still to be guaranteed | 3779 | * if there are active groups, then, for conditions (i-a) or (i-b) to |
3556 | * their share of the throughput even after being | 3780 | * become false 'indirectly', it is enough that an active group |
3557 | * dispatched. In this respect, it is easy to show that, if a | 3781 | * contains more active processes or sub-groups than some other active |
3558 | * group frequently becomes inactive while still having | 3782 | * group. More precisely, for conditions (i-a) or (i-b) to become |
3559 | * in-flight requests, and if, when this happens, the group is | 3783 | * false because of such a group, it is not even necessary that the |
3560 | * not considered in the calculation of whether the scenario | 3784 | * group is (still) active: it is sufficient that, even if the group |
3561 | * is asymmetric, then the group may fail to be guaranteed its | 3785 | * has become inactive, some of its descendant processes still have |
3562 | * fair share of the throughput (basically because idling may | 3786 | * some request already dispatched but still waiting for |
3563 | * not be performed for the descendant processes of the group, | 3787 | * completion. In fact, requests have still to be guaranteed their |
3564 | * but it had to be). We address this issue with the | 3788 | * share of the throughput even after being dispatched. In this |
3565 | * following bi-modal behavior, implemented in the function | 3789 | * respect, it is easy to show that, if a group frequently becomes |
3566 | * bfq_symmetric_scenario(). | 3790 | * inactive while still having in-flight requests, and if, when this |
3791 | * happens, the group is not considered in the calculation of whether | ||
3792 | * the scenario is asymmetric, then the group may fail to be | ||
3793 | * guaranteed its fair share of the throughput (basically because | ||
3794 | * idling may not be performed for the descendant processes of the | ||
3795 | * group, but it had to be). We address this issue with the following | ||
3796 | * bi-modal behavior, implemented in the function | ||
3797 | * bfq_asymmetric_scenario(). | ||
3567 | * | 3798 | * |
3568 | * If there are groups with requests waiting for completion | 3799 | * If there are groups with requests waiting for completion |
3569 | * (as commented above, some of these groups may even be | 3800 | * (as commented above, some of these groups may even be |
3570 | * already inactive), then the scenario is tagged as | 3801 | * already inactive), then the scenario is tagged as |
3571 | * asymmetric, conservatively, without checking any of the | 3802 | * asymmetric, conservatively, without checking any of the |
3572 | * conditions (i) and (ii). So the device is idled for bfqq. | 3803 | * conditions (i-a), (i-b) or (ii). So the device is idled for bfqq. |
3573 | * This behavior matches also the fact that groups are created | 3804 | * This behavior matches also the fact that groups are created |
3574 | * exactly if controlling I/O is a primary concern (to | 3805 | * exactly if controlling I/O is a primary concern (to |
3575 | * preserve bandwidth and latency guarantees). | 3806 | * preserve bandwidth and latency guarantees). |
3576 | * | 3807 | * |
3577 | * On the opposite end, if there are no groups with requests | 3808 | * On the opposite end, if there are no groups with requests waiting |
3578 | * waiting for completion, then only condition (i) is actually | 3809 | * for completion, then only conditions (i-a) and (i-b) are actually |
3579 | * controlled, i.e., provided that condition (i) holds, idling | 3810 | * controlled, i.e., provided that conditions (i-a) or (i-b) holds, |
3580 | * is not performed, regardless of whether condition (ii) | 3811 | * idling is not performed, regardless of whether condition (ii) |
3581 | * holds. In other words, only if condition (i) does not hold, | 3812 | * holds. In other words, only if conditions (i-a) and (i-b) do not |
3582 | * then idling is allowed, and the device tends to be | 3813 | * hold, then idling is allowed, and the device tends to be prevented |
3583 | * prevented from queueing many requests, possibly of several | 3814 | * from queueing many requests, possibly of several processes. Since |
3584 | * processes. Since there are no groups with requests waiting | 3815 | * there are no groups with requests waiting for completion, then, to |
3585 | * for completion, then, to control condition (i) it is enough | 3816 | * control conditions (i-a) and (i-b) it is enough to check just |
3586 | * to check just whether all the queues with requests waiting | 3817 | * whether all the queues with requests waiting for completion also |
3587 | * for completion also have the same weight. | 3818 | * have the same weight. |
3588 | * | 3819 | * |
3589 | * Not checking condition (ii) evidently exposes bfqq to the | 3820 | * Not checking condition (ii) evidently exposes bfqq to the |
3590 | * risk of getting less throughput than its fair share. | 3821 | * risk of getting less throughput than its fair share. |
@@ -3636,7 +3867,7 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, | |||
3636 | * compound condition that is checked below for deciding | 3867 | * compound condition that is checked below for deciding |
3637 | * whether the scenario is asymmetric. To explain this | 3868 | * whether the scenario is asymmetric. To explain this |
3638 | * compound condition, we need to add that the function | 3869 | * compound condition, we need to add that the function |
3639 | * bfq_symmetric_scenario checks the weights of only | 3870 | * bfq_asymmetric_scenario checks the weights of only |
3640 | * non-weight-raised queues, for efficiency reasons (see | 3871 | * non-weight-raised queues, for efficiency reasons (see |
3641 | * comments on bfq_weights_tree_add()). Then the fact that | 3872 | * comments on bfq_weights_tree_add()). Then the fact that |
3642 | * bfqq is weight-raised is checked explicitly here. More | 3873 | * bfqq is weight-raised is checked explicitly here. More |
@@ -3664,7 +3895,7 @@ static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, | |||
3664 | return (bfqq->wr_coeff > 1 && | 3895 | return (bfqq->wr_coeff > 1 && |
3665 | bfqd->wr_busy_queues < | 3896 | bfqd->wr_busy_queues < |
3666 | bfq_tot_busy_queues(bfqd)) || | 3897 | bfq_tot_busy_queues(bfqd)) || |
3667 | !bfq_symmetric_scenario(bfqd); | 3898 | bfq_asymmetric_scenario(bfqd, bfqq); |
3668 | } | 3899 | } |
3669 | 3900 | ||
3670 | /* | 3901 | /* |
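[Editorial note] A compact restatement of the resulting plugging condition (userspace toy, hypothetical names): idle for bfqq when it is weight-raised while not every busy queue is, or when the scenario is asymmetric for it.

#include <stdbool.h>

static bool toy_idling_needed(unsigned int wr_coeff, int wr_busy_queues,
			      int tot_busy_queues, bool asymmetric)
{
	return (wr_coeff > 1 && wr_busy_queues < tot_busy_queues) ||
	       asymmetric;
}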
@@ -3740,26 +3971,98 @@ static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) | |||
3740 | return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq); | 3971 | return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq); |
3741 | } | 3972 | } |
3742 | 3973 | ||
3743 | static struct bfq_queue *bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) | 3974 | /* |
3975 | * This function chooses the queue from which to pick the next extra | ||
3976 | * I/O request to inject, if it finds a compatible queue. See the | ||
3977 | * comments on bfq_update_inject_limit() for details on the injection | ||
3978 | * mechanism, and for the definitions of the quantities mentioned | ||
3979 | * below. | ||
3980 | */ | ||
3981 | static struct bfq_queue * | ||
3982 | bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) | ||
3744 | { | 3983 | { |
3745 | struct bfq_queue *bfqq; | 3984 | struct bfq_queue *bfqq, *in_serv_bfqq = bfqd->in_service_queue; |
3985 | unsigned int limit = in_serv_bfqq->inject_limit; | ||
3986 | /* | ||
3987 | * If | ||
3988 | * - bfqq is not weight-raised and therefore does not carry | ||
3989 | * time-critical I/O, | ||
3990 | * or | ||
3991 | * - regardless of whether bfqq is weight-raised, bfqq has | ||
3992 | * however a long think time, during which it can absorb the | ||
3993 | * effect of an appropriate number of extra I/O requests | ||
3994 | * from other queues (see bfq_update_inject_limit for | ||
3995 | * details on the computation of this number); | ||
3996 | * then injection can be performed without restrictions. | ||
3997 | */ | ||
3998 | bool in_serv_always_inject = in_serv_bfqq->wr_coeff == 1 || | ||
3999 | !bfq_bfqq_has_short_ttime(in_serv_bfqq); | ||
4000 | |||
4001 | /* | ||
4002 | * If | ||
4003 | * - the baseline total service time could not be sampled yet, | ||
4004 | * so the inject limit happens to be still 0, and | ||
4005 | * - a lot of time has elapsed since the plugging of I/O | ||
4006 | * dispatching started, so drive speed is being wasted | ||
4007 | * significantly; | ||
4008 | * then temporarily raise inject limit to one request. | ||
4009 | */ | ||
4010 | if (limit == 0 && in_serv_bfqq->last_serv_time_ns == 0 && | ||
4011 | bfq_bfqq_wait_request(in_serv_bfqq) && | ||
4012 | time_is_before_eq_jiffies(bfqd->last_idling_start_jiffies + | ||
4013 | bfqd->bfq_slice_idle) | ||
4014 | ) | ||
4015 | limit = 1; | ||
4016 | |||
4017 | if (bfqd->rq_in_driver >= limit) | ||
4018 | return NULL; | ||
3746 | 4019 | ||
3747 | /* | 4020 | /* |
3748 | * A linear search; but, with a high probability, very few | 4021 | * Linear search of the source queue for injection; but, with |
3749 | * steps are needed to find a candidate queue, i.e., a queue | 4022 | * a high probability, very few steps are needed to find a |
3750 | * with enough budget left for its next request. In fact: | 4023 | * candidate queue, i.e., a queue with enough budget left for |
4024 | * its next request. In fact: | ||
3751 | * - BFQ dynamically updates the budget of every queue so as | 4025 | * - BFQ dynamically updates the budget of every queue so as |
3752 | * to accommodate the expected backlog of the queue; | 4026 | * to accommodate the expected backlog of the queue; |
3753 | * - if a queue gets all its requests dispatched as injected | 4027 | * - if a queue gets all its requests dispatched as injected |
3754 | * service, then the queue is removed from the active list | 4028 | * service, then the queue is removed from the active list |
3755 | * (and re-added only if it gets new requests, but with | 4029 | * (and re-added only if it gets new requests, but then it |
3756 | * enough budget for its new backlog). | 4030 | * is assigned again enough budget for its new backlog). |
3757 | */ | 4031 | */ |
3758 | list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) | 4032 | list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) |
3759 | if (!RB_EMPTY_ROOT(&bfqq->sort_list) && | 4033 | if (!RB_EMPTY_ROOT(&bfqq->sort_list) && |
4034 | (in_serv_always_inject || bfqq->wr_coeff > 1) && | ||
3760 | bfq_serv_to_charge(bfqq->next_rq, bfqq) <= | 4035 | bfq_serv_to_charge(bfqq->next_rq, bfqq) <= |
3761 | bfq_bfqq_budget_left(bfqq)) | 4036 | bfq_bfqq_budget_left(bfqq)) { |
3762 | return bfqq; | 4037 | /* |
4038 | * Allow for only one large in-flight request | ||
4039 | * on non-rotational devices, for the | ||
4040 | * following reason. On non-rotational drives, | ||
4041 | * large requests take much longer than | ||
4042 | * smaller requests to be served. In addition, | ||
4043 | * the drive prefers to serve large requests | ||
4044 | * w.r.t. to small ones, if it can choose. So, | ||
4045 | * having more than one large request queued | ||
4046 | * in the drive may easily make the next first | ||
4047 | * request of the in-service queue wait for so | ||
4048 | * long to break bfqq's service guarantees. On | ||
4049 | * the bright side, large requests let the | ||
4050 | * drive reach a very high throughput, even if | ||
4051 | * there is only one in-flight large request | ||
4052 | * at a time. | ||
4053 | */ | ||
4054 | if (blk_queue_nonrot(bfqd->queue) && | ||
4055 | blk_rq_sectors(bfqq->next_rq) >= | ||
4056 | BFQQ_SECT_THR_NONROT) | ||
4057 | limit = min_t(unsigned int, 1, limit); | ||
4058 | else | ||
4059 | limit = in_serv_bfqq->inject_limit; | ||
4060 | |||
4061 | if (bfqd->rq_in_driver < limit) { | ||
4062 | bfqd->rqs_injected = true; | ||
4063 | return bfqq; | ||
4064 | } | ||
4065 | } | ||
3763 | 4066 | ||
3764 | return NULL; | 4067 | return NULL; |
3765 | } | 4068 | } |
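[Editorial note] A userspace toy of the candidate scan above, with hypothetical names: respect the in-service queue's inject limit, inject only from weight-raised queues unless the in-service queue tolerates unrestricted injection, and allow at most one in-flight large request on non-rotational drives. The temporary raise of a still-zero limit while idling drags on with no baseline is left out of the sketch.

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical toy queue; the sector threshold mirrors the intent of
 * BFQQ_SECT_THR_NONROT without claiming its actual value. */
struct toy_q {
	bool has_request;
	bool fits_budget;
	unsigned int wr_coeff;
	unsigned int next_rq_sectors;
};

static struct toy_q *toy_choose_for_injection(struct toy_q *qs, size_t n,
					      unsigned int limit,
					      bool in_serv_always_inject,
					      unsigned int rq_in_driver,
					      bool nonrot,
					      unsigned int big_rq_sectors)
{
	size_t i;

	if (rq_in_driver >= limit)
		return NULL;

	for (i = 0; i < n; i++) {
		struct toy_q *q = &qs[i];
		unsigned int lim = limit;

		if (!q->has_request || !q->fits_budget)
			continue;
		if (!in_serv_always_inject && q->wr_coeff == 1)
			continue;
		/* at most one in-flight large request on non-rotational
		 * drives, to protect the in-service queue's latency */
		if (nonrot && q->next_rq_sectors >= big_rq_sectors)
			lim = lim < 1 ? lim : 1;
		if (rq_in_driver < lim)
			return q;
	}
	return NULL;
}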
@@ -3846,14 +4149,32 @@ check_queue: | |||
3846 | * for a new request, or has requests waiting for a completion and | 4149 | * for a new request, or has requests waiting for a completion and |
3847 | * may idle after their completion, then keep it anyway. | 4150 | * may idle after their completion, then keep it anyway. |
3848 | * | 4151 | * |
3849 | * Yet, to boost throughput, inject service from other queues if | 4152 | * Yet, inject service from other queues if it boosts |
3850 | * possible. | 4153 | * throughput and is possible. |
3851 | */ | 4154 | */ |
3852 | if (bfq_bfqq_wait_request(bfqq) || | 4155 | if (bfq_bfqq_wait_request(bfqq) || |
3853 | (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) { | 4156 | (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) { |
3854 | if (bfq_bfqq_injectable(bfqq) && | 4157 | struct bfq_queue *async_bfqq = |
3855 | bfqq->injected_service * bfqq->inject_coeff < | 4158 | bfqq->bic && bfqq->bic->bfqq[0] && |
3856 | bfqq->entity.service * 10) | 4159 | bfq_bfqq_busy(bfqq->bic->bfqq[0]) ? |
4160 | bfqq->bic->bfqq[0] : NULL; | ||
4161 | |||
4162 | /* | ||
4163 | * If the process associated with bfqq has also async | ||
4164 | * I/O pending, then inject it | ||
4165 | * unconditionally. Injecting I/O from the same | ||
4166 | * process can cause no harm to the process. On the | ||
4167 | * contrary, it can only increase bandwidth and reduce | ||
4168 | * latency for the process. | ||
4169 | */ | ||
4170 | if (async_bfqq && | ||
4171 | icq_to_bic(async_bfqq->next_rq->elv.icq) == bfqq->bic && | ||
4172 | bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <= | ||
4173 | bfq_bfqq_budget_left(async_bfqq)) | ||
4174 | bfqq = bfqq->bic->bfqq[0]; | ||
4175 | else if (!idling_boosts_thr_without_issues(bfqd, bfqq) && | ||
4176 | (bfqq->wr_coeff == 1 || bfqd->wr_busy_queues > 1 || | ||
4177 | !bfq_bfqq_has_short_ttime(bfqq))) | ||
3857 | bfqq = bfq_choose_bfqq_for_injection(bfqd); | 4178 | bfqq = bfq_choose_bfqq_for_injection(bfqd); |
3858 | else | 4179 | else |
3859 | bfqq = NULL; | 4180 | bfqq = NULL; |
@@ -3945,15 +4266,15 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, | |||
3945 | 4266 | ||
3946 | bfq_bfqq_served(bfqq, service_to_charge); | 4267 | bfq_bfqq_served(bfqq, service_to_charge); |
3947 | 4268 | ||
3948 | bfq_dispatch_remove(bfqd->queue, rq); | 4269 | if (bfqq == bfqd->in_service_queue && bfqd->wait_dispatch) { |
4270 | bfqd->wait_dispatch = false; | ||
4271 | bfqd->waited_rq = rq; | ||
4272 | } | ||
3949 | 4273 | ||
3950 | if (bfqq != bfqd->in_service_queue) { | 4274 | bfq_dispatch_remove(bfqd->queue, rq); |
3951 | if (likely(bfqd->in_service_queue)) | ||
3952 | bfqd->in_service_queue->injected_service += | ||
3953 | bfq_serv_to_charge(rq, bfqq); | ||
3954 | 4275 | ||
4276 | if (bfqq != bfqd->in_service_queue) | ||
3955 | goto return_rq; | 4277 | goto return_rq; |
3956 | } | ||
3957 | 4278 | ||
3958 | /* | 4279 | /* |
3959 | * If weight raising has to terminate for bfqq, then next | 4280 | * If weight raising has to terminate for bfqq, then next |
@@ -4384,13 +4705,6 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
4384 | bfq_mark_bfqq_has_short_ttime(bfqq); | 4705 | bfq_mark_bfqq_has_short_ttime(bfqq); |
4385 | bfq_mark_bfqq_sync(bfqq); | 4706 | bfq_mark_bfqq_sync(bfqq); |
4386 | bfq_mark_bfqq_just_created(bfqq); | 4707 | bfq_mark_bfqq_just_created(bfqq); |
4387 | /* | ||
4388 | * Aggressively inject a lot of service: up to 90%. | ||
4389 | * This coefficient remains constant during bfqq life, | ||
4390 | * but this behavior might be changed, after enough | ||
4391 | * testing and tuning. | ||
4392 | */ | ||
4393 | bfqq->inject_coeff = 1; | ||
4394 | } else | 4708 | } else |
4395 | bfq_clear_bfqq_sync(bfqq); | 4709 | bfq_clear_bfqq_sync(bfqq); |
4396 | 4710 | ||
@@ -4529,6 +4843,11 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
4529 | { | 4843 | { |
4530 | bfqq->seek_history <<= 1; | 4844 | bfqq->seek_history <<= 1; |
4531 | bfqq->seek_history |= BFQ_RQ_SEEKY(bfqd, bfqq->last_request_pos, rq); | 4845 | bfqq->seek_history |= BFQ_RQ_SEEKY(bfqd, bfqq->last_request_pos, rq); |
4846 | |||
4847 | if (bfqq->wr_coeff > 1 && | ||
4848 | bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && | ||
4849 | BFQQ_TOTALLY_SEEKY(bfqq)) | ||
4850 | bfq_bfqq_end_wr(bfqq); | ||
4532 | } | 4851 | } |
4533 | 4852 | ||
4534 | static void bfq_update_has_short_ttime(struct bfq_data *bfqd, | 4853 | static void bfq_update_has_short_ttime(struct bfq_data *bfqd, |
@@ -4823,6 +5142,9 @@ static void bfq_update_hw_tag(struct bfq_data *bfqd) | |||
4823 | bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; | 5142 | bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; |
4824 | bfqd->max_rq_in_driver = 0; | 5143 | bfqd->max_rq_in_driver = 0; |
4825 | bfqd->hw_tag_samples = 0; | 5144 | bfqd->hw_tag_samples = 0; |
5145 | |||
5146 | bfqd->nonrot_with_queueing = | ||
5147 | blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag; | ||
4826 | } | 5148 | } |
4827 | 5149 | ||
4828 | static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) | 5150 | static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) |
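[Editorial note] Once enough completions have been sampled, bfqd->nonrot_with_queueing caches "non-rotational drive that actually queues commands"; the rest of the patch uses it as a fast path to skip pos-tree upkeep and queue merging. A toy of the tail of the sampling step visible above (hypothetical userspace types; the real threshold is BFQ_HW_QUEUE_THRESHOLD):

#include <stdbool.h>

struct toy_dev {
	int max_rq_in_driver;
	int hw_tag_samples;
	bool hw_tag;
	bool nonrot;
	bool nonrot_with_queueing;
};

/* Enough samples collected: decide whether the drive really queues
 * commands, restart sampling, and refresh the cached combination. */
static void toy_update_hw_tag_tail(struct toy_dev *d, int queue_threshold)
{
	d->hw_tag = d->max_rq_in_driver > queue_threshold;
	d->max_rq_in_driver = 0;
	d->hw_tag_samples = 0;

	d->nonrot_with_queueing = d->nonrot && d->hw_tag;
}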
@@ -4950,6 +5272,147 @@ static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) | |||
4950 | } | 5272 | } |
4951 | 5273 | ||
4952 | /* | 5274 | /* |
5275 | * The processes associated with bfqq may happen to generate their | ||
5276 | * cumulative I/O at a lower rate than the rate at which the device | ||
5277 | * could serve the same I/O. This is rather probable, e.g., if only | ||
5278 | * one process is associated with bfqq and the device is an SSD. It | ||
5279 | * results in bfqq becoming often empty while in service. In this | ||
5280 | * respect, if BFQ is allowed to switch to another queue when bfqq | ||
5281 | * remains empty, then the device goes on being fed with I/O requests, | ||
5282 | * and the throughput is not affected. In contrast, if BFQ is not | ||
5283 | * allowed to switch to another queue---because bfqq is sync and | ||
5284 | * I/O-dispatch needs to be plugged while bfqq is temporarily | ||
5285 | * empty---then, during the service of bfqq, there will be frequent | ||
5286 | * "service holes", i.e., time intervals during which bfqq gets empty | ||
5287 | * and the device can only consume the I/O already queued in its | ||
5288 | * hardware queues. During service holes, the device may even get to | ||
5289 | * remaining idle. In the end, during the service of bfqq, the device | ||
5290 | * is driven at a lower speed than the one it can reach with the kind | ||
5291 | * of I/O flowing through bfqq. | ||
5292 | * | ||
5293 | * To counter this loss of throughput, BFQ implements a "request | ||
5294 | * injection mechanism", which tries to fill the above service holes | ||
5295 | * with I/O requests taken from other queues. The hard part in this | ||
5296 | * mechanism is finding the right amount of I/O to inject, so as to | ||
5297 | * both boost throughput and not break bfqq's bandwidth and latency | ||
5298 | * guarantees. In this respect, the mechanism maintains a per-queue | ||
5299 | * inject limit, computed as below. While bfqq is empty, the injection | ||
5300 | * mechanism dispatches extra I/O requests only until the total number | ||
5301 | * of I/O requests in flight---i.e., already dispatched but not yet | ||
5302 | * completed---remains lower than this limit. | ||
5303 | * | ||
5304 | * A first definition comes in handy to introduce the algorithm by | ||
5305 | * which the inject limit is computed. We define as first request for | ||
5306 | * bfqq, an I/O request for bfqq that arrives while bfqq is in | ||
5307 | * service, and causes bfqq to switch from empty to non-empty. The | ||
5308 | * algorithm updates the limit as a function of the effect of | ||
5309 | * injection on the service times of only the first requests of | ||
5310 | * bfqq. The reason for this restriction is that these are the | ||
5311 | * requests whose service time is affected most, because they are the | ||
5312 | * first to arrive after injection possibly occurred. | ||
5313 | * | ||
5314 | * To evaluate the effect of injection, the algorithm measures the | ||
5315 | * "total service time" of first requests. We define as total service | ||
5316 | * time of an I/O request, the time that elapses since when the | ||
5317 | * request is enqueued into bfqq, to when it is completed. This | ||
5318 | * quantity allows the whole effect of injection to be measured. It is | ||
5319 | * easy to see why. Suppose that some requests of other queues are | ||
5320 | * actually injected while bfqq is empty, and that a new request R | ||
5321 | * then arrives for bfqq. If the device does start to serve all or | ||
5322 | * part of the injected requests during the service hole, then, | ||
5323 | * because of this extra service, it may delay the next invocation of | ||
5324 | * the dispatch hook of BFQ. Then, even after R gets eventually | ||
5325 | * dispatched, the device may delay the actual service of R if it is | ||
5326 | * still busy serving the extra requests, or if it decides to serve, | ||
5327 | * before R, some extra request still present in its queues. As a | ||
5328 | * conclusion, the cumulative extra delay caused by injection can be | ||
5329 | * easily evaluated by just comparing the total service time of first | ||
5330 | * requests with and without injection. | ||
5331 | * | ||
5332 | * The limit-update algorithm works as follows. On the arrival of a | ||
5333 | * first request of bfqq, the algorithm measures the total time of the | ||
5334 | * request only if one of the three cases below holds, and, for each | ||
5335 | * case, it updates the limit as described below: | ||
5336 | * | ||
5337 | * (1) If there is no in-flight request. This gives a baseline for the | ||
5338 | * total service time of the requests of bfqq. If the baseline has | ||
5339 | * not been computed yet, then, after computing it, the limit is | ||
5340 | * set to 1, to start boosting throughput, and to prepare the | ||
5341 | * ground for the next case. If the baseline has already been | ||
5342 | * computed, then it is updated, in case it results to be lower | ||
5343 | * than the previous value. | ||
5344 | * | ||
5345 | * (2) If the limit is higher than 0 and there are in-flight | ||
5346 | * requests. By comparing the total service time in this case with | ||
5347 | * the above baseline, it is possible to know at which extent the | ||
5348 | * current value of the limit is inflating the total service | ||
5349 | * time. If the inflation is below a certain threshold, then bfqq | ||
5350 | * is assumed to be suffering from no perceivable loss of its | ||
5351 | * service guarantees, and the limit is even tentatively | ||
5352 | * increased. If the inflation is above the threshold, then the | ||
5353 | * limit is decreased. Due to the lack of any hysteresis, this | ||
5354 | * logic makes the limit oscillate even in steady workload | ||
5355 | * conditions. Yet we opted for it, because it is fast in reaching | ||
5356 | * the best value for the limit, as a function of the current I/O | ||
5357 | * workload. To reduce oscillations, this step is disabled for a | ||
5358 | * short time interval after the limit happens to be decreased. | ||
5359 | * | ||
5360 | * (3) Periodically, after resetting the limit, to make sure that the | ||
5361 | * limit eventually drops in case the workload changes. This is | ||
5362 | * needed because, after the limit has gone safely up for a | ||
5363 | * certain workload, it is impossible to guess whether the | ||
5364 | * baseline total service time may have changed, without measuring | ||
5365 | * it again without injection. A more effective version of this | ||
5366 | * step might be to just sample the baseline, by interrupting | ||
5367 | * injection only once, and then to reset/lower the limit only if | ||
5368 | * the total service time with the current limit does happen to be | ||
5369 | * too large. | ||
5370 | * | ||
5371 | * More details on each step are provided in the comments on the | ||
5372 | * pieces of code that implement these steps: the branch handling the | ||
5373 | * transition from empty to non-empty in bfq_add_request(), the branch | ||
5374 | * handling injection in bfq_select_queue(), and the function | ||
5375 | * bfq_choose_bfqq_for_injection(). These comments also explain some | ||
5376 | * exceptions, made by the injection mechanism in some special cases. | ||
5377 | */ | ||
5378 | static void bfq_update_inject_limit(struct bfq_data *bfqd, | ||
5379 | struct bfq_queue *bfqq) | ||
5380 | { | ||
5381 | u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns; | ||
5382 | unsigned int old_limit = bfqq->inject_limit; | ||
5383 | |||
5384 | if (bfqq->last_serv_time_ns > 0) { | ||
5385 | u64 threshold = (bfqq->last_serv_time_ns * 3)>>1; | ||
5386 | |||
5387 | if (tot_time_ns >= threshold && old_limit > 0) { | ||
5388 | bfqq->inject_limit--; | ||
5389 | bfqq->decrease_time_jif = jiffies; | ||
5390 | } else if (tot_time_ns < threshold && | ||
5391 | old_limit < bfqd->max_rq_in_driver<<1) | ||
5392 | bfqq->inject_limit++; | ||
5393 | } | ||
5394 | |||
5395 | /* | ||
5396 | * Either we still have to compute the base value for the | ||
5397 | * total service time, and there seem to be the right | ||
5398 | * conditions to do it, or we can lower the last base value | ||
5399 | * computed. | ||
5400 | */ | ||
5401 | if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 0) || | ||
5402 | tot_time_ns < bfqq->last_serv_time_ns) { | ||
5403 | bfqq->last_serv_time_ns = tot_time_ns; | ||
5404 | /* | ||
5405 | * Now we certainly have a base value: make sure we | ||
5406 | * start trying injection. | ||
5407 | */ | ||
5408 | bfqq->inject_limit = max_t(unsigned int, 1, old_limit); | ||
5409 | } | ||
5410 | |||
5411 | /* update complete, not waiting for any request completion any longer */ | ||
5412 | bfqd->waited_rq = NULL; | ||
5413 | } | ||
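To make the update rule above concrete (hypothetical numbers): with a baseline last_serv_time_ns of 200 us, the threshold is (200 * 3) >> 1 = 300 us. A first request whose measured total service time is 350 us while old_limit > 0 gets the limit decreased and decrease_time_jif refreshed; a measurement of 250 us lets the limit grow, but only while it stays below twice max_rq_in_driver.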
5414 | |||
5415 | /* | ||
4953 | * Handle either a requeue or a finish for rq. The things to do are | 5416 | * Handle either a requeue or a finish for rq. The things to do are |
4954 | * the same in both cases: all references to rq are to be dropped. In | 5417 | * the same in both cases: all references to rq are to be dropped. In |
4955 | * particular, rq is considered completed from the point of view of | 5418 | * particular, rq is considered completed from the point of view of |
@@ -4993,6 +5456,9 @@ static void bfq_finish_requeue_request(struct request *rq) | |||
4993 | 5456 | ||
4994 | spin_lock_irqsave(&bfqd->lock, flags); | 5457 | spin_lock_irqsave(&bfqd->lock, flags); |
4995 | 5458 | ||
5459 | if (rq == bfqd->waited_rq) | ||
5460 | bfq_update_inject_limit(bfqd, bfqq); | ||
5461 | |||
4996 | bfq_completed_request(bfqq, bfqd); | 5462 | bfq_completed_request(bfqq, bfqd); |
4997 | bfq_finish_requeue_request_body(bfqq); | 5463 | bfq_finish_requeue_request_body(bfqq); |
4998 | 5464 | ||
@@ -5156,7 +5622,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) | |||
5156 | * preparation is that, after the prepare_request hook is invoked for | 5622 | * preparation is that, after the prepare_request hook is invoked for |
5157 | * rq, rq may still be transformed into a request with no icq, i.e., a | 5623 | * rq, rq may still be transformed into a request with no icq, i.e., a |
5158 | * request not associated with any queue. No bfq hook is invoked to | 5624 | * request not associated with any queue. No bfq hook is invoked to |
5159 | * signal this tranformation. As a consequence, should these | 5625 | * signal this transformation. As a consequence, should these |
5160 | * preparation operations be performed when the prepare_request hook | 5626 | * preparation operations be performed when the prepare_request hook |
5161 | * is invoked, and should rq be transformed one moment later, bfq | 5627 | * is invoked, and should rq be transformed one moment later, bfq |
5162 | * would end up in an inconsistent state, because it would have | 5628 | * would end up in an inconsistent state, because it would have |
@@ -5247,7 +5713,29 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) | |||
5247 | } | 5713 | } |
5248 | } | 5714 | } |
5249 | 5715 | ||
5250 | if (unlikely(bfq_bfqq_just_created(bfqq))) | 5716 | /* |
5717 | * Consider bfqq as possibly belonging to a burst of newly | ||
5718 | * created queues only if: | ||
5719 | * 1) A burst is actually happening (bfqd->burst_size > 0) | ||
5720 | * or | ||
5721 | * 2) There is no other active queue. In fact, if, in | ||
5722 | * contrast, there are active queues not belonging to the | ||
5723 | * possible burst bfqq may belong to, then there is no gain | ||
5724 | * in considering bfqq as belonging to a burst, and | ||
5725 | * therefore in not weight-raising bfqq. See comments on | ||
5726 | * bfq_handle_burst(). | ||
5727 | * | ||
5728 | * This filtering also helps eliminating false positives, | ||
5729 | * occurring when bfqq does not belong to an actual large | ||
5730 | * burst, but some background task (e.g., a service) happens | ||
5731 | * to trigger the creation of new queues very close to when | ||
5732 | * bfqq and its possible companion queues are created. See | ||
5733 | * comments on bfq_handle_burst() for further details also on | ||
5734 | * this issue. | ||
5735 | */ | ||
5736 | if (unlikely(bfq_bfqq_just_created(bfqq) && | ||
5737 | (bfqd->burst_size > 0 || | ||
5738 | bfq_tot_busy_queues(bfqd) == 0))) | ||
5251 | bfq_handle_burst(bfqd, bfqq); | 5739 | bfq_handle_burst(bfqd, bfqq); |
5252 | 5740 | ||
5253 | return bfqq; | 5741 | return bfqq; |
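To illustrate the guard added above (hypothetical scenario): if bfqq has just been created while bfqd->burst_size is 0 and, say, three unrelated queues are already busy, the condition is false and bfq_handle_burst() is skipped, so bfqq keeps its chance of being weight-raised; if instead a burst is already being tracked, or no other queue is busy, the burst bookkeeping runs as before.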
@@ -5507,7 +5995,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) | |||
5507 | HRTIMER_MODE_REL); | 5995 | HRTIMER_MODE_REL); |
5508 | bfqd->idle_slice_timer.function = bfq_idle_slice_timer; | 5996 | bfqd->idle_slice_timer.function = bfq_idle_slice_timer; |
5509 | 5997 | ||
5510 | bfqd->queue_weights_tree = RB_ROOT; | 5998 | bfqd->queue_weights_tree = RB_ROOT_CACHED; |
5511 | bfqd->num_groups_with_pending_reqs = 0; | 5999 | bfqd->num_groups_with_pending_reqs = 0; |
5512 | 6000 | ||
5513 | INIT_LIST_HEAD(&bfqd->active_list); | 6001 | INIT_LIST_HEAD(&bfqd->active_list); |
@@ -5515,6 +6003,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) | |||
5515 | INIT_HLIST_HEAD(&bfqd->burst_list); | 6003 | INIT_HLIST_HEAD(&bfqd->burst_list); |
5516 | 6004 | ||
5517 | bfqd->hw_tag = -1; | 6005 | bfqd->hw_tag = -1; |
6006 | bfqd->nonrot_with_queueing = blk_queue_nonrot(bfqd->queue); | ||
5518 | 6007 | ||
5519 | bfqd->bfq_max_budget = bfq_default_max_budget; | 6008 | bfqd->bfq_max_budget = bfq_default_max_budget; |
5520 | 6009 | ||
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 86394e503ca9..c2faa77824f8 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h | |||
@@ -1,16 +1,7 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
1 | /* | 2 | /* |
2 | * Header file for the BFQ I/O scheduler: data structures and | 3 | * Header file for the BFQ I/O scheduler: data structures and |
3 | * prototypes of interface functions among BFQ components. | 4 | * prototypes of interface functions among BFQ components. |
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation; either version 2 of the | ||
8 | * License, or (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * General Public License for more details. | ||
14 | */ | 5 | */ |
15 | #ifndef _BFQ_H | 6 | #ifndef _BFQ_H |
16 | #define _BFQ_H | 7 | #define _BFQ_H |
@@ -32,6 +23,8 @@ | |||
32 | #define BFQ_DEFAULT_GRP_IOPRIO 0 | 23 | #define BFQ_DEFAULT_GRP_IOPRIO 0 |
33 | #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE | 24 | #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE |
34 | 25 | ||
26 | #define MAX_PID_STR_LENGTH 12 | ||
27 | |||
35 | /* | 28 | /* |
36 | * Soft real-time applications are extremely more latency sensitive | 29 | * Soft real-time applications are extremely more latency sensitive |
37 | * than interactive ones. Over-raise the weight of the former to | 30 | * than interactive ones. Over-raise the weight of the former to |
@@ -89,7 +82,7 @@ struct bfq_service_tree { | |||
89 | * expiration. This peculiar definition allows for the following | 82 | * expiration. This peculiar definition allows for the following |
90 | * optimization, not yet exploited: while a given entity is still in | 83 | * optimization, not yet exploited: while a given entity is still in |
91 | * service, we already know which is the best candidate for next | 84 | * service, we already know which is the best candidate for next |
92 | * service among the other active entitities in the same parent | 85 | * service among the other active entities in the same parent |
93 | * entity. We can then quickly compare the timestamps of the | 86 | * entity. We can then quickly compare the timestamps of the |
94 | * in-service entity with those of such best candidate. | 87 | * in-service entity with those of such best candidate. |
95 | * | 88 | * |
@@ -140,7 +133,7 @@ struct bfq_weight_counter { | |||
140 | * | 133 | * |
141 | * Unless cgroups are used, the weight value is calculated from the | 134 | * Unless cgroups are used, the weight value is calculated from the |
142 | * ioprio to export the same interface as CFQ. When dealing with | 135 | * ioprio to export the same interface as CFQ. When dealing with |
143 | * ``well-behaved'' queues (i.e., queues that do not spend too much | 136 | * "well-behaved" queues (i.e., queues that do not spend too much |
144 | * time to consume their budget and have true sequential behavior, and | 137 | * time to consume their budget and have true sequential behavior, and |
145 | * when there are no external factors breaking anticipation) the | 138 | * when there are no external factors breaking anticipation) the |
146 | * relative weights at each level of the cgroups hierarchy should be | 139 | * relative weights at each level of the cgroups hierarchy should be |
@@ -240,6 +233,13 @@ struct bfq_queue { | |||
240 | /* next ioprio and ioprio class if a change is in progress */ | 233 | /* next ioprio and ioprio class if a change is in progress */ |
241 | unsigned short new_ioprio, new_ioprio_class; | 234 | unsigned short new_ioprio, new_ioprio_class; |
242 | 235 | ||
236 | /* last total-service-time sample, see bfq_update_inject_limit() */ | ||
237 | u64 last_serv_time_ns; | ||
238 | /* limit for request injection */ | ||
239 | unsigned int inject_limit; | ||
240 | /* last time the inject limit has been decreased, in jiffies */ | ||
241 | unsigned long decrease_time_jif; | ||
242 | |||
243 | /* | 243 | /* |
244 | * Shared bfq_queue if queue is cooperating with one or more | 244 | * Shared bfq_queue if queue is cooperating with one or more |
245 | * other queues. | 245 | * other queues. |
@@ -357,29 +357,6 @@ struct bfq_queue { | |||
357 | 357 | ||
358 | /* max service rate measured so far */ | 358 | /* max service rate measured so far */ |
359 | u32 max_service_rate; | 359 | u32 max_service_rate; |
360 | /* | ||
361 | * Ratio between the service received by bfqq while it is in | ||
362 | * service, and the cumulative service (of requests of other | ||
363 | * queues) that may be injected while bfqq is empty but still | ||
364 | * in service. To increase precision, the coefficient is | ||
365 | * measured in tenths of unit. Here are some example of (1) | ||
366 | * ratios, (2) resulting percentages of service injected | ||
367 | * w.r.t. to the total service dispatched while bfqq is in | ||
368 | * service, and (3) corresponding values of the coefficient: | ||
369 | * 1 (50%) -> 10 | ||
370 | * 2 (33%) -> 20 | ||
371 | * 10 (9%) -> 100 | ||
372 | * 9.9 (9%) -> 99 | ||
373 | * 1.5 (40%) -> 15 | ||
374 | * 0.5 (66%) -> 5 | ||
375 | * 0.1 (90%) -> 1 | ||
376 | * | ||
377 | * So, if the coefficient is lower than 10, then | ||
378 | * injected service is more than bfqq service. | ||
379 | */ | ||
380 | unsigned int inject_coeff; | ||
381 | /* amount of service injected in current service slot */ | ||
382 | unsigned int injected_service; | ||
383 | }; | 360 | }; |
384 | 361 | ||
385 | /** | 362 | /** |
@@ -419,6 +396,15 @@ struct bfq_io_cq { | |||
419 | bool was_in_burst_list; | 396 | bool was_in_burst_list; |
420 | 397 | ||
421 | /* | 398 | /* |
399 | * Save the weight when a merge occurs, to be able | ||
400 | * to restore it in case of split. If the weight is not | ||
401 | * correctly restored when the queue is recycled, | ||
402 | * then the weight of the recycled queue could differ | ||
403 | * from the weight of the original queue. | ||
404 | */ | ||
405 | unsigned int saved_weight; | ||
406 | |||
407 | /* | ||
422 | * Similar to previous fields: save wr information. | 408 | * Similar to previous fields: save wr information. |
423 | */ | 409 | */ |
424 | unsigned long saved_wr_coeff; | 410 | unsigned long saved_wr_coeff; |
@@ -450,7 +436,7 @@ struct bfq_data { | |||
450 | * weight-raised @bfq_queue (see the comments to the functions | 436 | * weight-raised @bfq_queue (see the comments to the functions |
451 | * bfq_weights_tree_[add|remove] for further details). | 437 | * bfq_weights_tree_[add|remove] for further details). |
452 | */ | 438 | */ |
453 | struct rb_root queue_weights_tree; | 439 | struct rb_root_cached queue_weights_tree; |
454 | 440 | ||
455 | /* | 441 | /* |
456 | * Number of groups with at least one descendant process that | 442 | * Number of groups with at least one descendant process that |
@@ -513,6 +499,9 @@ struct bfq_data { | |||
513 | /* number of requests dispatched and waiting for completion */ | 499 | /* number of requests dispatched and waiting for completion */ |
514 | int rq_in_driver; | 500 | int rq_in_driver; |
515 | 501 | ||
502 | /* true if the device is non-rotational and performs queueing */ | ||
503 | bool nonrot_with_queueing; | ||
504 | |||
516 | /* | 505 | /* |
517 | * Maximum number of requests in driver in the last | 506 | * Maximum number of requests in driver in the last |
518 | * @hw_tag_samples completed requests. | 507 | * @hw_tag_samples completed requests. |
@@ -544,6 +533,26 @@ struct bfq_data { | |||
544 | /* time of last request completion (ns) */ | 533 | /* time of last request completion (ns) */ |
545 | u64 last_completion; | 534 | u64 last_completion; |
546 | 535 | ||
536 | /* time of last transition from empty to non-empty (ns) */ | ||
537 | u64 last_empty_occupied_ns; | ||
538 | |||
539 | /* | ||
540 | * Flag set to activate the sampling of the total service time | ||
541 | * of a just-arrived first I/O request (see | ||
542 | * bfq_update_inject_limit()). This will cause the setting of | ||
543 | * waited_rq when the request is finally dispatched. | ||
544 | */ | ||
545 | bool wait_dispatch; | ||
546 | /* | ||
547 | * If set, then bfq_update_inject_limit() is invoked when | ||
548 | * waited_rq is eventually completed. | ||
549 | */ | ||
550 | struct request *waited_rq; | ||
551 | /* | ||
552 | * True if some request has been injected during the last service hole. | ||
553 | */ | ||
554 | bool rqs_injected; | ||
555 | |||
547 | /* time of first rq dispatch in current observation interval (ns) */ | 556 | /* time of first rq dispatch in current observation interval (ns) */ |
548 | u64 first_dispatch; | 557 | u64 first_dispatch; |
549 | /* time of last rq dispatch in current observation interval (ns) */ | 558 | /* time of last rq dispatch in current observation interval (ns) */ |
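Pieced together from the field comments above and from the hook added in bfq_finish_requeue_request(), the sampling handshake these fields implement looks roughly as follows (a simplified sketch, not the literal kernel call sites):

	/* bfqq switches from empty to non-empty: arm the sampling */
	bfqd->last_empty_occupied_ns = ktime_get_ns();
	bfqd->wait_dispatch = true;
	bfqd->waited_rq = NULL;

	/* dispatch path: remember the first request sent after the switch */
	if (bfqd->wait_dispatch) {
		bfqd->waited_rq = rq;
		bfqd->wait_dispatch = false;
	}

	/* completion path: measure the total service time of that request */
	if (rq == bfqd->waited_rq)
		bfq_update_inject_limit(bfqd, bfqq);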
@@ -553,6 +562,7 @@ struct bfq_data { | |||
553 | ktime_t last_budget_start; | 562 | ktime_t last_budget_start; |
554 | /* beginning of the last idle slice */ | 563 | /* beginning of the last idle slice */ |
555 | ktime_t last_idling_start; | 564 | ktime_t last_idling_start; |
565 | unsigned long last_idling_start_jiffies; | ||
556 | 566 | ||
557 | /* number of samples in current observation interval */ | 567 | /* number of samples in current observation interval */ |
558 | int peak_rate_samples; | 568 | int peak_rate_samples; |
@@ -898,10 +908,10 @@ void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync); | |||
898 | struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); | 908 | struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); |
899 | void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); | 909 | void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); |
900 | void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, | 910 | void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
901 | struct rb_root *root); | 911 | struct rb_root_cached *root); |
902 | void __bfq_weights_tree_remove(struct bfq_data *bfqd, | 912 | void __bfq_weights_tree_remove(struct bfq_data *bfqd, |
903 | struct bfq_queue *bfqq, | 913 | struct bfq_queue *bfqq, |
904 | struct rb_root *root); | 914 | struct rb_root_cached *root); |
905 | void bfq_weights_tree_remove(struct bfq_data *bfqd, | 915 | void bfq_weights_tree_remove(struct bfq_data *bfqd, |
906 | struct bfq_queue *bfqq); | 916 | struct bfq_queue *bfqq); |
907 | void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, | 917 | void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
@@ -1008,13 +1018,23 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq); | |||
1008 | /* --------------- end of interface of B-WF2Q+ ---------------- */ | 1018 | /* --------------- end of interface of B-WF2Q+ ---------------- */ |
1009 | 1019 | ||
1010 | /* Logging facilities. */ | 1020 | /* Logging facilities. */ |
1021 | static inline void bfq_pid_to_str(int pid, char *str, int len) | ||
1022 | { | ||
1023 | if (pid != -1) | ||
1024 | snprintf(str, len, "%d", pid); | ||
1025 | else | ||
1026 | snprintf(str, len, "SHARED-"); | ||
1027 | } | ||
1028 | |||
1011 | #ifdef CONFIG_BFQ_GROUP_IOSCHED | 1029 | #ifdef CONFIG_BFQ_GROUP_IOSCHED |
1012 | struct bfq_group *bfqq_group(struct bfq_queue *bfqq); | 1030 | struct bfq_group *bfqq_group(struct bfq_queue *bfqq); |
1013 | 1031 | ||
1014 | #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ | 1032 | #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ |
1033 | char pid_str[MAX_PID_STR_LENGTH]; \ | ||
1034 | bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ | ||
1015 | blk_add_cgroup_trace_msg((bfqd)->queue, \ | 1035 | blk_add_cgroup_trace_msg((bfqd)->queue, \ |
1016 | bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \ | 1036 | bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \ |
1017 | "bfq%d%c " fmt, (bfqq)->pid, \ | 1037 | "bfq%s%c " fmt, pid_str, \ |
1018 | bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \ | 1038 | bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \ |
1019 | } while (0) | 1039 | } while (0) |
1020 | 1040 | ||
@@ -1025,10 +1045,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq); | |||
1025 | 1045 | ||
1026 | #else /* CONFIG_BFQ_GROUP_IOSCHED */ | 1046 | #else /* CONFIG_BFQ_GROUP_IOSCHED */ |
1027 | 1047 | ||
1028 | #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ | 1048 | #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ |
1029 | blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \ | 1049 | char pid_str[MAX_PID_STR_LENGTH]; \ |
1050 | bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ | ||
1051 | blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \ | ||
1030 | bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ | 1052 | bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ |
1031 | ##args) | 1053 | ##args); \ |
1054 | } while (0) | ||
1032 | #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) | 1055 | #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) |
1033 | 1056 | ||
1034 | #endif /* CONFIG_BFQ_GROUP_IOSCHED */ | 1057 | #endif /* CONFIG_BFQ_GROUP_IOSCHED */ |
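As an example of the new logging format (pid value made up), for a sync queue:

	char pid_str[MAX_PID_STR_LENGTH];

	bfq_pid_to_str(1234, pid_str, MAX_PID_STR_LENGTH); /* pid_str = "1234",    trace prefix "bfq1234S ..."    */
	bfq_pid_to_str(-1, pid_str, MAX_PID_STR_LENGTH);   /* pid_str = "SHARED-", trace prefix "bfqSHARED-S ..." */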
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index ae4d000ac0af..c9ba225081ce 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c | |||
@@ -1,19 +1,10 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
1 | /* | 2 | /* |
2 | * Hierarchical Budget Worst-case Fair Weighted Fair Queueing | 3 | * Hierarchical Budget Worst-case Fair Weighted Fair Queueing |
3 | * (B-WF2Q+): hierarchical scheduling algorithm by which the BFQ I/O | 4 | * (B-WF2Q+): hierarchical scheduling algorithm by which the BFQ I/O |
4 | * scheduler schedules generic entities. The latter can represent | 5 | * scheduler schedules generic entities. The latter can represent |
5 | * either single bfq queues (associated with processes) or groups of | 6 | * either single bfq queues (associated with processes) or groups of |
6 | * bfq queues (associated with cgroups). | 7 | * bfq queues (associated with cgroups). |
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as | ||
10 | * published by the Free Software Foundation; either version 2 of the | ||
11 | * License, or (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * General Public License for more details. | ||
17 | */ | 8 | */ |
18 | #include "bfq-iosched.h" | 9 | #include "bfq-iosched.h" |
19 | 10 | ||
@@ -59,7 +50,7 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service); | |||
59 | * bfq_update_next_in_service - update sd->next_in_service | 50 | * bfq_update_next_in_service - update sd->next_in_service |
60 | * @sd: sched_data for which to perform the update. | 51 | * @sd: sched_data for which to perform the update. |
61 | * @new_entity: if not NULL, pointer to the entity whose activation, | 52 | * @new_entity: if not NULL, pointer to the entity whose activation, |
62 | * requeueing or repositionig triggered the invocation of | 53 | * requeueing or repositioning triggered the invocation of |
63 | * this function. | 54 | * this function. |
64 | * @expiration: if true, this function is being invoked after the | 55
65 | * expiration of the in-service entity | 56 | * expiration of the in-service entity |
@@ -90,7 +81,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, | |||
90 | 81 | ||
91 | /* | 82 | /* |
92 | * If this update is triggered by the activation, requeueing | 83 | * If this update is triggered by the activation, requeueing |
93 | * or repositiong of an entity that does not coincide with | 84 | * or repositioning of an entity that does not coincide with |
94 | * sd->next_in_service, then a full lookup in the active tree | 85 | * sd->next_in_service, then a full lookup in the active tree |
95 | * can be avoided. In fact, it is enough to check whether the | 86 | * can be avoided. In fact, it is enough to check whether the |
96 | * just-modified entity has the same priority as | 87 | * just-modified entity has the same priority as |
@@ -737,7 +728,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, | |||
737 | struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); | 728 | struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
738 | unsigned int prev_weight, new_weight; | 729 | unsigned int prev_weight, new_weight; |
739 | struct bfq_data *bfqd = NULL; | 730 | struct bfq_data *bfqd = NULL; |
740 | struct rb_root *root; | 731 | struct rb_root_cached *root; |
741 | #ifdef CONFIG_BFQ_GROUP_IOSCHED | 732 | #ifdef CONFIG_BFQ_GROUP_IOSCHED |
742 | struct bfq_sched_data *sd; | 733 | struct bfq_sched_data *sd; |
743 | struct bfq_group *bfqg; | 734 | struct bfq_group *bfqg; |
@@ -1396,7 +1387,7 @@ left: | |||
1396 | * In this first case, update the virtual time in @st too (see the | 1387 | * In this first case, update the virtual time in @st too (see the |
1397 | * comments on this update inside the function). | 1388 | * comments on this update inside the function). |
1398 | * | 1389 | * |
1399 | * In constrast, if there is an in-service entity, then return the | 1390 | * In contrast, if there is an in-service entity, then return the |
1400 | * entity that would be set in service if not only the above | 1391 | * entity that would be set in service if not only the above |
1401 | * conditions, but also the next one held true: the currently | 1392 | * conditions, but also the next one held true: the currently |
1402 | * in-service entity, on expiration, | 1393 | * in-service entity, on expiration, |
@@ -1479,12 +1470,12 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, | |||
1479 | * is being invoked as a part of the expiration path | 1470 | * is being invoked as a part of the expiration path |
1480 | * of the in-service queue. In this case, even if | 1471 | * of the in-service queue. In this case, even if |
1481 | * sd->in_service_entity is not NULL, | 1472 | * sd->in_service_entity is not NULL, |
1482 | * sd->in_service_entiy at this point is actually not | 1473 | * sd->in_service_entity at this point is actually not |
1483 | * in service any more, and, if needed, has already | 1474 | * in service any more, and, if needed, has already |
1484 | * been properly queued or requeued into the right | 1475 | * been properly queued or requeued into the right |
1485 | * tree. The reason why sd->in_service_entity is still | 1476 | * tree. The reason why sd->in_service_entity is still |
1486 | * not NULL here, even if expiration is true, is that | 1477 | * not NULL here, even if expiration is true, is that |
1487 | * sd->in_service_entiy is reset as a last step in the | 1478 | * sd->in_service_entity is reset as a last step in the |
1488 | * expiration path. So, if expiration is true, tell | 1479 | * expiration path. So, if expiration is true, tell |
1489 | * __bfq_lookup_next_entity that there is no | 1480 | * __bfq_lookup_next_entity that there is no |
1490 | * sd->in_service_entity. | 1481 | * sd->in_service_entity. |
diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 1b633a3526d4..42536674020a 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c | |||
@@ -1,23 +1,9 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * bio-integrity.c - bio data integrity extensions | 3 | * bio-integrity.c - bio data integrity extensions |
3 | * | 4 | * |
4 | * Copyright (C) 2007, 2008, 2009 Oracle Corporation | 5 | * Copyright (C) 2007, 2008, 2009 Oracle Corporation |
5 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> | 6 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> |
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License version | ||
9 | * 2 as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; see the file COPYING. If not, write to | ||
18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | ||
19 | * USA. | ||
20 | * | ||
21 | */ | 7 | */ |
22 | 8 | ||
23 | #include <linux/blkdev.h> | 9 | #include <linux/blkdev.h> |
diff --git a/block/bio.c b/block/bio.c index 716510ecd7ff..683cbb40f051 100644 --- a/block/bio.c +++ b/block/bio.c | |||
@@ -1,19 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk> | 3 | * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk> |
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public Licens | ||
14 | * along with this program; if not, write to the Free Software | ||
15 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- | ||
16 | * | ||
17 | */ | 4 | */ |
18 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
19 | #include <linux/swap.h> | 6 | #include <linux/swap.h> |
@@ -647,25 +634,72 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) | |||
647 | } | 634 | } |
648 | EXPORT_SYMBOL(bio_clone_fast); | 635 | EXPORT_SYMBOL(bio_clone_fast); |
649 | 636 | ||
637 | static inline bool page_is_mergeable(const struct bio_vec *bv, | ||
638 | struct page *page, unsigned int len, unsigned int off, | ||
639 | bool same_page) | ||
640 | { | ||
641 | phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + | ||
642 | bv->bv_offset + bv->bv_len - 1; | ||
643 | phys_addr_t page_addr = page_to_phys(page); | ||
644 | |||
645 | if (vec_end_addr + 1 != page_addr + off) | ||
646 | return false; | ||
647 | if (xen_domain() && !xen_biovec_phys_mergeable(bv, page)) | ||
648 | return false; | ||
649 | |||
650 | if ((vec_end_addr & PAGE_MASK) != page_addr) { | ||
651 | if (same_page) | ||
652 | return false; | ||
653 | if (pfn_to_page(PFN_DOWN(vec_end_addr)) + 1 != page) | ||
654 | return false; | ||
655 | } | ||
656 | |||
657 | WARN_ON_ONCE(same_page && (len + off) > PAGE_SIZE); | ||
658 | |||
659 | return true; | ||
660 | } | ||
661 | |||
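As a worked example of the checks above (hypothetical addresses, 4 KiB pages): if the last bvec ends at physical address 0x1fff, a page at physical address 0x2000 with off == 0 passes the contiguity test, since vec_end_addr + 1 == page_addr + off. Because the new data starts in a different page, the merge is refused when same_page is true; with same_page false it is still allowed, provided the two struct pages are adjacent (the pfn_to_page() check). A page at 0x3000 with off == 0, by contrast, fails the contiguity test outright.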
662 | /* | ||
663 | * Check if the @page can be added to the current segment(@bv), and make | ||
664 | * sure to call it only if page_is_mergeable(@bv, @page) is true | ||
665 | */ | ||
666 | static bool can_add_page_to_seg(struct request_queue *q, | ||
667 | struct bio_vec *bv, struct page *page, unsigned len, | ||
668 | unsigned offset) | ||
669 | { | ||
670 | unsigned long mask = queue_segment_boundary(q); | ||
671 | phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset; | ||
672 | phys_addr_t addr2 = page_to_phys(page) + offset + len - 1; | ||
673 | |||
674 | if ((addr1 | mask) != (addr2 | mask)) | ||
675 | return false; | ||
676 | |||
677 | if (bv->bv_len + len > queue_max_segment_size(q)) | ||
678 | return false; | ||
679 | |||
680 | return true; | ||
681 | } | ||
682 | |||
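A quick example of the segment-boundary test (hypothetical values): with queue_segment_boundary() returning a mask of 0xffff (a 64 KiB boundary), addr1 = 0x2f000 and addr2 = 0x30fff give (0x2f000 | 0xffff) = 0x2ffff versus (0x30fff | 0xffff) = 0x3ffff, so the grown segment would straddle a boundary and the merge is refused; an addr2 of 0x2ffff yields equal values and passes, subject to the queue_max_segment_size() check that follows.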
650 | /** | 683 | /** |
651 | * bio_add_pc_page - attempt to add page to bio | 684 | * __bio_add_pc_page - attempt to add page to passthrough bio |
652 | * @q: the target queue | 685 | * @q: the target queue |
653 | * @bio: destination bio | 686 | * @bio: destination bio |
654 | * @page: page to add | 687 | * @page: page to add |
655 | * @len: vec entry length | 688 | * @len: vec entry length |
656 | * @offset: vec entry offset | 689 | * @offset: vec entry offset |
690 | * @put_same_page: put the page if it is the same as the last added page | ||
657 | * | 691 | * |
658 | * Attempt to add a page to the bio_vec maplist. This can fail for a | 692 | * Attempt to add a page to the bio_vec maplist. This can fail for a |
659 | * number of reasons, such as the bio being full or target block device | 693 | * number of reasons, such as the bio being full or target block device |
660 | * limitations. The target block device must allow bio's up to PAGE_SIZE, | 694 | * limitations. The target block device must allow bio's up to PAGE_SIZE, |
661 | * so it is always possible to add a single page to an empty bio. | 695 | * so it is always possible to add a single page to an empty bio. |
662 | * | 696 | * |
663 | * This should only be used by REQ_PC bios. | 697 | * This should only be used by passthrough bios. |
664 | */ | 698 | */ |
665 | int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page | 699 | static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, |
666 | *page, unsigned int len, unsigned int offset) | 700 | struct page *page, unsigned int len, unsigned int offset, |
701 | bool put_same_page) | ||
667 | { | 702 | { |
668 | int retried_segments = 0; | ||
669 | struct bio_vec *bvec; | 703 | struct bio_vec *bvec; |
670 | 704 | ||
671 | /* | 705 | /* |
@@ -677,18 +711,14 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page | |||
677 | if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q)) | 711 | if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q)) |
678 | return 0; | 712 | return 0; |
679 | 713 | ||
680 | /* | ||
681 | * For filesystems with a blocksize smaller than the pagesize | ||
682 | * we will often be called with the same page as last time and | ||
683 | * a consecutive offset. Optimize this special case. | ||
684 | */ | ||
685 | if (bio->bi_vcnt > 0) { | 714 | if (bio->bi_vcnt > 0) { |
686 | struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; | 715 | bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
687 | 716 | ||
688 | if (page == prev->bv_page && | 717 | if (page == bvec->bv_page && |
689 | offset == prev->bv_offset + prev->bv_len) { | 718 | offset == bvec->bv_offset + bvec->bv_len) { |
690 | prev->bv_len += len; | 719 | if (put_same_page) |
691 | bio->bi_iter.bi_size += len; | 720 | put_page(page); |
721 | bvec->bv_len += len; | ||
692 | goto done; | 722 | goto done; |
693 | } | 723 | } |
694 | 724 | ||
@@ -696,63 +726,47 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page | |||
696 | * If the queue doesn't support SG gaps and adding this | 726 | * If the queue doesn't support SG gaps and adding this |
697 | * offset would create a gap, disallow it. | 727 | * offset would create a gap, disallow it. |
698 | */ | 728 | */ |
699 | if (bvec_gap_to_prev(q, prev, offset)) | 729 | if (bvec_gap_to_prev(q, bvec, offset)) |
700 | return 0; | 730 | return 0; |
731 | |||
732 | if (page_is_mergeable(bvec, page, len, offset, false) && | ||
733 | can_add_page_to_seg(q, bvec, page, len, offset)) { | ||
734 | bvec->bv_len += len; | ||
735 | goto done; | ||
736 | } | ||
701 | } | 737 | } |
702 | 738 | ||
703 | if (bio_full(bio)) | 739 | if (bio_full(bio)) |
704 | return 0; | 740 | return 0; |
705 | 741 | ||
706 | /* | 742 | if (bio->bi_phys_segments >= queue_max_segments(q)) |
707 | * setup the new entry, we might clear it again later if we | 743 | return 0; |
708 | * cannot add the page | 744 | |
709 | */ | ||
710 | bvec = &bio->bi_io_vec[bio->bi_vcnt]; | 745 | bvec = &bio->bi_io_vec[bio->bi_vcnt]; |
711 | bvec->bv_page = page; | 746 | bvec->bv_page = page; |
712 | bvec->bv_len = len; | 747 | bvec->bv_len = len; |
713 | bvec->bv_offset = offset; | 748 | bvec->bv_offset = offset; |
714 | bio->bi_vcnt++; | 749 | bio->bi_vcnt++; |
715 | bio->bi_phys_segments++; | ||
716 | bio->bi_iter.bi_size += len; | ||
717 | |||
718 | /* | ||
719 | * Perform a recount if the number of segments is greater | ||
720 | * than queue_max_segments(q). | ||
721 | */ | ||
722 | |||
723 | while (bio->bi_phys_segments > queue_max_segments(q)) { | ||
724 | |||
725 | if (retried_segments) | ||
726 | goto failed; | ||
727 | |||
728 | retried_segments = 1; | ||
729 | blk_recount_segments(q, bio); | ||
730 | } | ||
731 | |||
732 | /* If we may be able to merge these biovecs, force a recount */ | ||
733 | if (bio->bi_vcnt > 1 && biovec_phys_mergeable(q, bvec - 1, bvec)) | ||
734 | bio_clear_flag(bio, BIO_SEG_VALID); | ||
735 | |||
736 | done: | 750 | done: |
751 | bio->bi_iter.bi_size += len; | ||
752 | bio->bi_phys_segments = bio->bi_vcnt; | ||
753 | bio_set_flag(bio, BIO_SEG_VALID); | ||
737 | return len; | 754 | return len; |
755 | } | ||
738 | 756 | ||
739 | failed: | 757 | int bio_add_pc_page(struct request_queue *q, struct bio *bio, |
740 | bvec->bv_page = NULL; | 758 | struct page *page, unsigned int len, unsigned int offset) |
741 | bvec->bv_len = 0; | 759 | { |
742 | bvec->bv_offset = 0; | 760 | return __bio_add_pc_page(q, bio, page, len, offset, false); |
743 | bio->bi_vcnt--; | ||
744 | bio->bi_iter.bi_size -= len; | ||
745 | blk_recount_segments(q, bio); | ||
746 | return 0; | ||
747 | } | 761 | } |
748 | EXPORT_SYMBOL(bio_add_pc_page); | 762 | EXPORT_SYMBOL(bio_add_pc_page); |
749 | 763 | ||
750 | /** | 764 | /** |
751 | * __bio_try_merge_page - try appending data to an existing bvec. | 765 | * __bio_try_merge_page - try appending data to an existing bvec. |
752 | * @bio: destination bio | 766 | * @bio: destination bio |
753 | * @page: page to add | 767 | * @page: start page to add |
754 | * @len: length of the data to add | 768 | * @len: length of the data to add |
755 | * @off: offset of the data in @page | 769 | * @off: offset of the data relative to @page |
756 | * @same_page: if %true only merge if the new data is in the same physical | 770 | * @same_page: if %true only merge if the new data is in the same physical |
757 | * page as the last segment of the bio. | 771 | * page as the last segment of the bio. |
758 | * | 772 | * |
@@ -760,6 +774,8 @@ EXPORT_SYMBOL(bio_add_pc_page); | |||
760 | * a useful optimisation for file systems with a block size smaller than the | 774 | * a useful optimisation for file systems with a block size smaller than the |
761 | * page size. | 775 | * page size. |
762 | * | 776 | * |
777 | * Warn if (@len, @off) crosses pages in case that @same_page is true. | ||
778 | * | ||
763 | * Return %true on success or %false on failure. | 779 | * Return %true on success or %false on failure. |
764 | */ | 780 | */ |
765 | bool __bio_try_merge_page(struct bio *bio, struct page *page, | 781 | bool __bio_try_merge_page(struct bio *bio, struct page *page, |
@@ -770,29 +786,23 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, | |||
770 | 786 | ||
771 | if (bio->bi_vcnt > 0) { | 787 | if (bio->bi_vcnt > 0) { |
772 | struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; | 788 | struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
773 | phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + | ||
774 | bv->bv_offset + bv->bv_len - 1; | ||
775 | phys_addr_t page_addr = page_to_phys(page); | ||
776 | 789 | ||
777 | if (vec_end_addr + 1 != page_addr + off) | 790 | if (page_is_mergeable(bv, page, len, off, same_page)) { |
778 | return false; | 791 | bv->bv_len += len; |
779 | if (same_page && (vec_end_addr & PAGE_MASK) != page_addr) | 792 | bio->bi_iter.bi_size += len; |
780 | return false; | 793 | return true; |
781 | 794 | } | |
782 | bv->bv_len += len; | ||
783 | bio->bi_iter.bi_size += len; | ||
784 | return true; | ||
785 | } | 795 | } |
786 | return false; | 796 | return false; |
787 | } | 797 | } |
788 | EXPORT_SYMBOL_GPL(__bio_try_merge_page); | 798 | EXPORT_SYMBOL_GPL(__bio_try_merge_page); |
789 | 799 | ||
790 | /** | 800 | /** |
791 | * __bio_add_page - add page to a bio in a new segment | 801 | * __bio_add_page - add page(s) to a bio in a new segment |
792 | * @bio: destination bio | 802 | * @bio: destination bio |
793 | * @page: page to add | 803 | * @page: start page to add |
794 | * @len: length of the data to add | 804 | * @len: length of the data to add, may cross pages |
795 | * @off: offset of the data in @page | 805 | * @off: offset of the data relative to @page, may cross pages |
796 | * | 806 | * |
797 | * Add the data at @page + @off to @bio as a new bvec. The caller must ensure | 807 | * Add the data at @page + @off to @bio as a new bvec. The caller must ensure |
798 | * that @bio has space for another bvec. | 808 | * that @bio has space for another bvec. |
@@ -815,13 +825,13 @@ void __bio_add_page(struct bio *bio, struct page *page, | |||
815 | EXPORT_SYMBOL_GPL(__bio_add_page); | 825 | EXPORT_SYMBOL_GPL(__bio_add_page); |
816 | 826 | ||
817 | /** | 827 | /** |
818 | * bio_add_page - attempt to add page to bio | 828 | * bio_add_page - attempt to add page(s) to bio |
819 | * @bio: destination bio | 829 | * @bio: destination bio |
820 | * @page: page to add | 830 | * @page: start page to add |
821 | * @len: vec entry length | 831 | * @len: vec entry length, may cross pages |
822 | * @offset: vec entry offset | 832 | * @offset: vec entry offset relative to @page, may cross pages |
823 | * | 833 | * |
824 | * Attempt to add a page to the bio_vec maplist. This will only fail | 834 | * Attempt to add page(s) to the bio_vec maplist. This will only fail |
825 | * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio. | 835 | * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio. |
826 | */ | 836 | */ |
827 | int bio_add_page(struct bio *bio, struct page *page, | 837 | int bio_add_page(struct bio *bio, struct page *page, |
@@ -836,6 +846,24 @@ int bio_add_page(struct bio *bio, struct page *page, | |||
836 | } | 846 | } |
837 | EXPORT_SYMBOL(bio_add_page); | 847 | EXPORT_SYMBOL(bio_add_page); |
838 | 848 | ||
849 | static void bio_get_pages(struct bio *bio) | ||
850 | { | ||
851 | struct bvec_iter_all iter_all; | ||
852 | struct bio_vec *bvec; | ||
853 | |||
854 | bio_for_each_segment_all(bvec, bio, iter_all) | ||
855 | get_page(bvec->bv_page); | ||
856 | } | ||
857 | |||
858 | static void bio_release_pages(struct bio *bio) | ||
859 | { | ||
860 | struct bvec_iter_all iter_all; | ||
861 | struct bio_vec *bvec; | ||
862 | |||
863 | bio_for_each_segment_all(bvec, bio, iter_all) | ||
864 | put_page(bvec->bv_page); | ||
865 | } | ||
866 | |||
839 | static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) | 867 | static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) |
840 | { | 868 | { |
841 | const struct bio_vec *bv = iter->bvec; | 869 | const struct bio_vec *bv = iter->bvec; |
@@ -848,20 +876,10 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) | |||
848 | len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count); | 876 | len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count); |
849 | size = bio_add_page(bio, bv->bv_page, len, | 877 | size = bio_add_page(bio, bv->bv_page, len, |
850 | bv->bv_offset + iter->iov_offset); | 878 | bv->bv_offset + iter->iov_offset); |
851 | if (size == len) { | 879 | if (unlikely(size != len)) |
852 | if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { | 880 | return -EINVAL; |
853 | struct page *page; | 881 | iov_iter_advance(iter, size); |
854 | int i; | 882 | return 0; |
855 | |||
856 | mp_bvec_for_each_page(page, bv, i) | ||
857 | get_page(page); | ||
858 | } | ||
859 | |||
860 | iov_iter_advance(iter, size); | ||
861 | return 0; | ||
862 | } | ||
863 | |||
864 | return -EINVAL; | ||
865 | } | 883 | } |
866 | 884 | ||
867 | #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) | 885 | #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) |
@@ -934,29 +952,24 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) | |||
934 | int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) | 952 | int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) |
935 | { | 953 | { |
936 | const bool is_bvec = iov_iter_is_bvec(iter); | 954 | const bool is_bvec = iov_iter_is_bvec(iter); |
937 | unsigned short orig_vcnt = bio->bi_vcnt; | 955 | int ret; |
938 | 956 | ||
939 | /* | 957 | if (WARN_ON_ONCE(bio->bi_vcnt)) |
940 | * If this is a BVEC iter, then the pages are kernel pages. Don't | 958 | return -EINVAL; |
941 | * release them on IO completion, if the caller asked us to. | ||
942 | */ | ||
943 | if (is_bvec && iov_iter_bvec_no_ref(iter)) | ||
944 | bio_set_flag(bio, BIO_NO_PAGE_REF); | ||
945 | 959 | ||
946 | do { | 960 | do { |
947 | int ret; | ||
948 | |||
949 | if (is_bvec) | 961 | if (is_bvec) |
950 | ret = __bio_iov_bvec_add_pages(bio, iter); | 962 | ret = __bio_iov_bvec_add_pages(bio, iter); |
951 | else | 963 | else |
952 | ret = __bio_iov_iter_get_pages(bio, iter); | 964 | ret = __bio_iov_iter_get_pages(bio, iter); |
965 | } while (!ret && iov_iter_count(iter) && !bio_full(bio)); | ||
953 | 966 | ||
954 | if (unlikely(ret)) | 967 | if (iov_iter_bvec_no_ref(iter)) |
955 | return bio->bi_vcnt > orig_vcnt ? 0 : ret; | 968 | bio_set_flag(bio, BIO_NO_PAGE_REF); |
956 | 969 | else if (is_bvec) | |
957 | } while (iov_iter_count(iter) && !bio_full(bio)); | 970 | bio_get_pages(bio); |
958 | 971 | ||
959 | return 0; | 972 | return bio->bi_vcnt ? 0 : ret; |
960 | } | 973 | } |
961 | 974 | ||
962 | static void submit_bio_wait_endio(struct bio *bio) | 975 | static void submit_bio_wait_endio(struct bio *bio) |
@@ -1127,11 +1140,10 @@ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, | |||
1127 | */ | 1140 | */ |
1128 | static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) | 1141 | static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) |
1129 | { | 1142 | { |
1130 | int i; | ||
1131 | struct bio_vec *bvec; | 1143 | struct bio_vec *bvec; |
1132 | struct bvec_iter_all iter_all; | 1144 | struct bvec_iter_all iter_all; |
1133 | 1145 | ||
1134 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 1146 | bio_for_each_segment_all(bvec, bio, iter_all) { |
1135 | ssize_t ret; | 1147 | ssize_t ret; |
1136 | 1148 | ||
1137 | ret = copy_page_from_iter(bvec->bv_page, | 1149 | ret = copy_page_from_iter(bvec->bv_page, |
@@ -1159,11 +1171,10 @@ static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) | |||
1159 | */ | 1171 | */ |
1160 | static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) | 1172 | static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) |
1161 | { | 1173 | { |
1162 | int i; | ||
1163 | struct bio_vec *bvec; | 1174 | struct bio_vec *bvec; |
1164 | struct bvec_iter_all iter_all; | 1175 | struct bvec_iter_all iter_all; |
1165 | 1176 | ||
1166 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 1177 | bio_for_each_segment_all(bvec, bio, iter_all) { |
1167 | ssize_t ret; | 1178 | ssize_t ret; |
1168 | 1179 | ||
1169 | ret = copy_page_to_iter(bvec->bv_page, | 1180 | ret = copy_page_to_iter(bvec->bv_page, |
@@ -1184,10 +1195,9 @@ static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) | |||
1184 | void bio_free_pages(struct bio *bio) | 1195 | void bio_free_pages(struct bio *bio) |
1185 | { | 1196 | { |
1186 | struct bio_vec *bvec; | 1197 | struct bio_vec *bvec; |
1187 | int i; | ||
1188 | struct bvec_iter_all iter_all; | 1198 | struct bvec_iter_all iter_all; |
1189 | 1199 | ||
1190 | bio_for_each_segment_all(bvec, bio, i, iter_all) | 1200 | bio_for_each_segment_all(bvec, bio, iter_all) |
1191 | __free_page(bvec->bv_page); | 1201 | __free_page(bvec->bv_page); |
1192 | } | 1202 | } |
1193 | EXPORT_SYMBOL(bio_free_pages); | 1203 | EXPORT_SYMBOL(bio_free_pages); |
@@ -1388,21 +1398,14 @@ struct bio *bio_map_user_iov(struct request_queue *q, | |||
1388 | for (j = 0; j < npages; j++) { | 1398 | for (j = 0; j < npages; j++) { |
1389 | struct page *page = pages[j]; | 1399 | struct page *page = pages[j]; |
1390 | unsigned int n = PAGE_SIZE - offs; | 1400 | unsigned int n = PAGE_SIZE - offs; |
1391 | unsigned short prev_bi_vcnt = bio->bi_vcnt; | ||
1392 | 1401 | ||
1393 | if (n > bytes) | 1402 | if (n > bytes) |
1394 | n = bytes; | 1403 | n = bytes; |
1395 | 1404 | ||
1396 | if (!bio_add_pc_page(q, bio, page, n, offs)) | 1405 | if (!__bio_add_pc_page(q, bio, page, n, offs, |
1406 | true)) | ||
1397 | break; | 1407 | break; |
1398 | 1408 | ||
1399 | /* | ||
1400 | * check if vector was merged with previous | ||
1401 | * drop page reference if needed | ||
1402 | */ | ||
1403 | if (bio->bi_vcnt == prev_bi_vcnt) | ||
1404 | put_page(page); | ||
1405 | |||
1406 | added += n; | 1409 | added += n; |
1407 | bytes -= n; | 1410 | bytes -= n; |
1408 | offs = 0; | 1411 | offs = 0; |
@@ -1432,7 +1435,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, | |||
1432 | return bio; | 1435 | return bio; |
1433 | 1436 | ||
1434 | out_unmap: | 1437 | out_unmap: |
1435 | bio_for_each_segment_all(bvec, bio, j, iter_all) { | 1438 | bio_for_each_segment_all(bvec, bio, iter_all) { |
1436 | put_page(bvec->bv_page); | 1439 | put_page(bvec->bv_page); |
1437 | } | 1440 | } |
1438 | bio_put(bio); | 1441 | bio_put(bio); |
@@ -1442,13 +1445,12 @@ struct bio *bio_map_user_iov(struct request_queue *q, | |||
1442 | static void __bio_unmap_user(struct bio *bio) | 1445 | static void __bio_unmap_user(struct bio *bio) |
1443 | { | 1446 | { |
1444 | struct bio_vec *bvec; | 1447 | struct bio_vec *bvec; |
1445 | int i; | ||
1446 | struct bvec_iter_all iter_all; | 1448 | struct bvec_iter_all iter_all; |
1447 | 1449 | ||
1448 | /* | 1450 | /* |
1449 | * make sure we dirty pages we wrote to | 1451 | * make sure we dirty pages we wrote to |
1450 | */ | 1452 | */ |
1451 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 1453 | bio_for_each_segment_all(bvec, bio, iter_all) { |
1452 | if (bio_data_dir(bio) == READ) | 1454 | if (bio_data_dir(bio) == READ) |
1453 | set_page_dirty_lock(bvec->bv_page); | 1455 | set_page_dirty_lock(bvec->bv_page); |
1454 | 1456 | ||
@@ -1539,10 +1541,9 @@ static void bio_copy_kern_endio_read(struct bio *bio) | |||
1539 | { | 1541 | { |
1540 | char *p = bio->bi_private; | 1542 | char *p = bio->bi_private; |
1541 | struct bio_vec *bvec; | 1543 | struct bio_vec *bvec; |
1542 | int i; | ||
1543 | struct bvec_iter_all iter_all; | 1544 | struct bvec_iter_all iter_all; |
1544 | 1545 | ||
1545 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 1546 | bio_for_each_segment_all(bvec, bio, iter_all) { |
1546 | memcpy(p, page_address(bvec->bv_page), bvec->bv_len); | 1547 | memcpy(p, page_address(bvec->bv_page), bvec->bv_len); |
1547 | p += bvec->bv_len; | 1548 | p += bvec->bv_len; |
1548 | } | 1549 | } |
@@ -1650,25 +1651,14 @@ cleanup: | |||
1650 | void bio_set_pages_dirty(struct bio *bio) | 1651 | void bio_set_pages_dirty(struct bio *bio) |
1651 | { | 1652 | { |
1652 | struct bio_vec *bvec; | 1653 | struct bio_vec *bvec; |
1653 | int i; | ||
1654 | struct bvec_iter_all iter_all; | 1654 | struct bvec_iter_all iter_all; |
1655 | 1655 | ||
1656 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 1656 | bio_for_each_segment_all(bvec, bio, iter_all) { |
1657 | if (!PageCompound(bvec->bv_page)) | 1657 | if (!PageCompound(bvec->bv_page)) |
1658 | set_page_dirty_lock(bvec->bv_page); | 1658 | set_page_dirty_lock(bvec->bv_page); |
1659 | } | 1659 | } |
1660 | } | 1660 | } |
1661 | 1661 | ||
1662 | static void bio_release_pages(struct bio *bio) | ||
1663 | { | ||
1664 | struct bio_vec *bvec; | ||
1665 | int i; | ||
1666 | struct bvec_iter_all iter_all; | ||
1667 | |||
1668 | bio_for_each_segment_all(bvec, bio, i, iter_all) | ||
1669 | put_page(bvec->bv_page); | ||
1670 | } | ||
1671 | |||
1672 | /* | 1662 | /* |
1673 | * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. | 1663 | * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. |
1674 | * If they are, then fine. If, however, some pages are clean then they must | 1664 | * If they are, then fine. If, however, some pages are clean then they must |
@@ -1712,10 +1702,9 @@ void bio_check_pages_dirty(struct bio *bio) | |||
1712 | { | 1702 | { |
1713 | struct bio_vec *bvec; | 1703 | struct bio_vec *bvec; |
1714 | unsigned long flags; | 1704 | unsigned long flags; |
1715 | int i; | ||
1716 | struct bvec_iter_all iter_all; | 1705 | struct bvec_iter_all iter_all; |
1717 | 1706 | ||
1718 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 1707 | bio_for_each_segment_all(bvec, bio, iter_all) { |
1719 | if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page)) | 1708 | if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page)) |
1720 | goto defer; | 1709 | goto defer; |
1721 | } | 1710 | } |
@@ -2203,6 +2192,9 @@ static int __init init_bio(void) | |||
2203 | bio_slab_nr = 0; | 2192 | bio_slab_nr = 0; |
2204 | bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab), | 2193 | bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab), |
2205 | GFP_KERNEL); | 2194 | GFP_KERNEL); |
2195 | |||
2196 | BUILD_BUG_ON(BIO_FLAG_LAST > BVEC_POOL_OFFSET); | ||
2197 | |||
2206 | if (!bio_slabs) | 2198 | if (!bio_slabs) |
2207 | panic("bio: can't allocate bios\n"); | 2199 | panic("bio: can't allocate bios\n"); |
2208 | 2200 | ||
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 617a2b3f7582..b97b479e4f64 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Common Block IO controller cgroup interface | 3 | * Common Block IO controller cgroup interface |
3 | * | 4 | * |
diff --git a/block/blk-core.c b/block/blk-core.c index a55389ba8779..419d600e6637 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 4 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
@@ -232,15 +233,6 @@ void blk_sync_queue(struct request_queue *q) | |||
232 | { | 233 | { |
233 | del_timer_sync(&q->timeout); | 234 | del_timer_sync(&q->timeout); |
234 | cancel_work_sync(&q->timeout_work); | 235 | cancel_work_sync(&q->timeout_work); |
235 | |||
236 | if (queue_is_mq(q)) { | ||
237 | struct blk_mq_hw_ctx *hctx; | ||
238 | int i; | ||
239 | |||
240 | cancel_delayed_work_sync(&q->requeue_work); | ||
241 | queue_for_each_hw_ctx(q, hctx, i) | ||
242 | cancel_delayed_work_sync(&hctx->run_work); | ||
243 | } | ||
244 | } | 236 | } |
245 | EXPORT_SYMBOL(blk_sync_queue); | 237 | EXPORT_SYMBOL(blk_sync_queue); |
246 | 238 | ||
@@ -347,18 +339,6 @@ void blk_cleanup_queue(struct request_queue *q) | |||
347 | 339 | ||
348 | blk_queue_flag_set(QUEUE_FLAG_DEAD, q); | 340 | blk_queue_flag_set(QUEUE_FLAG_DEAD, q); |
349 | 341 | ||
350 | /* | ||
351 | * make sure all in-progress dispatch are completed because | ||
352 | * blk_freeze_queue() can only complete all requests, and | ||
353 | * dispatch may still be in-progress since we dispatch requests | ||
354 | * from more than one contexts. | ||
355 | * | ||
356 | * We rely on driver to deal with the race in case that queue | ||
357 | * initialization isn't done. | ||
358 | */ | ||
359 | if (queue_is_mq(q) && blk_queue_init_done(q)) | ||
360 | blk_mq_quiesce_queue(q); | ||
361 | |||
362 | /* for synchronous bio-based driver finish in-flight integrity i/o */ | 342 | /* for synchronous bio-based driver finish in-flight integrity i/o */ |
363 | blk_flush_integrity(); | 343 | blk_flush_integrity(); |
364 | 344 | ||
@@ -375,7 +355,7 @@ void blk_cleanup_queue(struct request_queue *q) | |||
375 | blk_exit_queue(q); | 355 | blk_exit_queue(q); |
376 | 356 | ||
377 | if (queue_is_mq(q)) | 357 | if (queue_is_mq(q)) |
378 | blk_mq_free_queue(q); | 358 | blk_mq_exit_queue(q); |
379 | 359 | ||
380 | percpu_ref_exit(&q->q_usage_counter); | 360 | percpu_ref_exit(&q->q_usage_counter); |
381 | 361 | ||
diff --git a/block/blk-exec.c b/block/blk-exec.c index a34b7d918742..1db44ca0f4a6 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Functions related to setting various queue properties from drivers | 3 | * Functions related to setting various queue properties from drivers |
3 | */ | 4 | */ |
diff --git a/block/blk-flush.c b/block/blk-flush.c index d95f94892015..aedd9320e605 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c | |||
@@ -1,11 +1,10 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Functions to sequence PREFLUSH and FUA writes. | 3 | * Functions to sequence PREFLUSH and FUA writes. |
3 | * | 4 | * |
4 | * Copyright (C) 2011 Max Planck Institute for Gravitational Physics | 5 | * Copyright (C) 2011 Max Planck Institute for Gravitational Physics |
5 | * Copyright (C) 2011 Tejun Heo <tj@kernel.org> | 6 | * Copyright (C) 2011 Tejun Heo <tj@kernel.org> |
6 | * | 7 | * |
7 | * This file is released under the GPLv2. | ||
8 | * | ||
9 | * REQ_{PREFLUSH|FUA} requests are decomposed to sequences consisted of three | 8 | * REQ_{PREFLUSH|FUA} requests are decomposed to sequences consisted of three |
10 | * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request | 9 | * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request |
11 | * properties and hardware capability. | 10 | * properties and hardware capability. |
diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 85864c71e858..825c9c070458 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c | |||
@@ -1,23 +1,9 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * blk-integrity.c - Block layer data integrity extensions | 3 | * blk-integrity.c - Block layer data integrity extensions |
3 | * | 4 | * |
4 | * Copyright (C) 2007, 2008 Oracle Corporation | 5 | * Copyright (C) 2007, 2008 Oracle Corporation |
5 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> | 6 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> |
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License version | ||
9 | * 2 as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; see the file COPYING. If not, write to | ||
18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | ||
19 | * USA. | ||
20 | * | ||
21 | */ | 7 | */ |
22 | 8 | ||
23 | #include <linux/blkdev.h> | 9 | #include <linux/blkdev.h> |
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 507212d75ee2..d22e61bced86 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Block rq-qos base io controller | 3 | * Block rq-qos base io controller |
3 | * | 4 | * |
diff --git a/block/blk-merge.c b/block/blk-merge.c index 1c9d4f0f96ea..21e87a714a73 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -267,23 +267,6 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, | |||
267 | goto split; | 267 | goto split; |
268 | } | 268 | } |
269 | 269 | ||
270 | if (bvprvp) { | ||
271 | if (seg_size + bv.bv_len > queue_max_segment_size(q)) | ||
272 | goto new_segment; | ||
273 | if (!biovec_phys_mergeable(q, bvprvp, &bv)) | ||
274 | goto new_segment; | ||
275 | |||
276 | seg_size += bv.bv_len; | ||
277 | bvprv = bv; | ||
278 | bvprvp = &bvprv; | ||
279 | sectors += bv.bv_len >> 9; | ||
280 | |||
281 | if (nsegs == 1 && seg_size > front_seg_size) | ||
282 | front_seg_size = seg_size; | ||
283 | |||
284 | continue; | ||
285 | } | ||
286 | new_segment: | ||
287 | if (nsegs == max_segs) | 270 | if (nsegs == max_segs) |
288 | goto split; | 271 | goto split; |
289 | 272 | ||
@@ -370,12 +353,12 @@ EXPORT_SYMBOL(blk_queue_split); | |||
370 | static unsigned int __blk_recalc_rq_segments(struct request_queue *q, | 353 | static unsigned int __blk_recalc_rq_segments(struct request_queue *q, |
371 | struct bio *bio) | 354 | struct bio *bio) |
372 | { | 355 | { |
373 | struct bio_vec bv, bvprv = { NULL }; | 356 | struct bio_vec uninitialized_var(bv), bvprv = { NULL }; |
374 | int prev = 0; | ||
375 | unsigned int seg_size, nr_phys_segs; | 357 | unsigned int seg_size, nr_phys_segs; |
376 | unsigned front_seg_size; | 358 | unsigned front_seg_size; |
377 | struct bio *fbio, *bbio; | 359 | struct bio *fbio, *bbio; |
378 | struct bvec_iter iter; | 360 | struct bvec_iter iter; |
361 | bool new_bio = false; | ||
379 | 362 | ||
380 | if (!bio) | 363 | if (!bio) |
381 | return 0; | 364 | return 0; |
@@ -396,7 +379,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, | |||
396 | nr_phys_segs = 0; | 379 | nr_phys_segs = 0; |
397 | for_each_bio(bio) { | 380 | for_each_bio(bio) { |
398 | bio_for_each_bvec(bv, bio, iter) { | 381 | bio_for_each_bvec(bv, bio, iter) { |
399 | if (prev) { | 382 | if (new_bio) { |
400 | if (seg_size + bv.bv_len | 383 | if (seg_size + bv.bv_len |
401 | > queue_max_segment_size(q)) | 384 | > queue_max_segment_size(q)) |
402 | goto new_segment; | 385 | goto new_segment; |
@@ -404,7 +387,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, | |||
404 | goto new_segment; | 387 | goto new_segment; |
405 | 388 | ||
406 | seg_size += bv.bv_len; | 389 | seg_size += bv.bv_len; |
407 | bvprv = bv; | ||
408 | 390 | ||
409 | if (nr_phys_segs == 1 && seg_size > | 391 | if (nr_phys_segs == 1 && seg_size > |
410 | front_seg_size) | 392 | front_seg_size) |
@@ -413,12 +395,15 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, | |||
413 | continue; | 395 | continue; |
414 | } | 396 | } |
415 | new_segment: | 397 | new_segment: |
416 | bvprv = bv; | ||
417 | prev = 1; | ||
418 | bvec_split_segs(q, &bv, &nr_phys_segs, &seg_size, | 398 | bvec_split_segs(q, &bv, &nr_phys_segs, &seg_size, |
419 | &front_seg_size, NULL, UINT_MAX); | 399 | &front_seg_size, NULL, UINT_MAX); |
400 | new_bio = false; | ||
420 | } | 401 | } |
421 | bbio = bio; | 402 | bbio = bio; |
403 | if (likely(bio->bi_iter.bi_size)) { | ||
404 | bvprv = bv; | ||
405 | new_bio = true; | ||
406 | } | ||
422 | } | 407 | } |
423 | 408 | ||
424 | fbio->bi_seg_front_size = front_seg_size; | 409 | fbio->bi_seg_front_size = front_seg_size; |
@@ -484,79 +469,97 @@ static unsigned blk_bvec_map_sg(struct request_queue *q, | |||
484 | struct scatterlist **sg) | 469 | struct scatterlist **sg) |
485 | { | 470 | { |
486 | unsigned nbytes = bvec->bv_len; | 471 | unsigned nbytes = bvec->bv_len; |
487 | unsigned nsegs = 0, total = 0, offset = 0; | 472 | unsigned nsegs = 0, total = 0; |
488 | 473 | ||
489 | while (nbytes > 0) { | 474 | while (nbytes > 0) { |
490 | unsigned seg_size; | 475 | unsigned offset = bvec->bv_offset + total; |
491 | struct page *pg; | 476 | unsigned len = min(get_max_segment_size(q, offset), nbytes); |
492 | unsigned idx; | 477 | struct page *page = bvec->bv_page; |
493 | |||
494 | *sg = blk_next_sg(sg, sglist); | ||
495 | 478 | ||
496 | seg_size = get_max_segment_size(q, bvec->bv_offset + total); | 479 | /* |
497 | seg_size = min(nbytes, seg_size); | 480 | * Unfortunately a fair number of drivers barf on scatterlists |
498 | 481 | * that have an offset larger than PAGE_SIZE, despite other | |
499 | offset = (total + bvec->bv_offset) % PAGE_SIZE; | 482 | * subsystems dealing with that invariant just fine. For now |
500 | idx = (total + bvec->bv_offset) / PAGE_SIZE; | 483 | * stick to the legacy format where we never present those from |
501 | pg = bvec_nth_page(bvec->bv_page, idx); | 484 | * the block layer, but the code below should be removed once |
485 | * these offenders (mostly MMC/SD drivers) are fixed. | ||
486 | */ | ||
487 | page += (offset >> PAGE_SHIFT); | ||
488 | offset &= ~PAGE_MASK; | ||
502 | 489 | ||
503 | sg_set_page(*sg, pg, seg_size, offset); | 490 | *sg = blk_next_sg(sg, sglist); |
491 | sg_set_page(*sg, page, len, offset); | ||
504 | 492 | ||
505 | total += seg_size; | 493 | total += len; |
506 | nbytes -= seg_size; | 494 | nbytes -= len; |
507 | nsegs++; | 495 | nsegs++; |
508 | } | 496 | } |
509 | 497 | ||
510 | return nsegs; | 498 | return nsegs; |
511 | } | 499 | } |
512 | 500 | ||
513 | static inline void | 501 | static inline int __blk_bvec_map_sg(struct bio_vec bv, |
514 | __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, | 502 | struct scatterlist *sglist, struct scatterlist **sg) |
515 | struct scatterlist *sglist, struct bio_vec *bvprv, | 503 | { |
516 | struct scatterlist **sg, int *nsegs) | 504 | *sg = blk_next_sg(sg, sglist); |
505 | sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset); | ||
506 | return 1; | ||
507 | } | ||
508 | |||
509 | /* only try to merge bvecs into one sg if they are from two bios */ | ||
510 | static inline bool | ||
511 | __blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec, | ||
512 | struct bio_vec *bvprv, struct scatterlist **sg) | ||
517 | { | 513 | { |
518 | 514 | ||
519 | int nbytes = bvec->bv_len; | 515 | int nbytes = bvec->bv_len; |
520 | 516 | ||
521 | if (*sg) { | 517 | if (!*sg) |
522 | if ((*sg)->length + nbytes > queue_max_segment_size(q)) | 518 | return false; |
523 | goto new_segment; | ||
524 | if (!biovec_phys_mergeable(q, bvprv, bvec)) | ||
525 | goto new_segment; | ||
526 | 519 | ||
527 | (*sg)->length += nbytes; | 520 | if ((*sg)->length + nbytes > queue_max_segment_size(q)) |
528 | } else { | 521 | return false; |
529 | new_segment: | ||
530 | if (bvec->bv_offset + bvec->bv_len <= PAGE_SIZE) { | ||
531 | *sg = blk_next_sg(sg, sglist); | ||
532 | sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); | ||
533 | (*nsegs) += 1; | ||
534 | } else | ||
535 | (*nsegs) += blk_bvec_map_sg(q, bvec, sglist, sg); | ||
536 | } | ||
537 | *bvprv = *bvec; | ||
538 | } | ||
539 | 522 | ||
540 | static inline int __blk_bvec_map_sg(struct request_queue *q, struct bio_vec bv, | 523 | if (!biovec_phys_mergeable(q, bvprv, bvec)) |
541 | struct scatterlist *sglist, struct scatterlist **sg) | 524 | return false; |
542 | { | 525 | |
543 | *sg = sglist; | 526 | (*sg)->length += nbytes; |
544 | sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset); | 527 | |
545 | return 1; | 528 | return true; |
546 | } | 529 | } |
547 | 530 | ||
548 | static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, | 531 | static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, |
549 | struct scatterlist *sglist, | 532 | struct scatterlist *sglist, |
550 | struct scatterlist **sg) | 533 | struct scatterlist **sg) |
551 | { | 534 | { |
552 | struct bio_vec bvec, bvprv = { NULL }; | 535 | struct bio_vec uninitialized_var(bvec), bvprv = { NULL }; |
553 | struct bvec_iter iter; | 536 | struct bvec_iter iter; |
554 | int nsegs = 0; | 537 | int nsegs = 0; |
538 | bool new_bio = false; | ||
555 | 539 | ||
556 | for_each_bio(bio) | 540 | for_each_bio(bio) { |
557 | bio_for_each_bvec(bvec, bio, iter) | 541 | bio_for_each_bvec(bvec, bio, iter) { |
558 | __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, | 542 | /* |
559 | &nsegs); | 543 | * Only try to merge bvecs from two bios given we |
544 | * have done bio internal merge when adding pages | ||
545 | * to bio | ||
546 | */ | ||
547 | if (new_bio && | ||
548 | __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg)) | ||
549 | goto next_bvec; | ||
550 | |||
551 | if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE) | ||
552 | nsegs += __blk_bvec_map_sg(bvec, sglist, sg); | ||
553 | else | ||
554 | nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg); | ||
555 | next_bvec: | ||
556 | new_bio = false; | ||
557 | } | ||
558 | if (likely(bio->bi_iter.bi_size)) { | ||
559 | bvprv = bvec; | ||
560 | new_bio = true; | ||
561 | } | ||
562 | } | ||
560 | 563 | ||
561 | return nsegs; | 564 | return nsegs; |
562 | } | 565 | } |
@@ -572,9 +575,9 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, | |||
572 | int nsegs = 0; | 575 | int nsegs = 0; |
573 | 576 | ||
574 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) | 577 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) |
575 | nsegs = __blk_bvec_map_sg(q, rq->special_vec, sglist, &sg); | 578 | nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, &sg); |
576 | else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME) | 579 | else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME) |
577 | nsegs = __blk_bvec_map_sg(q, bio_iovec(rq->bio), sglist, &sg); | 580 | nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, &sg); |
578 | else if (rq->bio) | 581 | else if (rq->bio) |
579 | nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); | 582 | nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); |
580 | 583 | ||
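
A rough plain-C sketch of the arithmetic the reworked blk_bvec_map_sg() hunk above relies on: each scatterlist entry is capped at the queue's maximum segment size, and the offset is re-normalized per entry so drivers never see an offset of PAGE_SIZE or more. The constants and the bvec values below are made-up stand-ins, not real queue limits.

/* Hedged sketch, not kernel code: MAX_SEG_SIZE stands in for
 * queue_max_segment_size(); the bvec values are hypothetical. */
#include <stdio.h>

#define PAGE_SHIFT   12
#define PAGE_SIZE    (1u << PAGE_SHIFT)
#define MAX_SEG_SIZE 8192u

int main(void)
{
	unsigned int bv_offset = 512, bv_len = 20000;	/* hypothetical bvec */
	unsigned int nbytes = bv_len, total = 0, nsegs = 0;

	while (nbytes > 0) {
		unsigned int offset = bv_offset + total;
		unsigned int len = nbytes < MAX_SEG_SIZE ? nbytes : MAX_SEG_SIZE;
		unsigned int page = offset >> PAGE_SHIFT;	/* pages to advance */

		offset &= PAGE_SIZE - 1;	/* presented offset stays < PAGE_SIZE */
		printf("sg[%u]: page+%u offset=%u len=%u\n", nsegs, page, offset, len);

		total += len;
		nbytes -= len;
		nsegs++;
	}
	return 0;
}

With these numbers the 20000-byte bvec maps to three entries, each presented with an offset of 512 within its starting page.
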
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 03a534820271..48bebf00a5f3 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * CPU <-> hardware queue mapping helpers | 3 | * CPU <-> hardware queue mapping helpers |
3 | * | 4 | * |
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index ec1d18cb643c..6aea0ebc3a73 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c | |||
@@ -1,17 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Copyright (C) 2017 Facebook | 3 | * Copyright (C) 2017 Facebook |
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
15 | */ | 4 | */ |
16 | 5 | ||
17 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index 1dce18553984..ad4545a2a98b 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c | |||
@@ -1,14 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Copyright (c) 2016 Christoph Hellwig. | 3 | * Copyright (c) 2016 Christoph Hellwig. |
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | 4 | */ |
13 | #include <linux/kobject.h> | 5 | #include <linux/kobject.h> |
14 | #include <linux/blkdev.h> | 6 | #include <linux/blkdev.h> |
diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c index 45030a81a1ed..cc921e6ba709 100644 --- a/block/blk-mq-rdma.c +++ b/block/blk-mq-rdma.c | |||
@@ -1,14 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Copyright (c) 2017 Sagi Grimberg. | 3 | * Copyright (c) 2017 Sagi Grimberg. |
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | 4 | */ |
13 | #include <linux/blk-mq.h> | 5 | #include <linux/blk-mq.h> |
14 | #include <linux/blk-mq-rdma.h> | 6 | #include <linux/blk-mq-rdma.h> |
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index aa6bc5c02643..74c6bb871f7e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * blk-mq scheduling framework | 3 | * blk-mq scheduling framework |
3 | * | 4 | * |
@@ -413,6 +414,14 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, | |||
413 | struct list_head *list, bool run_queue_async) | 414 | struct list_head *list, bool run_queue_async) |
414 | { | 415 | { |
415 | struct elevator_queue *e; | 416 | struct elevator_queue *e; |
417 | struct request_queue *q = hctx->queue; | ||
418 | |||
419 | /* | ||
420 | * blk_mq_sched_insert_requests() is called from flush plug | ||
421 | * context only, and hold one usage counter to prevent queue | ||
422 | * from being released. | ||
423 | */ | ||
424 | percpu_ref_get(&q->q_usage_counter); | ||
416 | 425 | ||
417 | e = hctx->queue->elevator; | 426 | e = hctx->queue->elevator; |
418 | if (e && e->type->ops.insert_requests) | 427 | if (e && e->type->ops.insert_requests) |
@@ -426,12 +435,14 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, | |||
426 | if (!hctx->dispatch_busy && !e && !run_queue_async) { | 435 | if (!hctx->dispatch_busy && !e && !run_queue_async) { |
427 | blk_mq_try_issue_list_directly(hctx, list); | 436 | blk_mq_try_issue_list_directly(hctx, list); |
428 | if (list_empty(list)) | 437 | if (list_empty(list)) |
429 | return; | 438 | goto out; |
430 | } | 439 | } |
431 | blk_mq_insert_requests(hctx, ctx, list); | 440 | blk_mq_insert_requests(hctx, ctx, list); |
432 | } | 441 | } |
433 | 442 | ||
434 | blk_mq_run_hw_queue(hctx, run_queue_async); | 443 | blk_mq_run_hw_queue(hctx, run_queue_async); |
444 | out: | ||
445 | percpu_ref_put(&q->q_usage_counter); | ||
435 | } | 446 | } |
436 | 447 | ||
437 | static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, | 448 | static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, |
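
The blk-mq-sched.c hunk wraps the whole plug-flush insert path in a q_usage_counter reference so the queue cannot be released while requests are still being queued, and the former early return becomes a goto so every exit path drops that reference. A userspace analogue of the pattern, with a plain atomic counter standing in for the percpu_ref and made-up request counts:

/* Hedged analogue only: atomic_int stands in for q->q_usage_counter. */
#include <stdatomic.h>
#include <stdio.h>

struct fake_queue {
	atomic_int usage;		/* stand-in for q->q_usage_counter */
};

static void insert_requests(struct fake_queue *q, int nreq, int issue_directly)
{
	atomic_fetch_add(&q->usage, 1);		/* percpu_ref_get() */

	if (issue_directly && nreq == 0)
		goto out;			/* early exit still drops the ref */

	printf("queued %d requests\n", nreq);
out:
	atomic_fetch_sub(&q->usage, 1);		/* percpu_ref_put() */
}

int main(void)
{
	struct fake_queue q = { 0 };

	insert_requests(&q, 4, 0);
	insert_requests(&q, 0, 1);
	printf("usage back to %d\n", atomic_load(&q.usage));
	return 0;
}

The goto exists only so the put happens exactly once on every path, matching the single get taken at the top.
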
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 5315e538b3b1..d6e1a9bd7131 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
2 | #include <linux/module.h> | 3 | #include <linux/module.h> |
3 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
@@ -10,6 +11,7 @@ | |||
10 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
11 | 12 | ||
12 | #include <linux/blk-mq.h> | 13 | #include <linux/blk-mq.h> |
14 | #include "blk.h" | ||
13 | #include "blk-mq.h" | 15 | #include "blk-mq.h" |
14 | #include "blk-mq-tag.h" | 16 | #include "blk-mq-tag.h" |
15 | 17 | ||
@@ -33,6 +35,13 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj) | |||
33 | { | 35 | { |
34 | struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, | 36 | struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, |
35 | kobj); | 37 | kobj); |
38 | |||
39 | cancel_delayed_work_sync(&hctx->run_work); | ||
40 | |||
41 | if (hctx->flags & BLK_MQ_F_BLOCKING) | ||
42 | cleanup_srcu_struct(hctx->srcu); | ||
43 | blk_free_flush_queue(hctx->fq); | ||
44 | sbitmap_free(&hctx->ctx_map); | ||
36 | free_cpumask_var(hctx->cpumask); | 45 | free_cpumask_var(hctx->cpumask); |
37 | kfree(hctx->ctxs); | 46 | kfree(hctx->ctxs); |
38 | kfree(hctx); | 47 | kfree(hctx); |
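
With this hunk, cancelling run_work and freeing the flush queue, ctx map and SRCU state move into blk_mq_hw_sysfs_release(), so they happen only after the last kobject reference is dropped rather than when the hardware queue is logically exited. A refcount-only userspace sketch of that ordering, with hypothetical field names:

/* Hedged sketch: a plain refcount stands in for the hctx kobject. */
#include <stdio.h>
#include <stdlib.h>

struct fake_hctx {
	int refs;			/* stand-in for the kobject refcount */
	void *flush_queue;		/* stand-in for hctx->fq and friends */
};

static void fake_hctx_release(struct fake_hctx *h)
{
	/* resources that sysfs readers might still touch are freed last */
	free(h->flush_queue);
	free(h);
	puts("hctx resources released");
}

static void fake_hctx_put(struct fake_hctx *h)
{
	if (--h->refs == 0)
		fake_hctx_release(h);
}

int main(void)
{
	struct fake_hctx *h = calloc(1, sizeof(*h));

	h->refs = 2;			/* queue reference + sysfs reference */
	h->flush_queue = malloc(64);

	fake_hctx_put(h);		/* like blk_mq_exit_hctx(): nothing freed yet */
	fake_hctx_put(h);		/* like the sysfs release: freed here */
	return 0;
}
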
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index a4931fc7be8a..7513c8eaabee 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Tag allocation using scalable bitmaps. Uses active queue tracking to support | 3 | * Tag allocation using scalable bitmaps. Uses active queue tracking to support |
3 | * fairer distribution of tags between multiple submitters when a shared tag map | 4 | * fairer distribution of tags between multiple submitters when a shared tag map |
diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c index 370827163835..75a52c18a8f6 100644 --- a/block/blk-mq-virtio.c +++ b/block/blk-mq-virtio.c | |||
@@ -1,14 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Copyright (c) 2016 Christoph Hellwig. | 3 | * Copyright (c) 2016 Christoph Hellwig. |
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | 4 | */ |
13 | #include <linux/device.h> | 5 | #include <linux/device.h> |
14 | #include <linux/blk-mq.h> | 6 | #include <linux/blk-mq.h> |
diff --git a/block/blk-mq.c b/block/blk-mq.c index fc60ed7e940e..08a6248d8536 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Block multiqueue core code | 3 | * Block multiqueue core code |
3 | * | 4 | * |
@@ -2062,7 +2063,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, | |||
2062 | list_del_init(&page->lru); | 2063 | list_del_init(&page->lru); |
2063 | /* | 2064 | /* |
2064 | * Remove kmemleak object previously allocated in | 2065 | * Remove kmemleak object previously allocated in |
2065 | * blk_mq_init_rq_map(). | 2066 | * blk_mq_alloc_rqs(). |
2066 | */ | 2067 | */ |
2067 | kmemleak_free(page_address(page)); | 2068 | kmemleak_free(page_address(page)); |
2068 | __free_pages(page, page->private); | 2069 | __free_pages(page, page->private); |
@@ -2267,12 +2268,11 @@ static void blk_mq_exit_hctx(struct request_queue *q, | |||
2267 | if (set->ops->exit_hctx) | 2268 | if (set->ops->exit_hctx) |
2268 | set->ops->exit_hctx(hctx, hctx_idx); | 2269 | set->ops->exit_hctx(hctx, hctx_idx); |
2269 | 2270 | ||
2270 | if (hctx->flags & BLK_MQ_F_BLOCKING) | ||
2271 | cleanup_srcu_struct(hctx->srcu); | ||
2272 | |||
2273 | blk_mq_remove_cpuhp(hctx); | 2271 | blk_mq_remove_cpuhp(hctx); |
2274 | blk_free_flush_queue(hctx->fq); | 2272 | |
2275 | sbitmap_free(&hctx->ctx_map); | 2273 | spin_lock(&q->unused_hctx_lock); |
2274 | list_add(&hctx->hctx_list, &q->unused_hctx_list); | ||
2275 | spin_unlock(&q->unused_hctx_lock); | ||
2276 | } | 2276 | } |
2277 | 2277 | ||
2278 | static void blk_mq_exit_hw_queues(struct request_queue *q, | 2278 | static void blk_mq_exit_hw_queues(struct request_queue *q, |
@@ -2289,15 +2289,65 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, | |||
2289 | } | 2289 | } |
2290 | } | 2290 | } |
2291 | 2291 | ||
2292 | static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) | ||
2293 | { | ||
2294 | int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); | ||
2295 | |||
2296 | BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu), | ||
2297 | __alignof__(struct blk_mq_hw_ctx)) != | ||
2298 | sizeof(struct blk_mq_hw_ctx)); | ||
2299 | |||
2300 | if (tag_set->flags & BLK_MQ_F_BLOCKING) | ||
2301 | hw_ctx_size += sizeof(struct srcu_struct); | ||
2302 | |||
2303 | return hw_ctx_size; | ||
2304 | } | ||
2305 | |||
2292 | static int blk_mq_init_hctx(struct request_queue *q, | 2306 | static int blk_mq_init_hctx(struct request_queue *q, |
2293 | struct blk_mq_tag_set *set, | 2307 | struct blk_mq_tag_set *set, |
2294 | struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) | 2308 | struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) |
2295 | { | 2309 | { |
2296 | int node; | 2310 | hctx->queue_num = hctx_idx; |
2311 | |||
2312 | cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); | ||
2313 | |||
2314 | hctx->tags = set->tags[hctx_idx]; | ||
2315 | |||
2316 | if (set->ops->init_hctx && | ||
2317 | set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) | ||
2318 | goto unregister_cpu_notifier; | ||
2319 | |||
2320 | if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, | ||
2321 | hctx->numa_node)) | ||
2322 | goto exit_hctx; | ||
2323 | return 0; | ||
2324 | |||
2325 | exit_hctx: | ||
2326 | if (set->ops->exit_hctx) | ||
2327 | set->ops->exit_hctx(hctx, hctx_idx); | ||
2328 | unregister_cpu_notifier: | ||
2329 | blk_mq_remove_cpuhp(hctx); | ||
2330 | return -1; | ||
2331 | } | ||
2332 | |||
2333 | static struct blk_mq_hw_ctx * | ||
2334 | blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, | ||
2335 | int node) | ||
2336 | { | ||
2337 | struct blk_mq_hw_ctx *hctx; | ||
2338 | gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY; | ||
2339 | |||
2340 | hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node); | ||
2341 | if (!hctx) | ||
2342 | goto fail_alloc_hctx; | ||
2297 | 2343 | ||
2298 | node = hctx->numa_node; | 2344 | if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) |
2345 | goto free_hctx; | ||
2346 | |||
2347 | atomic_set(&hctx->nr_active, 0); | ||
2299 | if (node == NUMA_NO_NODE) | 2348 | if (node == NUMA_NO_NODE) |
2300 | node = hctx->numa_node = set->numa_node; | 2349 | node = set->numa_node; |
2350 | hctx->numa_node = node; | ||
2301 | 2351 | ||
2302 | INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); | 2352 | INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); |
2303 | spin_lock_init(&hctx->lock); | 2353 | spin_lock_init(&hctx->lock); |
@@ -2305,58 +2355,47 @@ static int blk_mq_init_hctx(struct request_queue *q, | |||
2305 | hctx->queue = q; | 2355 | hctx->queue = q; |
2306 | hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; | 2356 | hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; |
2307 | 2357 | ||
2308 | cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); | 2358 | INIT_LIST_HEAD(&hctx->hctx_list); |
2309 | |||
2310 | hctx->tags = set->tags[hctx_idx]; | ||
2311 | 2359 | ||
2312 | /* | 2360 | /* |
2313 | * Allocate space for all possible cpus to avoid allocation at | 2361 | * Allocate space for all possible cpus to avoid allocation at |
2314 | * runtime | 2362 | * runtime |
2315 | */ | 2363 | */ |
2316 | hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), | 2364 | hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), |
2317 | GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); | 2365 | gfp, node); |
2318 | if (!hctx->ctxs) | 2366 | if (!hctx->ctxs) |
2319 | goto unregister_cpu_notifier; | 2367 | goto free_cpumask; |
2320 | 2368 | ||
2321 | if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), | 2369 | if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), |
2322 | GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node)) | 2370 | gfp, node)) |
2323 | goto free_ctxs; | 2371 | goto free_ctxs; |
2324 | |||
2325 | hctx->nr_ctx = 0; | 2372 | hctx->nr_ctx = 0; |
2326 | 2373 | ||
2327 | spin_lock_init(&hctx->dispatch_wait_lock); | 2374 | spin_lock_init(&hctx->dispatch_wait_lock); |
2328 | init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); | 2375 | init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); |
2329 | INIT_LIST_HEAD(&hctx->dispatch_wait.entry); | 2376 | INIT_LIST_HEAD(&hctx->dispatch_wait.entry); |
2330 | 2377 | ||
2331 | if (set->ops->init_hctx && | ||
2332 | set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) | ||
2333 | goto free_bitmap; | ||
2334 | |||
2335 | hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size, | 2378 | hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size, |
2336 | GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); | 2379 | gfp); |
2337 | if (!hctx->fq) | 2380 | if (!hctx->fq) |
2338 | goto exit_hctx; | 2381 | goto free_bitmap; |
2339 | |||
2340 | if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node)) | ||
2341 | goto free_fq; | ||
2342 | 2382 | ||
2343 | if (hctx->flags & BLK_MQ_F_BLOCKING) | 2383 | if (hctx->flags & BLK_MQ_F_BLOCKING) |
2344 | init_srcu_struct(hctx->srcu); | 2384 | init_srcu_struct(hctx->srcu); |
2385 | blk_mq_hctx_kobj_init(hctx); | ||
2345 | 2386 | ||
2346 | return 0; | 2387 | return hctx; |
2347 | 2388 | ||
2348 | free_fq: | ||
2349 | blk_free_flush_queue(hctx->fq); | ||
2350 | exit_hctx: | ||
2351 | if (set->ops->exit_hctx) | ||
2352 | set->ops->exit_hctx(hctx, hctx_idx); | ||
2353 | free_bitmap: | 2389 | free_bitmap: |
2354 | sbitmap_free(&hctx->ctx_map); | 2390 | sbitmap_free(&hctx->ctx_map); |
2355 | free_ctxs: | 2391 | free_ctxs: |
2356 | kfree(hctx->ctxs); | 2392 | kfree(hctx->ctxs); |
2357 | unregister_cpu_notifier: | 2393 | free_cpumask: |
2358 | blk_mq_remove_cpuhp(hctx); | 2394 | free_cpumask_var(hctx->cpumask); |
2359 | return -1; | 2395 | free_hctx: |
2396 | kfree(hctx); | ||
2397 | fail_alloc_hctx: | ||
2398 | return NULL; | ||
2360 | } | 2399 | } |
2361 | 2400 | ||
2362 | static void blk_mq_init_cpu_queues(struct request_queue *q, | 2401 | static void blk_mq_init_cpu_queues(struct request_queue *q, |
@@ -2631,13 +2670,17 @@ static int blk_mq_alloc_ctxs(struct request_queue *q) | |||
2631 | */ | 2670 | */ |
2632 | void blk_mq_release(struct request_queue *q) | 2671 | void blk_mq_release(struct request_queue *q) |
2633 | { | 2672 | { |
2634 | struct blk_mq_hw_ctx *hctx; | 2673 | struct blk_mq_hw_ctx *hctx, *next; |
2635 | unsigned int i; | 2674 | int i; |
2636 | 2675 | ||
2637 | /* hctx kobj stays in hctx */ | 2676 | cancel_delayed_work_sync(&q->requeue_work); |
2638 | queue_for_each_hw_ctx(q, hctx, i) { | 2677 | |
2639 | if (!hctx) | 2678 | queue_for_each_hw_ctx(q, hctx, i) |
2640 | continue; | 2679 | WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); |
2680 | |||
2681 | /* all hctx are in .unused_hctx_list now */ | ||
2682 | list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { | ||
2683 | list_del_init(&hctx->hctx_list); | ||
2641 | kobject_put(&hctx->kobj); | 2684 | kobject_put(&hctx->kobj); |
2642 | } | 2685 | } |
2643 | 2686 | ||
@@ -2700,51 +2743,38 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, | |||
2700 | } | 2743 | } |
2701 | EXPORT_SYMBOL(blk_mq_init_sq_queue); | 2744 | EXPORT_SYMBOL(blk_mq_init_sq_queue); |
2702 | 2745 | ||
2703 | static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) | ||
2704 | { | ||
2705 | int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); | ||
2706 | |||
2707 | BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu), | ||
2708 | __alignof__(struct blk_mq_hw_ctx)) != | ||
2709 | sizeof(struct blk_mq_hw_ctx)); | ||
2710 | |||
2711 | if (tag_set->flags & BLK_MQ_F_BLOCKING) | ||
2712 | hw_ctx_size += sizeof(struct srcu_struct); | ||
2713 | |||
2714 | return hw_ctx_size; | ||
2715 | } | ||
2716 | |||
2717 | static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( | 2746 | static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( |
2718 | struct blk_mq_tag_set *set, struct request_queue *q, | 2747 | struct blk_mq_tag_set *set, struct request_queue *q, |
2719 | int hctx_idx, int node) | 2748 | int hctx_idx, int node) |
2720 | { | 2749 | { |
2721 | struct blk_mq_hw_ctx *hctx; | 2750 | struct blk_mq_hw_ctx *hctx = NULL, *tmp; |
2722 | |||
2723 | hctx = kzalloc_node(blk_mq_hw_ctx_size(set), | ||
2724 | GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, | ||
2725 | node); | ||
2726 | if (!hctx) | ||
2727 | return NULL; | ||
2728 | 2751 | ||
2729 | if (!zalloc_cpumask_var_node(&hctx->cpumask, | 2752 | /* reuse dead hctx first */ |
2730 | GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, | 2753 | spin_lock(&q->unused_hctx_lock); |
2731 | node)) { | 2754 | list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { |
2732 | kfree(hctx); | 2755 | if (tmp->numa_node == node) { |
2733 | return NULL; | 2756 | hctx = tmp; |
2757 | break; | ||
2758 | } | ||
2734 | } | 2759 | } |
2760 | if (hctx) | ||
2761 | list_del_init(&hctx->hctx_list); | ||
2762 | spin_unlock(&q->unused_hctx_lock); | ||
2735 | 2763 | ||
2736 | atomic_set(&hctx->nr_active, 0); | 2764 | if (!hctx) |
2737 | hctx->numa_node = node; | 2765 | hctx = blk_mq_alloc_hctx(q, set, node); |
2738 | hctx->queue_num = hctx_idx; | 2766 | if (!hctx) |
2767 | goto fail; | ||
2739 | 2768 | ||
2740 | if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) { | 2769 | if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) |
2741 | free_cpumask_var(hctx->cpumask); | 2770 | goto free_hctx; |
2742 | kfree(hctx); | ||
2743 | return NULL; | ||
2744 | } | ||
2745 | blk_mq_hctx_kobj_init(hctx); | ||
2746 | 2771 | ||
2747 | return hctx; | 2772 | return hctx; |
2773 | |||
2774 | free_hctx: | ||
2775 | kobject_put(&hctx->kobj); | ||
2776 | fail: | ||
2777 | return NULL; | ||
2748 | } | 2778 | } |
2749 | 2779 | ||
2750 | static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, | 2780 | static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, |
@@ -2770,10 +2800,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, | |||
2770 | 2800 | ||
2771 | hctx = blk_mq_alloc_and_init_hctx(set, q, i, node); | 2801 | hctx = blk_mq_alloc_and_init_hctx(set, q, i, node); |
2772 | if (hctx) { | 2802 | if (hctx) { |
2773 | if (hctxs[i]) { | 2803 | if (hctxs[i]) |
2774 | blk_mq_exit_hctx(q, set, hctxs[i], i); | 2804 | blk_mq_exit_hctx(q, set, hctxs[i], i); |
2775 | kobject_put(&hctxs[i]->kobj); | ||
2776 | } | ||
2777 | hctxs[i] = hctx; | 2805 | hctxs[i] = hctx; |
2778 | } else { | 2806 | } else { |
2779 | if (hctxs[i]) | 2807 | if (hctxs[i]) |
@@ -2804,9 +2832,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, | |||
2804 | if (hctx->tags) | 2832 | if (hctx->tags) |
2805 | blk_mq_free_map_and_requests(set, j); | 2833 | blk_mq_free_map_and_requests(set, j); |
2806 | blk_mq_exit_hctx(q, set, hctx, j); | 2834 | blk_mq_exit_hctx(q, set, hctx, j); |
2807 | kobject_put(&hctx->kobj); | ||
2808 | hctxs[j] = NULL; | 2835 | hctxs[j] = NULL; |
2809 | |||
2810 | } | 2836 | } |
2811 | } | 2837 | } |
2812 | mutex_unlock(&q->sysfs_lock); | 2838 | mutex_unlock(&q->sysfs_lock); |
@@ -2849,6 +2875,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, | |||
2849 | if (!q->queue_hw_ctx) | 2875 | if (!q->queue_hw_ctx) |
2850 | goto err_sys_init; | 2876 | goto err_sys_init; |
2851 | 2877 | ||
2878 | INIT_LIST_HEAD(&q->unused_hctx_list); | ||
2879 | spin_lock_init(&q->unused_hctx_lock); | ||
2880 | |||
2852 | blk_mq_realloc_hw_ctxs(set, q); | 2881 | blk_mq_realloc_hw_ctxs(set, q); |
2853 | if (!q->nr_hw_queues) | 2882 | if (!q->nr_hw_queues) |
2854 | goto err_hctxs; | 2883 | goto err_hctxs; |
@@ -2905,7 +2934,8 @@ err_exit: | |||
2905 | } | 2934 | } |
2906 | EXPORT_SYMBOL(blk_mq_init_allocated_queue); | 2935 | EXPORT_SYMBOL(blk_mq_init_allocated_queue); |
2907 | 2936 | ||
2908 | void blk_mq_free_queue(struct request_queue *q) | 2937 | /* tags can _not_ be used after returning from blk_mq_exit_queue */ |
2938 | void blk_mq_exit_queue(struct request_queue *q) | ||
2909 | { | 2939 | { |
2910 | struct blk_mq_tag_set *set = q->tag_set; | 2940 | struct blk_mq_tag_set *set = q->tag_set; |
2911 | 2941 | ||
diff --git a/block/blk-mq.h b/block/blk-mq.h index 423ea88ab6fb..633a5a77ee8b 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h | |||
@@ -37,7 +37,7 @@ struct blk_mq_ctx { | |||
37 | struct kobject kobj; | 37 | struct kobject kobj; |
38 | } ____cacheline_aligned_in_smp; | 38 | } ____cacheline_aligned_in_smp; |
39 | 39 | ||
40 | void blk_mq_free_queue(struct request_queue *q); | 40 | void blk_mq_exit_queue(struct request_queue *q); |
41 | int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); | 41 | int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); |
42 | void blk_mq_wake_waiters(struct request_queue *q); | 42 | void blk_mq_wake_waiters(struct request_queue *q); |
43 | bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); | 43 | bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); |
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index d169d7188fa6..3f55b56f24bc 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c | |||
@@ -1,3 +1,5 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
1 | #include "blk-rq-qos.h" | 3 | #include "blk-rq-qos.h" |
2 | 4 | ||
3 | /* | 5 | /* |
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 564851889550..2300e038b9fa 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | #ifndef RQ_QOS_H | 2 | #ifndef RQ_QOS_H |
2 | #define RQ_QOS_H | 3 | #define RQ_QOS_H |
3 | 4 | ||
diff --git a/block/blk-settings.c b/block/blk-settings.c index 6375afaedcec..3facc41476be 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Functions related to setting various queue properties from drivers | 3 | * Functions related to setting various queue properties from drivers |
3 | */ | 4 | */ |
@@ -663,22 +664,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, | |||
663 | EXPORT_SYMBOL(disk_stack_limits); | 664 | EXPORT_SYMBOL(disk_stack_limits); |
664 | 665 | ||
665 | /** | 666 | /** |
666 | * blk_queue_dma_pad - set pad mask | ||
667 | * @q: the request queue for the device | ||
668 | * @mask: pad mask | ||
669 | * | ||
670 | * Set dma pad mask. | ||
671 | * | ||
672 | * Appending pad buffer to a request modifies the last entry of a | ||
673 | * scatter list such that it includes the pad buffer. | ||
674 | **/ | ||
675 | void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) | ||
676 | { | ||
677 | q->dma_pad_mask = mask; | ||
678 | } | ||
679 | EXPORT_SYMBOL(blk_queue_dma_pad); | ||
680 | |||
681 | /** | ||
682 | * blk_queue_update_dma_pad - update pad mask | 667 | * blk_queue_update_dma_pad - update pad mask |
683 | * @q: the request queue for the device | 668 | * @q: the request queue for the device |
684 | * @mask: pad mask | 669 | * @mask: pad mask |
diff --git a/block/blk-stat.c b/block/blk-stat.c index 696a04176e4d..940f15d600f8 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Block stat tracking code | 3 | * Block stat tracking code |
3 | * | 4 | * |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 7a95a1eb27e1..a16a02c52a85 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -728,7 +728,7 @@ static struct queue_sysfs_entry throtl_sample_time_entry = { | |||
728 | }; | 728 | }; |
729 | #endif | 729 | #endif |
730 | 730 | ||
731 | static struct attribute *default_attrs[] = { | 731 | static struct attribute *queue_attrs[] = { |
732 | &queue_requests_entry.attr, | 732 | &queue_requests_entry.attr, |
733 | &queue_ra_entry.attr, | 733 | &queue_ra_entry.attr, |
734 | &queue_max_hw_sectors_entry.attr, | 734 | &queue_max_hw_sectors_entry.attr, |
@@ -769,7 +769,25 @@ static struct attribute *default_attrs[] = { | |||
769 | #endif | 769 | #endif |
770 | NULL, | 770 | NULL, |
771 | }; | 771 | }; |
772 | ATTRIBUTE_GROUPS(default); | 772 | |
773 | static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr, | ||
774 | int n) | ||
775 | { | ||
776 | struct request_queue *q = | ||
777 | container_of(kobj, struct request_queue, kobj); | ||
778 | |||
779 | if (attr == &queue_io_timeout_entry.attr && | ||
780 | (!q->mq_ops || !q->mq_ops->timeout)) | ||
781 | return 0; | ||
782 | |||
783 | return attr->mode; | ||
784 | } | ||
785 | |||
786 | static struct attribute_group queue_attr_group = { | ||
787 | .attrs = queue_attrs, | ||
788 | .is_visible = queue_attr_visible, | ||
789 | }; | ||
790 | |||
773 | 791 | ||
774 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) | 792 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) |
775 | 793 | ||
@@ -891,7 +909,6 @@ static const struct sysfs_ops queue_sysfs_ops = { | |||
891 | 909 | ||
892 | struct kobj_type blk_queue_ktype = { | 910 | struct kobj_type blk_queue_ktype = { |
893 | .sysfs_ops = &queue_sysfs_ops, | 911 | .sysfs_ops = &queue_sysfs_ops, |
894 | .default_groups = default_groups, | ||
895 | .release = blk_release_queue, | 912 | .release = blk_release_queue, |
896 | }; | 913 | }; |
897 | 914 | ||
@@ -940,6 +957,14 @@ int blk_register_queue(struct gendisk *disk) | |||
940 | goto unlock; | 957 | goto unlock; |
941 | } | 958 | } |
942 | 959 | ||
960 | ret = sysfs_create_group(&q->kobj, &queue_attr_group); | ||
961 | if (ret) { | ||
962 | blk_trace_remove_sysfs(dev); | ||
963 | kobject_del(&q->kobj); | ||
964 | kobject_put(&dev->kobj); | ||
965 | goto unlock; | ||
966 | } | ||
967 | |||
943 | if (queue_is_mq(q)) { | 968 | if (queue_is_mq(q)) { |
944 | __blk_mq_register_dev(dev, q); | 969 | __blk_mq_register_dev(dev, q); |
945 | blk_mq_debugfs_register(q); | 970 | blk_mq_debugfs_register(q); |
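
Instead of default_groups, the queue attributes are now registered explicitly with sysfs_create_group() and gated by an is_visible callback, which hides io_timeout on queues without a timeout handler. A small userspace analogue of that gating idea follows; the attribute table and the "has timeout handler" flag are invented for illustration and this is not the sysfs API itself:

/* Hedged analogue of an attribute group with an is_visible hook:
 * entries the hook rejects are simply never exposed. */
#include <stdio.h>

struct fake_attr {
	const char *name;
	int needs_timeout_handler;	/* like queue_io_timeout_entry */
};

static const struct fake_attr attrs[] = {
	{ "nr_requests", 0 },
	{ "io_timeout", 1 },
};

static int attr_visible(const struct fake_attr *a, int have_timeout_handler)
{
	if (a->needs_timeout_handler && !have_timeout_handler)
		return 0;		/* hidden, like is_visible returning 0 */
	return 1;
}

int main(void)
{
	int have_timeout_handler = 0;	/* e.g. a bio-based queue */

	for (unsigned int i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
		if (attr_visible(&attrs[i], have_timeout_handler))
			printf("exposing %s\n", attrs[i].name);
	return 0;
}
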
diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 124c26128bf6..8aa68fae96ad 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Functions related to generic timeout handling of requests. | 3 | * Functions related to generic timeout handling of requests. |
3 | */ | 4 | */ |
diff --git a/block/blk-wbt.c b/block/blk-wbt.c index fd166fbb0f65..313f45a37e9d 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * buffered writeback throttling. loosely based on CoDel. We can't drop | 3 | * buffered writeback throttling. loosely based on CoDel. We can't drop |
3 | * packets for IO scheduling, so the logic is something like this: | 4 | * packets for IO scheduling, so the logic is something like this: |
diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 2d98803faec2..ae7e91bd0618 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Zoned block device handling | 3 | * Zoned block device handling |
3 | * | 4 | * |
diff --git a/block/blk.h b/block/blk.h index 5d636ee41663..e27fd1512e4b 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -75,7 +75,7 @@ static inline bool biovec_phys_mergeable(struct request_queue *q, | |||
75 | 75 | ||
76 | if (addr1 + vec1->bv_len != addr2) | 76 | if (addr1 + vec1->bv_len != addr2) |
77 | return false; | 77 | return false; |
78 | if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2)) | 78 | if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page)) |
79 | return false; | 79 | return false; |
80 | if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) | 80 | if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) |
81 | return false; | 81 | return false; |
diff --git a/block/bounce.c b/block/bounce.c index 47eb7e936e22..f8ed677a1bf7 100644 --- a/block/bounce.c +++ b/block/bounce.c | |||
@@ -163,14 +163,13 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool) | |||
163 | { | 163 | { |
164 | struct bio *bio_orig = bio->bi_private; | 164 | struct bio *bio_orig = bio->bi_private; |
165 | struct bio_vec *bvec, orig_vec; | 165 | struct bio_vec *bvec, orig_vec; |
166 | int i; | ||
167 | struct bvec_iter orig_iter = bio_orig->bi_iter; | 166 | struct bvec_iter orig_iter = bio_orig->bi_iter; |
168 | struct bvec_iter_all iter_all; | 167 | struct bvec_iter_all iter_all; |
169 | 168 | ||
170 | /* | 169 | /* |
171 | * free up bounce indirect pages used | 170 | * free up bounce indirect pages used |
172 | */ | 171 | */ |
173 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 172 | bio_for_each_segment_all(bvec, bio, iter_all) { |
174 | orig_vec = bio_iter_iovec(bio_orig, orig_iter); | 173 | orig_vec = bio_iter_iovec(bio_orig, orig_iter); |
175 | if (bvec->bv_page != orig_vec.bv_page) { | 174 | if (bvec->bv_page != orig_vec.bv_page) { |
176 | dec_zone_page_state(bvec->bv_page, NR_BOUNCE); | 175 | dec_zone_page_state(bvec->bv_page, NR_BOUNCE); |
diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 005e2b75d775..b898a1cdf872 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c | |||
@@ -1,24 +1,10 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
1 | /* | 2 | /* |
2 | * BSG helper library | 3 | * BSG helper library |
3 | * | 4 | * |
4 | * Copyright (C) 2008 James Smart, Emulex Corporation | 5 | * Copyright (C) 2008 James Smart, Emulex Corporation |
5 | * Copyright (C) 2011 Red Hat, Inc. All rights reserved. | 6 | * Copyright (C) 2011 Red Hat, Inc. All rights reserved. |
6 | * Copyright (C) 2011 Mike Christie | 7 | * Copyright (C) 2011 Mike Christie |
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | * | ||
22 | */ | 8 | */ |
23 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
24 | #include <linux/blk-mq.h> | 10 | #include <linux/blk-mq.h> |
diff --git a/block/bsg.c b/block/bsg.c index f306853c6b08..833c44b3d458 100644 --- a/block/bsg.c +++ b/block/bsg.c | |||
@@ -1,13 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * bsg.c - block layer implementation of the sg v4 interface | 3 | * bsg.c - block layer implementation of the sg v4 interface |
3 | * | ||
4 | * Copyright (C) 2004 Jens Axboe <axboe@suse.de> SUSE Labs | ||
5 | * Copyright (C) 2004 Peter M. Jones <pjones@redhat.com> | ||
6 | * | ||
7 | * This file is subject to the terms and conditions of the GNU General Public | ||
8 | * License version 2. See the file "COPYING" in the main directory of this | ||
9 | * archive for more details. | ||
10 | * | ||
11 | */ | 4 | */ |
12 | #include <linux/module.h> | 5 | #include <linux/module.h> |
13 | #include <linux/init.h> | 6 | #include <linux/init.h> |
diff --git a/block/elevator.c b/block/elevator.c index d6d835a08de6..ec55d5fc0b3e 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Block device elevator/IO-scheduler. | 3 | * Block device elevator/IO-scheduler. |
3 | * | 4 | * |
@@ -509,8 +510,6 @@ void elv_unregister_queue(struct request_queue *q) | |||
509 | 510 | ||
510 | int elv_register(struct elevator_type *e) | 511 | int elv_register(struct elevator_type *e) |
511 | { | 512 | { |
512 | char *def = ""; | ||
513 | |||
514 | /* create icq_cache if requested */ | 513 | /* create icq_cache if requested */ |
515 | if (e->icq_size) { | 514 | if (e->icq_size) { |
516 | if (WARN_ON(e->icq_size < sizeof(struct io_cq)) || | 515 | if (WARN_ON(e->icq_size < sizeof(struct io_cq)) || |
@@ -535,8 +534,8 @@ int elv_register(struct elevator_type *e) | |||
535 | list_add_tail(&e->list, &elv_list); | 534 | list_add_tail(&e->list, &elv_list); |
536 | spin_unlock(&elv_list_lock); | 535 | spin_unlock(&elv_list_lock); |
537 | 536 | ||
538 | printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, | 537 | printk(KERN_INFO "io scheduler %s registered\n", e->elevator_name); |
539 | def); | 538 | |
540 | return 0; | 539 | return 0; |
541 | } | 540 | } |
542 | EXPORT_SYMBOL_GPL(elv_register); | 541 | EXPORT_SYMBOL_GPL(elv_register); |
diff --git a/block/genhd.c b/block/genhd.c index 703267865f14..ad6826628e79 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * gendisk handling | 3 | * gendisk handling |
3 | */ | 4 | */ |
@@ -531,6 +532,18 @@ void blk_free_devt(dev_t devt) | |||
531 | } | 532 | } |
532 | } | 533 | } |
533 | 534 | ||
535 | /** | ||
536 | * We invalidate devt by assigning NULL pointer for devt in idr. | ||
537 | */ | ||
538 | void blk_invalidate_devt(dev_t devt) | ||
539 | { | ||
540 | if (MAJOR(devt) == BLOCK_EXT_MAJOR) { | ||
541 | spin_lock_bh(&ext_devt_lock); | ||
542 | idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt))); | ||
543 | spin_unlock_bh(&ext_devt_lock); | ||
544 | } | ||
545 | } | ||
546 | |||
534 | static char *bdevt_str(dev_t devt, char *buf) | 547 | static char *bdevt_str(dev_t devt, char *buf) |
535 | { | 548 | { |
536 | if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { | 549 | if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { |
@@ -793,6 +806,13 @@ void del_gendisk(struct gendisk *disk) | |||
793 | 806 | ||
794 | if (!(disk->flags & GENHD_FL_HIDDEN)) | 807 | if (!(disk->flags & GENHD_FL_HIDDEN)) |
795 | blk_unregister_region(disk_devt(disk), disk->minors); | 808 | blk_unregister_region(disk_devt(disk), disk->minors); |
809 | /* | ||
810 | * Remove gendisk pointer from idr so that it cannot be looked up | ||
811 | * while RCU period before freeing gendisk is running to prevent | ||
812 | * use-after-free issues. Note that the device number stays | ||
813 | * "in-use" until we really free the gendisk. | ||
814 | */ | ||
815 | blk_invalidate_devt(disk_devt(disk)); | ||
796 | 816 | ||
797 | kobject_put(disk->part0.holder_dir); | 817 | kobject_put(disk->part0.holder_dir); |
798 | kobject_put(disk->slave_dir); | 818 | kobject_put(disk->slave_dir); |
@@ -1628,12 +1648,11 @@ static unsigned long disk_events_poll_jiffies(struct gendisk *disk) | |||
1628 | 1648 | ||
1629 | /* | 1649 | /* |
1630 | * If device-specific poll interval is set, always use it. If | 1650 | * If device-specific poll interval is set, always use it. If |
1631 | * the default is being used, poll iff there are events which | 1651 | * the default is being used, poll if the POLL flag is set. |
1632 | * can't be monitored asynchronously. | ||
1633 | */ | 1652 | */ |
1634 | if (ev->poll_msecs >= 0) | 1653 | if (ev->poll_msecs >= 0) |
1635 | intv_msecs = ev->poll_msecs; | 1654 | intv_msecs = ev->poll_msecs; |
1636 | else if (disk->events & ~disk->async_events) | 1655 | else if (disk->event_flags & DISK_EVENT_FLAG_POLL) |
1637 | intv_msecs = disk_events_dfl_poll_msecs; | 1656 | intv_msecs = disk_events_dfl_poll_msecs; |
1638 | 1657 | ||
1639 | return msecs_to_jiffies(intv_msecs); | 1658 | return msecs_to_jiffies(intv_msecs); |
@@ -1843,11 +1862,13 @@ static void disk_check_events(struct disk_events *ev, | |||
1843 | 1862 | ||
1844 | /* | 1863 | /* |
1845 | * Tell userland about new events. Only the events listed in | 1864 | * Tell userland about new events. Only the events listed in |
1846 | * @disk->events are reported. Unlisted events are processed the | 1865 | * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT |
1847 | * same internally but never get reported to userland. | 1866 | * is set. Otherwise, events are processed internally but never |
1867 | * get reported to userland. | ||
1848 | */ | 1868 | */ |
1849 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) | 1869 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) |
1850 | if (events & disk->events & (1 << i)) | 1870 | if ((events & disk->events & (1 << i)) && |
1871 | (disk->event_flags & DISK_EVENT_FLAG_UEVENT)) | ||
1851 | envp[nr_events++] = disk_uevents[i]; | 1872 | envp[nr_events++] = disk_uevents[i]; |
1852 | 1873 | ||
1853 | if (nr_events) | 1874 | if (nr_events) |
@@ -1860,6 +1881,7 @@ static void disk_check_events(struct disk_events *ev, | |||
1860 | * | 1881 | * |
1861 | * events : list of all supported events | 1882 | * events : list of all supported events |
1862 | * events_async : list of events which can be detected w/o polling | 1883 | * events_async : list of events which can be detected w/o polling |
1884 | * (always empty, only for backwards compatibility) | ||
1863 | * events_poll_msecs : polling interval, 0: disable, -1: system default | 1885 | * events_poll_msecs : polling interval, 0: disable, -1: system default |
1864 | */ | 1886 | */ |
1865 | static ssize_t __disk_events_show(unsigned int events, char *buf) | 1887 | static ssize_t __disk_events_show(unsigned int events, char *buf) |
@@ -1884,15 +1906,16 @@ static ssize_t disk_events_show(struct device *dev, | |||
1884 | { | 1906 | { |
1885 | struct gendisk *disk = dev_to_disk(dev); | 1907 | struct gendisk *disk = dev_to_disk(dev); |
1886 | 1908 | ||
1909 | if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT)) | ||
1910 | return 0; | ||
1911 | |||
1887 | return __disk_events_show(disk->events, buf); | 1912 | return __disk_events_show(disk->events, buf); |
1888 | } | 1913 | } |
1889 | 1914 | ||
1890 | static ssize_t disk_events_async_show(struct device *dev, | 1915 | static ssize_t disk_events_async_show(struct device *dev, |
1891 | struct device_attribute *attr, char *buf) | 1916 | struct device_attribute *attr, char *buf) |
1892 | { | 1917 | { |
1893 | struct gendisk *disk = dev_to_disk(dev); | 1918 | return 0; |
1894 | |||
1895 | return __disk_events_show(disk->async_events, buf); | ||
1896 | } | 1919 | } |
1897 | 1920 | ||
1898 | static ssize_t disk_events_poll_msecs_show(struct device *dev, | 1921 | static ssize_t disk_events_poll_msecs_show(struct device *dev, |
@@ -1901,6 +1924,9 @@ static ssize_t disk_events_poll_msecs_show(struct device *dev, | |||
1901 | { | 1924 | { |
1902 | struct gendisk *disk = dev_to_disk(dev); | 1925 | struct gendisk *disk = dev_to_disk(dev); |
1903 | 1926 | ||
1927 | if (!disk->ev) | ||
1928 | return sprintf(buf, "-1\n"); | ||
1929 | |||
1904 | return sprintf(buf, "%ld\n", disk->ev->poll_msecs); | 1930 | return sprintf(buf, "%ld\n", disk->ev->poll_msecs); |
1905 | } | 1931 | } |
1906 | 1932 | ||
@@ -1917,6 +1943,9 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev, | |||
1917 | if (intv < 0 && intv != -1) | 1943 | if (intv < 0 && intv != -1) |
1918 | return -EINVAL; | 1944 | return -EINVAL; |
1919 | 1945 | ||
1946 | if (!disk->ev) | ||
1947 | return -ENODEV; | ||
1948 | |||
1920 | disk_block_events(disk); | 1949 | disk_block_events(disk); |
1921 | disk->ev->poll_msecs = intv; | 1950 | disk->ev->poll_msecs = intv; |
1922 | __disk_unblock_events(disk, true); | 1951 | __disk_unblock_events(disk, true); |
@@ -1981,7 +2010,7 @@ static void disk_alloc_events(struct gendisk *disk) | |||
1981 | { | 2010 | { |
1982 | struct disk_events *ev; | 2011 | struct disk_events *ev; |
1983 | 2012 | ||
1984 | if (!disk->fops->check_events) | 2013 | if (!disk->fops->check_events || !disk->events) |
1985 | return; | 2014 | return; |
1986 | 2015 | ||
1987 | ev = kzalloc(sizeof(*ev), GFP_KERNEL); | 2016 | ev = kzalloc(sizeof(*ev), GFP_KERNEL); |
@@ -2003,14 +2032,14 @@ static void disk_alloc_events(struct gendisk *disk) | |||
2003 | 2032 | ||
2004 | static void disk_add_events(struct gendisk *disk) | 2033 | static void disk_add_events(struct gendisk *disk) |
2005 | { | 2034 | { |
2006 | if (!disk->ev) | ||
2007 | return; | ||
2008 | |||
2009 | /* FIXME: error handling */ | 2035 | /* FIXME: error handling */ |
2010 | if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0) | 2036 | if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0) |
2011 | pr_warn("%s: failed to create sysfs files for events\n", | 2037 | pr_warn("%s: failed to create sysfs files for events\n", |
2012 | disk->disk_name); | 2038 | disk->disk_name); |
2013 | 2039 | ||
2040 | if (!disk->ev) | ||
2041 | return; | ||
2042 | |||
2014 | mutex_lock(&disk_events_mutex); | 2043 | mutex_lock(&disk_events_mutex); |
2015 | list_add_tail(&disk->ev->node, &disk_events); | 2044 | list_add_tail(&disk->ev->node, &disk_events); |
2016 | mutex_unlock(&disk_events_mutex); | 2045 | mutex_unlock(&disk_events_mutex); |
@@ -2024,14 +2053,13 @@ static void disk_add_events(struct gendisk *disk) | |||
2024 | 2053 | ||
2025 | static void disk_del_events(struct gendisk *disk) | 2054 | static void disk_del_events(struct gendisk *disk) |
2026 | { | 2055 | { |
2027 | if (!disk->ev) | 2056 | if (disk->ev) { |
2028 | return; | 2057 | disk_block_events(disk); |
2029 | |||
2030 | disk_block_events(disk); | ||
2031 | 2058 | ||
2032 | mutex_lock(&disk_events_mutex); | 2059 | mutex_lock(&disk_events_mutex); |
2033 | list_del_init(&disk->ev->node); | 2060 | list_del_init(&disk->ev->node); |
2034 | mutex_unlock(&disk_events_mutex); | 2061 | mutex_unlock(&disk_events_mutex); |
2062 | } | ||
2035 | 2063 | ||
2036 | sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); | 2064 | sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); |
2037 | } | 2065 | } |
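[Annotation] The genhd.c hunks above register the disk-event sysfs attributes even when no disk_events state gets allocated, so the show/store handlers must now tolerate a NULL disk->ev (reporting "-1" on read and failing writes with -ENODEV). Below is a minimal user-space sketch of that guard pattern; fake_disk, fake_events and the helper names are illustrative stand-ins, not the kernel API.

/*
 * Standalone sketch (not kernel code) of the guarded-attribute pattern the
 * genhd.c hunks move to: the attribute always exists, but reads report "-1"
 * and writes fail with -ENODEV while the optional events state is absent.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_events { long poll_msecs; };
struct fake_disk   { struct fake_events *ev; };        /* ev may stay NULL */

static int poll_msecs_show(const struct fake_disk *disk, char *buf, size_t len)
{
        if (!disk->ev)                          /* no events state allocated */
                return snprintf(buf, len, "-1\n");
        return snprintf(buf, len, "%ld\n", disk->ev->poll_msecs);
}

static int poll_msecs_store(struct fake_disk *disk, long intv)
{
        if (intv < 0 && intv != -1)
                return -EINVAL;
        if (!disk->ev)                          /* attribute exists, state does not */
                return -ENODEV;
        disk->ev->poll_msecs = intv;
        return 0;
}

int main(void)
{
        struct fake_disk disk = { .ev = NULL };
        char buf[32];

        poll_msecs_show(&disk, buf, sizeof(buf));
        printf("without events state: %s", buf);                 /* prints -1 */
        printf("store -> %d\n", poll_msecs_store(&disk, 2000));  /* -ENODEV */

        disk.ev = calloc(1, sizeof(*disk.ev));
        poll_msecs_store(&disk, 2000);
        poll_msecs_show(&disk, buf, sizeof(buf));
        printf("with events state: %s", buf);                    /* prints 2000 */
        free(disk.ev);
        return 0;
}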
diff --git a/block/ioctl.c b/block/ioctl.c index 4825c78a6baa..15a0eb80ada9 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | #include <linux/capability.h> | 2 | #include <linux/capability.h> |
2 | #include <linux/blkdev.h> | 3 | #include <linux/blkdev.h> |
3 | #include <linux/export.h> | 4 | #include <linux/export.h> |
diff --git a/block/ioprio.c b/block/ioprio.c index f9821080c92c..2e0559f157c8 100644 --- a/block/ioprio.c +++ b/block/ioprio.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * fs/ioprio.c | 3 | * fs/ioprio.c |
3 | * | 4 | * |
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index ec6a04e01bc1..c3b05119cebd 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c | |||
@@ -1,20 +1,9 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * The Kyber I/O scheduler. Controls latency by throttling queue depths using | 3 | * The Kyber I/O scheduler. Controls latency by throttling queue depths using |
3 | * scalable techniques. | 4 | * scalable techniques. |
4 | * | 5 | * |
5 | * Copyright (C) 2017 Facebook | 6 | * Copyright (C) 2017 Facebook |
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public | ||
9 | * License v2 as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
18 | */ | 7 | */ |
19 | 8 | ||
20 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 14288f864e94..1876f5712bfd 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler, | 3 | * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler, |
3 | * for the blk-mq scheduling framework | 4 | * for the blk-mq scheduling framework |
diff --git a/block/opal_proto.h b/block/opal_proto.h index e20be8258854..d9a05ad02eb5 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h | |||
@@ -1,18 +1,10 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * Copyright © 2016 Intel Corporation | 3 | * Copyright © 2016 Intel Corporation |
3 | * | 4 | * |
4 | * Authors: | 5 | * Authors: |
5 | * Rafael Antognolli <rafael.antognolli@intel.com> | 6 | * Rafael Antognolli <rafael.antognolli@intel.com> |
6 | * Scott Bauer <scott.bauer@intel.com> | 7 | * Scott Bauer <scott.bauer@intel.com> |
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | 8 | */ |
17 | #include <linux/types.h> | 9 | #include <linux/types.h> |
18 | 10 | ||
@@ -170,6 +162,8 @@ enum opal_token { | |||
170 | OPAL_READLOCKED = 0x07, | 162 | OPAL_READLOCKED = 0x07, |
171 | OPAL_WRITELOCKED = 0x08, | 163 | OPAL_WRITELOCKED = 0x08, |
172 | OPAL_ACTIVEKEY = 0x0A, | 164 | OPAL_ACTIVEKEY = 0x0A, |
165 | /* lockingsp table */ | ||
166 | OPAL_LIFECYCLE = 0x06, | ||
173 | /* locking info table */ | 167 | /* locking info table */ |
174 | OPAL_MAXRANGES = 0x04, | 168 | OPAL_MAXRANGES = 0x04, |
175 | /* mbr control */ | 169 | /* mbr control */ |
diff --git a/block/partition-generic.c b/block/partition-generic.c index 8e596a8dff32..aee643ce13d1 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c | |||
@@ -285,6 +285,13 @@ void delete_partition(struct gendisk *disk, int partno) | |||
285 | kobject_put(part->holder_dir); | 285 | kobject_put(part->holder_dir); |
286 | device_del(part_to_dev(part)); | 286 | device_del(part_to_dev(part)); |
287 | 287 | ||
288 | /* | ||
289 | * Remove gendisk pointer from idr so that it cannot be looked up | ||
290 | * while RCU period before freeing gendisk is running to prevent | ||
291 | * use-after-free issues. Note that the device number stays | ||
292 | * "in-use" until we really free the gendisk. | ||
293 | */ | ||
294 | blk_invalidate_devt(part_devt(part)); | ||
288 | hd_struct_kill(part); | 295 | hd_struct_kill(part); |
289 | } | 296 | } |
290 | 297 | ||
diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c index fbeb697374d5..7587700fad4a 100644 --- a/block/partitions/acorn.c +++ b/block/partitions/acorn.c | |||
@@ -1,12 +1,7 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * linux/fs/partitions/acorn.c | ||
3 | * | ||
4 | * Copyright (c) 1996-2000 Russell King. | 3 | * Copyright (c) 1996-2000 Russell King. |
5 | * | 4 | * |
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * Scan ADFS partitions on hard disk drives. Unfortunately, there | 5 | * Scan ADFS partitions on hard disk drives. Unfortunately, there |
11 | * isn't a standard for partitioning drives on Acorn machines, so | 6 | * isn't a standard for partitioning drives on Acorn machines, so |
12 | * every single manufacturer of SCSI and IDE cards created their own | 7 | * every single manufacturer of SCSI and IDE cards created their own |
diff --git a/block/partitions/aix.h b/block/partitions/aix.h index e0c66a987523..b4449f0b9f2b 100644 --- a/block/partitions/aix.h +++ b/block/partitions/aix.h | |||
@@ -1 +1,2 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | extern int aix_partition(struct parsed_partitions *state); | 2 | extern int aix_partition(struct parsed_partitions *state); |
diff --git a/block/partitions/amiga.h b/block/partitions/amiga.h index d094585cadaa..7e63f4d9d969 100644 --- a/block/partitions/amiga.h +++ b/block/partitions/amiga.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * fs/partitions/amiga.h | 3 | * fs/partitions/amiga.h |
3 | */ | 4 | */ |
diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 39f70d968754..db2fef7dfc47 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
1 | /************************************************************ | 2 | /************************************************************ |
2 | * EFI GUID Partition Table handling | 3 | * EFI GUID Partition Table handling |
3 | * | 4 | * |
@@ -7,21 +8,6 @@ | |||
7 | * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com> | 8 | * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com> |
8 | * Copyright 2000,2001,2002,2004 Dell Inc. | 9 | * Copyright 2000,2001,2002,2004 Dell Inc. |
9 | * | 10 | * |
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | * | ||
24 | * | ||
25 | * TODO: | 11 | * TODO: |
26 | * | 12 | * |
27 | * Changelog: | 13 | * Changelog: |
diff --git a/block/partitions/efi.h b/block/partitions/efi.h index abd0b19288a6..3e8576157575 100644 --- a/block/partitions/efi.h +++ b/block/partitions/efi.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
1 | /************************************************************ | 2 | /************************************************************ |
2 | * EFI GUID Partition Table | 3 | * EFI GUID Partition Table |
3 | * Per Intel EFI Specification v1.02 | 4 | * Per Intel EFI Specification v1.02 |
@@ -5,21 +6,6 @@ | |||
5 | * | 6 | * |
6 | * By Matt Domsch <Matt_Domsch@dell.com> Fri Sep 22 22:15:56 CDT 2000 | 7 | * By Matt Domsch <Matt_Domsch@dell.com> Fri Sep 22 22:15:56 CDT 2000 |
7 | * Copyright 2000,2001 Dell Inc. | 8 | * Copyright 2000,2001 Dell Inc. |
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | * | ||
23 | ************************************************************/ | 9 | ************************************************************/ |
24 | 10 | ||
25 | #ifndef FS_PART_EFI_H_INCLUDED | 11 | #ifndef FS_PART_EFI_H_INCLUDED |
diff --git a/block/partitions/ibm.h b/block/partitions/ibm.h index 08fb0804a812..8bf13febb2b6 100644 --- a/block/partitions/ibm.h +++ b/block/partitions/ibm.h | |||
@@ -1 +1,2 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | int ibm_partition(struct parsed_partitions *); | 2 | int ibm_partition(struct parsed_partitions *); |
diff --git a/block/partitions/karma.h b/block/partitions/karma.h index c764b2e9df21..48e074d417fb 100644 --- a/block/partitions/karma.h +++ b/block/partitions/karma.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * fs/partitions/karma.h | 3 | * fs/partitions/karma.h |
3 | */ | 4 | */ |
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index 16766f267559..6db573f33219 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
1 | /** | 2 | /** |
2 | * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) | 3 | * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) |
3 | * | 4 | * |
@@ -6,21 +7,6 @@ | |||
6 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> | 7 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> |
7 | * | 8 | * |
8 | * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads | 9 | * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads |
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it under | ||
11 | * the terms of the GNU General Public License as published by the Free Software | ||
12 | * Foundation; either version 2 of the License, or (at your option) any later | ||
13 | * version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
17 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
18 | * details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License along with | ||
21 | * this program (in the main directory of the source in the file COPYING); if | ||
22 | * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, | ||
23 | * Boston, MA 02111-1307 USA | ||
24 | */ | 10 | */ |
25 | 11 | ||
26 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h index f4c6055df956..1ca63e97bccc 100644 --- a/block/partitions/ldm.h +++ b/block/partitions/ldm.h | |||
@@ -1,3 +1,4 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
1 | /** | 2 | /** |
2 | * ldm - Part of the Linux-NTFS project. | 3 | * ldm - Part of the Linux-NTFS project. |
3 | * | 4 | * |
@@ -6,21 +7,6 @@ | |||
6 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> | 7 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> |
7 | * | 8 | * |
8 | * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads | 9 | * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads |
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License as published by the Free | ||
12 | * Software Foundation; either version 2 of the License, or (at your option) | ||
13 | * any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program (in the main directory of the Linux-NTFS source | ||
22 | * in the file COPYING); if not, write to the Free Software Foundation, | ||
23 | * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
24 | */ | 10 | */ |
25 | 11 | ||
26 | #ifndef _FS_PT_LDM_H_ | 12 | #ifndef _FS_PT_LDM_H_ |
diff --git a/block/partitions/msdos.h b/block/partitions/msdos.h index 38c781c490b3..fcacfc486092 100644 --- a/block/partitions/msdos.h +++ b/block/partitions/msdos.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * fs/partitions/msdos.h | 3 | * fs/partitions/msdos.h |
3 | */ | 4 | */ |
diff --git a/block/partitions/osf.h b/block/partitions/osf.h index 20ed2315ec16..4d8088e7ea8c 100644 --- a/block/partitions/osf.h +++ b/block/partitions/osf.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * fs/partitions/osf.h | 3 | * fs/partitions/osf.h |
3 | */ | 4 | */ |
diff --git a/block/partitions/sgi.h b/block/partitions/sgi.h index b9553ebdd5a9..a5b77c3987cf 100644 --- a/block/partitions/sgi.h +++ b/block/partitions/sgi.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * fs/partitions/sgi.h | 3 | * fs/partitions/sgi.h |
3 | */ | 4 | */ |
diff --git a/block/partitions/sun.h b/block/partitions/sun.h index 2424baa8319f..ae1b9eed3fd7 100644 --- a/block/partitions/sun.h +++ b/block/partitions/sun.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * fs/partitions/sun.h | 3 | * fs/partitions/sun.h |
3 | */ | 4 | */ |
diff --git a/block/partitions/sysv68.h b/block/partitions/sysv68.h index bf2f5ffa97ac..4fb6b8ec78ae 100644 --- a/block/partitions/sysv68.h +++ b/block/partitions/sysv68.h | |||
@@ -1 +1,2 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | extern int sysv68_partition(struct parsed_partitions *state); | 2 | extern int sysv68_partition(struct parsed_partitions *state); |
diff --git a/block/partitions/ultrix.h b/block/partitions/ultrix.h index a3cc00b2bded..9f676cead222 100644 --- a/block/partitions/ultrix.h +++ b/block/partitions/ultrix.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * fs/partitions/ultrix.h | 3 | * fs/partitions/ultrix.h |
3 | */ | 4 | */ |
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 533f4aee8567..f5e0ad65e86a 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c | |||
@@ -1,20 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Copyright (C) 2001 Jens Axboe <axboe@suse.de> | 3 | * Copyright (C) 2001 Jens Axboe <axboe@suse.de> |
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public Licens | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- | ||
17 | * | ||
18 | */ | 4 | */ |
19 | #include <linux/kernel.h> | 5 | #include <linux/kernel.h> |
20 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
diff --git a/block/sed-opal.c b/block/sed-opal.c index e0de4dd448b3..a46e8d13e16d 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c | |||
@@ -1,18 +1,10 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * Copyright © 2016 Intel Corporation | 3 | * Copyright © 2016 Intel Corporation |
3 | * | 4 | * |
4 | * Authors: | 5 | * Authors: |
5 | * Scott Bauer <scott.bauer@intel.com> | 6 | * Scott Bauer <scott.bauer@intel.com> |
6 | * Rafael Antognolli <rafael.antognolli@intel.com> | 7 | * Rafael Antognolli <rafael.antognolli@intel.com> |
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | 8 | */ |
17 | 9 | ||
18 | #define pr_fmt(fmt) KBUILD_MODNAME ":OPAL: " fmt | 10 | #define pr_fmt(fmt) KBUILD_MODNAME ":OPAL: " fmt |
@@ -85,7 +77,6 @@ struct opal_dev { | |||
85 | void *data; | 77 | void *data; |
86 | sec_send_recv *send_recv; | 78 | sec_send_recv *send_recv; |
87 | 79 | ||
88 | const struct opal_step *steps; | ||
89 | struct mutex dev_lock; | 80 | struct mutex dev_lock; |
90 | u16 comid; | 81 | u16 comid; |
91 | u32 hsn; | 82 | u32 hsn; |
@@ -157,7 +148,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = { | |||
157 | 148 | ||
158 | /* C_PIN_TABLE object ID's */ | 149 | /* C_PIN_TABLE object ID's */ |
159 | 150 | ||
160 | [OPAL_C_PIN_MSID] = | 151 | [OPAL_C_PIN_MSID] = |
161 | { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x84, 0x02}, | 152 | { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x84, 0x02}, |
162 | [OPAL_C_PIN_SID] = | 153 | [OPAL_C_PIN_SID] = |
163 | { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01}, | 154 | { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01}, |
@@ -181,7 +172,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = { | |||
181 | * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 | 172 | * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 |
182 | * Section: 6.3 Assigned UIDs | 173 | * Section: 6.3 Assigned UIDs |
183 | */ | 174 | */ |
184 | static const u8 opalmethod[][OPAL_UID_LENGTH] = { | 175 | static const u8 opalmethod[][OPAL_METHOD_LENGTH] = { |
185 | [OPAL_PROPERTIES] = | 176 | [OPAL_PROPERTIES] = |
186 | { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01 }, | 177 | { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01 }, |
187 | [OPAL_STARTSESSION] = | 178 | [OPAL_STARTSESSION] = |
@@ -217,6 +208,7 @@ static const u8 opalmethod[][OPAL_UID_LENGTH] = { | |||
217 | }; | 208 | }; |
218 | 209 | ||
219 | static int end_opal_session_error(struct opal_dev *dev); | 210 | static int end_opal_session_error(struct opal_dev *dev); |
211 | static int opal_discovery0_step(struct opal_dev *dev); | ||
220 | 212 | ||
221 | struct opal_suspend_data { | 213 | struct opal_suspend_data { |
222 | struct opal_lock_unlock unlk; | 214 | struct opal_lock_unlock unlk; |
@@ -382,37 +374,50 @@ static void check_geometry(struct opal_dev *dev, const void *data) | |||
382 | dev->lowest_lba = geo->lowest_aligned_lba; | 374 | dev->lowest_lba = geo->lowest_aligned_lba; |
383 | } | 375 | } |
384 | 376 | ||
385 | static int next(struct opal_dev *dev) | 377 | static int execute_step(struct opal_dev *dev, |
378 | const struct opal_step *step, size_t stepIndex) | ||
386 | { | 379 | { |
387 | const struct opal_step *step; | 380 | int error = step->fn(dev, step->data); |
388 | int state = 0, error = 0; | ||
389 | 381 | ||
390 | do { | 382 | if (error) { |
391 | step = &dev->steps[state]; | 383 | pr_debug("Step %zu (%pS) failed with error %d: %s\n", |
392 | if (!step->fn) | 384 | stepIndex, step->fn, error, |
393 | break; | 385 | opal_error_to_human(error)); |
386 | } | ||
394 | 387 | ||
395 | error = step->fn(dev, step->data); | 388 | return error; |
396 | if (error) { | 389 | } |
397 | pr_debug("Error on step function: %d with error %d: %s\n", | ||
398 | state, error, | ||
399 | opal_error_to_human(error)); | ||
400 | |||
401 | /* For each OPAL command we do a discovery0 then we | ||
402 | * start some sort of session. | ||
403 | * If we haven't passed state 1 then there was an error | ||
404 | * on discovery0 or during the attempt to start a | ||
405 | * session. Therefore we shouldn't attempt to terminate | ||
406 | * a session, as one has not yet been created. | ||
407 | */ | ||
408 | if (state > 1) { | ||
409 | end_opal_session_error(dev); | ||
410 | return error; | ||
411 | } | ||
412 | 390 | ||
413 | } | 391 | static int execute_steps(struct opal_dev *dev, |
414 | state++; | 392 | const struct opal_step *steps, size_t n_steps) |
415 | } while (!error); | 393 | { |
394 | size_t state = 0; | ||
395 | int error; | ||
396 | |||
397 | /* first do a discovery0 */ | ||
398 | error = opal_discovery0_step(dev); | ||
399 | if (error) | ||
400 | return error; | ||
401 | |||
402 | for (state = 0; state < n_steps; state++) { | ||
403 | error = execute_step(dev, &steps[state], state); | ||
404 | if (error) | ||
405 | goto out_error; | ||
406 | } | ||
407 | |||
408 | return 0; | ||
409 | |||
410 | out_error: | ||
411 | /* | ||
412 | * For each OPAL command the first step in steps starts some sort of | ||
413 | * session. If an error occurred in the initial discovery0 or if an | ||
414 | * error occurred in the first step (and thus stopping the loop with | ||
415 | * state == 0) then there was an error before or during the attempt to | ||
416 | * start a session. Therefore we shouldn't attempt to terminate a | ||
417 | * session, as one has not yet been created. | ||
418 | */ | ||
419 | if (state > 0) | ||
420 | end_opal_session_error(dev); | ||
416 | 421 | ||
417 | return error; | 422 | return error; |
418 | } | 423 | } |
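[Annotation] The execute_step()/execute_steps() rework above replaces the NULL-terminated step table walked by next() with an explicit array plus count: discovery0 always runs first, each step executes in order, and the session is torn down on error only if at least one step (the session start) has already run. A standalone sketch of that control flow, with stand-in step functions:

/*
 * Sketch of the execute_steps() control flow, reduced to a standalone
 * program. The step table, discovery call and session teardown are
 * stand-ins; only the loop shape and the "state > 0" teardown rule
 * mirror the patch.
 */
#include <stdio.h>

struct step { int (*fn)(void *data); void *data; };

static int discovery0(void)           { puts("discovery0");  return 0; }
static int end_session_on_error(void) { puts("end session"); return 0; }

static int execute_step(const struct step *step, size_t idx)
{
        int error = step->fn(step->data);

        if (error)
                fprintf(stderr, "step %zu failed: %d\n", idx, error);
        return error;
}

static int execute_steps(const struct step *steps, size_t n_steps)
{
        size_t state;
        int error;

        error = discovery0();           /* always probe the device first */
        if (error)
                return error;

        for (state = 0; state < n_steps; state++) {
                error = execute_step(&steps[state], state);
                if (error)
                        goto out_error;
        }
        return 0;

out_error:
        /* the first step opens a session; only tear it down if it ran */
        if (state > 0)
                end_session_on_error();
        return error;
}

static int start_session(void *d) { (void)d; puts("start session"); return 0; }
static int do_work(void *d)       { (void)d; puts("do work");       return -1; }

int main(void)
{
        const struct step steps[] = { { start_session, NULL }, { do_work, NULL } };

        return execute_steps(steps, 2) ? 1 : 0;
}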
@@ -510,15 +515,32 @@ static int opal_discovery0(struct opal_dev *dev, void *data) | |||
510 | return opal_discovery0_end(dev); | 515 | return opal_discovery0_end(dev); |
511 | } | 516 | } |
512 | 517 | ||
513 | static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok) | 518 | static int opal_discovery0_step(struct opal_dev *dev) |
519 | { | ||
520 | const struct opal_step discovery0_step = { | ||
521 | opal_discovery0, | ||
522 | }; | ||
523 | return execute_step(dev, &discovery0_step, 0); | ||
524 | } | ||
525 | |||
526 | static bool can_add(int *err, struct opal_dev *cmd, size_t len) | ||
514 | { | 527 | { |
515 | if (*err) | 528 | if (*err) |
516 | return; | 529 | return false; |
517 | if (cmd->pos >= IO_BUFFER_LENGTH - 1) { | 530 | |
518 | pr_debug("Error adding u8: end of buffer.\n"); | 531 | if (len > IO_BUFFER_LENGTH || cmd->pos > IO_BUFFER_LENGTH - len) { |
532 | pr_debug("Error adding %zu bytes: end of buffer.\n", len); | ||
519 | *err = -ERANGE; | 533 | *err = -ERANGE; |
520 | return; | 534 | return false; |
521 | } | 535 | } |
536 | |||
537 | return true; | ||
538 | } | ||
539 | |||
540 | static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok) | ||
541 | { | ||
542 | if (!can_add(err, cmd, 1)) | ||
543 | return; | ||
522 | cmd->cmd[cmd->pos++] = tok; | 544 | cmd->cmd[cmd->pos++] = tok; |
523 | } | 545 | } |
524 | 546 | ||
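[Annotation] can_add(), introduced above, centralizes the output-buffer bounds check for all add_token_* helpers and phrases it as "len > IO_BUFFER_LENGTH || pos > IO_BUFFER_LENGTH - len", which cannot wrap the unsigned arithmetic the way a naive pos + len comparison could. A reduced standalone sketch of that check (BUF_LEN and the payloads are made up for the example):

/*
 * Reduced sketch of the can_add() bounds check: compare against the
 * remaining space without ever computing pos + len, so an oversized len
 * cannot wrap the unsigned arithmetic.
 */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define BUF_LEN 16U

struct cmd { unsigned char buf[BUF_LEN]; size_t pos; };

static bool can_add(int *err, const struct cmd *cmd, size_t len)
{
        if (*err)
                return false;                   /* an earlier failure sticks */

        if (len > BUF_LEN || cmd->pos > BUF_LEN - len) {
                *err = -ERANGE;
                return false;
        }
        return true;
}

static void add_bytes(int *err, struct cmd *cmd, const void *src, size_t len)
{
        if (!can_add(err, cmd, len))
                return;
        memcpy(&cmd->buf[cmd->pos], src, len);
        cmd->pos += len;
}

int main(void)
{
        struct cmd cmd = { .pos = 0 };
        int err = 0;

        add_bytes(&err, &cmd, "0123456789", 10);
        add_bytes(&err, &cmd, "abcdefghij", 10);        /* would overflow: sets err */
        printf("pos=%zu err=%d\n", cmd.pos, err);       /* pos=10 err=-ERANGE */
        return 0;
}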
@@ -551,7 +573,6 @@ static void add_medium_atom_header(struct opal_dev *cmd, bool bytestring, | |||
551 | 573 | ||
552 | static void add_token_u64(int *err, struct opal_dev *cmd, u64 number) | 574 | static void add_token_u64(int *err, struct opal_dev *cmd, u64 number) |
553 | { | 575 | { |
554 | |||
555 | size_t len; | 576 | size_t len; |
556 | int msb; | 577 | int msb; |
557 | 578 | ||
@@ -563,9 +584,8 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number) | |||
563 | msb = fls64(number); | 584 | msb = fls64(number); |
564 | len = DIV_ROUND_UP(msb, 8); | 585 | len = DIV_ROUND_UP(msb, 8); |
565 | 586 | ||
566 | if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) { | 587 | if (!can_add(err, cmd, len + 1)) { |
567 | pr_debug("Error adding u64: end of buffer.\n"); | 588 | pr_debug("Error adding u64: end of buffer.\n"); |
568 | *err = -ERANGE; | ||
569 | return; | 589 | return; |
570 | } | 590 | } |
571 | add_short_atom_header(cmd, false, false, len); | 591 | add_short_atom_header(cmd, false, false, len); |
@@ -573,24 +593,19 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number) | |||
573 | add_token_u8(err, cmd, number >> (len * 8)); | 593 | add_token_u8(err, cmd, number >> (len * 8)); |
574 | } | 594 | } |
575 | 595 | ||
576 | static void add_token_bytestring(int *err, struct opal_dev *cmd, | 596 | static u8 *add_bytestring_header(int *err, struct opal_dev *cmd, size_t len) |
577 | const u8 *bytestring, size_t len) | ||
578 | { | 597 | { |
579 | size_t header_len = 1; | 598 | size_t header_len = 1; |
580 | bool is_short_atom = true; | 599 | bool is_short_atom = true; |
581 | 600 | ||
582 | if (*err) | ||
583 | return; | ||
584 | |||
585 | if (len & ~SHORT_ATOM_LEN_MASK) { | 601 | if (len & ~SHORT_ATOM_LEN_MASK) { |
586 | header_len = 2; | 602 | header_len = 2; |
587 | is_short_atom = false; | 603 | is_short_atom = false; |
588 | } | 604 | } |
589 | 605 | ||
590 | if (len >= IO_BUFFER_LENGTH - cmd->pos - header_len) { | 606 | if (!can_add(err, cmd, header_len + len)) { |
591 | pr_debug("Error adding bytestring: end of buffer.\n"); | 607 | pr_debug("Error adding bytestring: end of buffer.\n"); |
592 | *err = -ERANGE; | 608 | return NULL; |
593 | return; | ||
594 | } | 609 | } |
595 | 610 | ||
596 | if (is_short_atom) | 611 | if (is_short_atom) |
@@ -598,9 +613,19 @@ static void add_token_bytestring(int *err, struct opal_dev *cmd, | |||
598 | else | 613 | else |
599 | add_medium_atom_header(cmd, true, false, len); | 614 | add_medium_atom_header(cmd, true, false, len); |
600 | 615 | ||
601 | memcpy(&cmd->cmd[cmd->pos], bytestring, len); | 616 | return &cmd->cmd[cmd->pos]; |
602 | cmd->pos += len; | 617 | } |
618 | |||
619 | static void add_token_bytestring(int *err, struct opal_dev *cmd, | ||
620 | const u8 *bytestring, size_t len) | ||
621 | { | ||
622 | u8 *start; | ||
603 | 623 | ||
624 | start = add_bytestring_header(err, cmd, len); | ||
625 | if (!start) | ||
626 | return; | ||
627 | memcpy(start, bytestring, len); | ||
628 | cmd->pos += len; | ||
604 | } | 629 | } |
605 | 630 | ||
606 | static int build_locking_range(u8 *buffer, size_t length, u8 lr) | 631 | static int build_locking_range(u8 *buffer, size_t length, u8 lr) |
@@ -623,7 +648,7 @@ static int build_locking_range(u8 *buffer, size_t length, u8 lr) | |||
623 | static int build_locking_user(u8 *buffer, size_t length, u8 lr) | 648 | static int build_locking_user(u8 *buffer, size_t length, u8 lr) |
624 | { | 649 | { |
625 | if (length > OPAL_UID_LENGTH) { | 650 | if (length > OPAL_UID_LENGTH) { |
626 | pr_debug("Can't build locking range user, Length OOB\n"); | 651 | pr_debug("Can't build locking range user. Length OOB\n"); |
627 | return -ERANGE; | 652 | return -ERANGE; |
628 | } | 653 | } |
629 | 654 | ||
@@ -649,6 +674,9 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn) | |||
649 | struct opal_header *hdr; | 674 | struct opal_header *hdr; |
650 | int err = 0; | 675 | int err = 0; |
651 | 676 | ||
677 | /* close the parameter list opened from cmd_start */ | ||
678 | add_token_u8(&err, cmd, OPAL_ENDLIST); | ||
679 | |||
652 | add_token_u8(&err, cmd, OPAL_ENDOFDATA); | 680 | add_token_u8(&err, cmd, OPAL_ENDOFDATA); |
653 | add_token_u8(&err, cmd, OPAL_STARTLIST); | 681 | add_token_u8(&err, cmd, OPAL_STARTLIST); |
654 | add_token_u8(&err, cmd, 0); | 682 | add_token_u8(&err, cmd, 0); |
@@ -687,6 +715,11 @@ static const struct opal_resp_tok *response_get_token( | |||
687 | { | 715 | { |
688 | const struct opal_resp_tok *tok; | 716 | const struct opal_resp_tok *tok; |
689 | 717 | ||
718 | if (!resp) { | ||
719 | pr_debug("Response is NULL\n"); | ||
720 | return ERR_PTR(-EINVAL); | ||
721 | } | ||
722 | |||
690 | if (n >= resp->num) { | 723 | if (n >= resp->num) { |
691 | pr_debug("Token number doesn't exist: %d, resp: %d\n", | 724 | pr_debug("Token number doesn't exist: %d, resp: %d\n", |
692 | n, resp->num); | 725 | n, resp->num); |
@@ -869,27 +902,19 @@ static size_t response_get_string(const struct parsed_resp *resp, int n, | |||
869 | const char **store) | 902 | const char **store) |
870 | { | 903 | { |
871 | u8 skip; | 904 | u8 skip; |
872 | const struct opal_resp_tok *token; | 905 | const struct opal_resp_tok *tok; |
873 | 906 | ||
874 | *store = NULL; | 907 | *store = NULL; |
875 | if (!resp) { | 908 | tok = response_get_token(resp, n); |
876 | pr_debug("Response is NULL\n"); | 909 | if (IS_ERR(tok)) |
877 | return 0; | ||
878 | } | ||
879 | |||
880 | if (n >= resp->num) { | ||
881 | pr_debug("Response has %d tokens. Can't access %d\n", | ||
882 | resp->num, n); | ||
883 | return 0; | 910 | return 0; |
884 | } | ||
885 | 911 | ||
886 | token = &resp->toks[n]; | 912 | if (tok->type != OPAL_DTA_TOKENID_BYTESTRING) { |
887 | if (token->type != OPAL_DTA_TOKENID_BYTESTRING) { | ||
888 | pr_debug("Token is not a byte string!\n"); | 913 | pr_debug("Token is not a byte string!\n"); |
889 | return 0; | 914 | return 0; |
890 | } | 915 | } |
891 | 916 | ||
892 | switch (token->width) { | 917 | switch (tok->width) { |
893 | case OPAL_WIDTH_TINY: | 918 | case OPAL_WIDTH_TINY: |
894 | case OPAL_WIDTH_SHORT: | 919 | case OPAL_WIDTH_SHORT: |
895 | skip = 1; | 920 | skip = 1; |
@@ -905,37 +930,29 @@ static size_t response_get_string(const struct parsed_resp *resp, int n, | |||
905 | return 0; | 930 | return 0; |
906 | } | 931 | } |
907 | 932 | ||
908 | *store = token->pos + skip; | 933 | *store = tok->pos + skip; |
909 | return token->len - skip; | 934 | return tok->len - skip; |
910 | } | 935 | } |
911 | 936 | ||
912 | static u64 response_get_u64(const struct parsed_resp *resp, int n) | 937 | static u64 response_get_u64(const struct parsed_resp *resp, int n) |
913 | { | 938 | { |
914 | if (!resp) { | 939 | const struct opal_resp_tok *tok; |
915 | pr_debug("Response is NULL\n"); | ||
916 | return 0; | ||
917 | } | ||
918 | 940 | ||
919 | if (n >= resp->num) { | 941 | tok = response_get_token(resp, n); |
920 | pr_debug("Response has %d tokens. Can't access %d\n", | 942 | if (IS_ERR(tok)) |
921 | resp->num, n); | ||
922 | return 0; | 943 | return 0; |
923 | } | ||
924 | 944 | ||
925 | if (resp->toks[n].type != OPAL_DTA_TOKENID_UINT) { | 945 | if (tok->type != OPAL_DTA_TOKENID_UINT) { |
926 | pr_debug("Token is not unsigned it: %d\n", | 946 | pr_debug("Token is not unsigned int: %d\n", tok->type); |
927 | resp->toks[n].type); | ||
928 | return 0; | 947 | return 0; |
929 | } | 948 | } |
930 | 949 | ||
931 | if (!(resp->toks[n].width == OPAL_WIDTH_TINY || | 950 | if (tok->width != OPAL_WIDTH_TINY && tok->width != OPAL_WIDTH_SHORT) { |
932 | resp->toks[n].width == OPAL_WIDTH_SHORT)) { | 951 | pr_debug("Atom is not short or tiny: %d\n", tok->width); |
933 | pr_debug("Atom is not short or tiny: %d\n", | ||
934 | resp->toks[n].width); | ||
935 | return 0; | 952 | return 0; |
936 | } | 953 | } |
937 | 954 | ||
938 | return resp->toks[n].stored.u; | 955 | return tok->stored.u; |
939 | } | 956 | } |
940 | 957 | ||
941 | static bool response_token_matches(const struct opal_resp_tok *token, u8 match) | 958 | static bool response_token_matches(const struct opal_resp_tok *token, u8 match) |
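[Annotation] The response_get_string()/response_get_u64() hunks above funnel the NULL-response and index checks through response_get_token(), so callers only test the returned pointer with IS_ERR() instead of re-validating the response themselves. A rough standalone sketch of that pointer-encoded-error pattern; the err_ptr/is_err helpers below are simplified stand-ins for the kernel's ERR_PTR/IS_ERR, and the token layout is made up.

/*
 * Sketch: a lookup helper returns either a valid token pointer or an
 * encoded error, and every caller checks it at a single point.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *err_ptr(long err)      { return (void *)err; }
static long  ptr_err(const void *p) { return (long)p; }
static int   is_err(const void *p)  { return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO; }

struct resp_tok { int type; unsigned long value; };
struct resp     { struct resp_tok toks[8]; int num; };

static const struct resp_tok *get_token(const struct resp *resp, int n)
{
        if (!resp)
                return err_ptr(-22);            /* -EINVAL: no response at all */
        if (n >= resp->num)
                return err_ptr(-22);            /* token index out of range */
        return &resp->toks[n];
}

static unsigned long get_u64(const struct resp *resp, int n)
{
        const struct resp_tok *tok = get_token(resp, n);

        if (is_err(tok))                        /* single point of failure */
                return 0;
        return tok->value;
}

int main(void)
{
        struct resp resp = { .toks = { { 0, 42 } }, .num = 1 };

        printf("%lu %lu\n", get_u64(&resp, 0), get_u64(&resp, 5));  /* 42 0 */
        printf("err=%ld\n", ptr_err(get_token(NULL, 0)));           /* -22 */
        return 0;
}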
@@ -991,6 +1008,27 @@ static void clear_opal_cmd(struct opal_dev *dev) | |||
991 | memset(dev->cmd, 0, IO_BUFFER_LENGTH); | 1008 | memset(dev->cmd, 0, IO_BUFFER_LENGTH); |
992 | } | 1009 | } |
993 | 1010 | ||
1011 | static int cmd_start(struct opal_dev *dev, const u8 *uid, const u8 *method) | ||
1012 | { | ||
1013 | int err = 0; | ||
1014 | |||
1015 | clear_opal_cmd(dev); | ||
1016 | set_comid(dev, dev->comid); | ||
1017 | |||
1018 | add_token_u8(&err, dev, OPAL_CALL); | ||
1019 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1020 | add_token_bytestring(&err, dev, method, OPAL_METHOD_LENGTH); | ||
1021 | |||
1022 | /* | ||
1023 | * Every method call is followed by its parameters enclosed within | ||
1024 | * OPAL_STARTLIST and OPAL_ENDLIST tokens. We automatically open the | ||
1025 | * parameter list here and close it later in cmd_finalize. | ||
1026 | */ | ||
1027 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1028 | |||
1029 | return err; | ||
1030 | } | ||
1031 | |||
994 | static int start_opal_session_cont(struct opal_dev *dev) | 1032 | static int start_opal_session_cont(struct opal_dev *dev) |
995 | { | 1033 | { |
996 | u32 hsn, tsn; | 1034 | u32 hsn, tsn; |
@@ -1050,24 +1088,47 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont) | |||
1050 | return opal_send_recv(dev, cont); | 1088 | return opal_send_recv(dev, cont); |
1051 | } | 1089 | } |
1052 | 1090 | ||
1091 | /* | ||
1092 | * request @column from table @table on device @dev. On success, the column | ||
1093 | * data will be available in dev->resp->tok[4] | ||
1094 | */ | ||
1095 | static int generic_get_column(struct opal_dev *dev, const u8 *table, | ||
1096 | u64 column) | ||
1097 | { | ||
1098 | int err; | ||
1099 | |||
1100 | err = cmd_start(dev, table, opalmethod[OPAL_GET]); | ||
1101 | |||
1102 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1103 | |||
1104 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1105 | add_token_u8(&err, dev, OPAL_STARTCOLUMN); | ||
1106 | add_token_u64(&err, dev, column); | ||
1107 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1108 | |||
1109 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1110 | add_token_u8(&err, dev, OPAL_ENDCOLUMN); | ||
1111 | add_token_u64(&err, dev, column); | ||
1112 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1113 | |||
1114 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1115 | |||
1116 | if (err) | ||
1117 | return err; | ||
1118 | |||
1119 | return finalize_and_send(dev, parse_and_check_status); | ||
1120 | } | ||
1121 | |||
1053 | static int gen_key(struct opal_dev *dev, void *data) | 1122 | static int gen_key(struct opal_dev *dev, void *data) |
1054 | { | 1123 | { |
1055 | u8 uid[OPAL_UID_LENGTH]; | 1124 | u8 uid[OPAL_UID_LENGTH]; |
1056 | int err = 0; | 1125 | int err; |
1057 | |||
1058 | clear_opal_cmd(dev); | ||
1059 | set_comid(dev, dev->comid); | ||
1060 | 1126 | ||
1061 | memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); | 1127 | memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); |
1062 | kfree(dev->prev_data); | 1128 | kfree(dev->prev_data); |
1063 | dev->prev_data = NULL; | 1129 | dev->prev_data = NULL; |
1064 | 1130 | ||
1065 | add_token_u8(&err, dev, OPAL_CALL); | 1131 | err = cmd_start(dev, uid, opalmethod[OPAL_GENKEY]); |
1066 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1067 | add_token_bytestring(&err, dev, opalmethod[OPAL_GENKEY], | ||
1068 | OPAL_UID_LENGTH); | ||
1069 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1070 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1071 | 1132 | ||
1072 | if (err) { | 1133 | if (err) { |
1073 | pr_debug("Error building gen key command\n"); | 1134 | pr_debug("Error building gen key command\n"); |
@@ -1105,62 +1166,39 @@ static int get_active_key_cont(struct opal_dev *dev) | |||
1105 | static int get_active_key(struct opal_dev *dev, void *data) | 1166 | static int get_active_key(struct opal_dev *dev, void *data) |
1106 | { | 1167 | { |
1107 | u8 uid[OPAL_UID_LENGTH]; | 1168 | u8 uid[OPAL_UID_LENGTH]; |
1108 | int err = 0; | 1169 | int err; |
1109 | u8 *lr = data; | 1170 | u8 *lr = data; |
1110 | 1171 | ||
1111 | clear_opal_cmd(dev); | ||
1112 | set_comid(dev, dev->comid); | ||
1113 | |||
1114 | err = build_locking_range(uid, sizeof(uid), *lr); | 1172 | err = build_locking_range(uid, sizeof(uid), *lr); |
1115 | if (err) | 1173 | if (err) |
1116 | return err; | 1174 | return err; |
1117 | 1175 | ||
1118 | err = 0; | 1176 | err = generic_get_column(dev, uid, OPAL_ACTIVEKEY); |
1119 | add_token_u8(&err, dev, OPAL_CALL); | 1177 | if (err) |
1120 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1121 | add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); | ||
1122 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1123 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1124 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1125 | add_token_u8(&err, dev, 3); /* startCloumn */ | ||
1126 | add_token_u8(&err, dev, 10); /* ActiveKey */ | ||
1127 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1128 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1129 | add_token_u8(&err, dev, 4); /* endColumn */ | ||
1130 | add_token_u8(&err, dev, 10); /* ActiveKey */ | ||
1131 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1132 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1133 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1134 | if (err) { | ||
1135 | pr_debug("Error building get active key command\n"); | ||
1136 | return err; | 1178 | return err; |
1137 | } | ||
1138 | 1179 | ||
1139 | return finalize_and_send(dev, get_active_key_cont); | 1180 | return get_active_key_cont(dev); |
1140 | } | 1181 | } |
1141 | 1182 | ||
1142 | static int generic_lr_enable_disable(struct opal_dev *dev, | 1183 | static int generic_lr_enable_disable(struct opal_dev *dev, |
1143 | u8 *uid, bool rle, bool wle, | 1184 | u8 *uid, bool rle, bool wle, |
1144 | bool rl, bool wl) | 1185 | bool rl, bool wl) |
1145 | { | 1186 | { |
1146 | int err = 0; | 1187 | int err; |
1147 | 1188 | ||
1148 | add_token_u8(&err, dev, OPAL_CALL); | 1189 | err = cmd_start(dev, uid, opalmethod[OPAL_SET]); |
1149 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1150 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1151 | 1190 | ||
1152 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1153 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1191 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1154 | add_token_u8(&err, dev, OPAL_VALUES); | 1192 | add_token_u8(&err, dev, OPAL_VALUES); |
1155 | add_token_u8(&err, dev, OPAL_STARTLIST); | 1193 | add_token_u8(&err, dev, OPAL_STARTLIST); |
1156 | 1194 | ||
1157 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1195 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1158 | add_token_u8(&err, dev, 5); /* ReadLockEnabled */ | 1196 | add_token_u8(&err, dev, OPAL_READLOCKENABLED); |
1159 | add_token_u8(&err, dev, rle); | 1197 | add_token_u8(&err, dev, rle); |
1160 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1198 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1161 | 1199 | ||
1162 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1200 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1163 | add_token_u8(&err, dev, 6); /* WriteLockEnabled */ | 1201 | add_token_u8(&err, dev, OPAL_WRITELOCKENABLED); |
1164 | add_token_u8(&err, dev, wle); | 1202 | add_token_u8(&err, dev, wle); |
1165 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1203 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1166 | 1204 | ||
@@ -1176,7 +1214,6 @@ static int generic_lr_enable_disable(struct opal_dev *dev, | |||
1176 | 1214 | ||
1177 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1215 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1178 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1216 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1179 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1180 | return err; | 1217 | return err; |
1181 | } | 1218 | } |
1182 | 1219 | ||
@@ -1197,10 +1234,7 @@ static int setup_locking_range(struct opal_dev *dev, void *data) | |||
1197 | u8 uid[OPAL_UID_LENGTH]; | 1234 | u8 uid[OPAL_UID_LENGTH]; |
1198 | struct opal_user_lr_setup *setup = data; | 1235 | struct opal_user_lr_setup *setup = data; |
1199 | u8 lr; | 1236 | u8 lr; |
1200 | int err = 0; | 1237 | int err; |
1201 | |||
1202 | clear_opal_cmd(dev); | ||
1203 | set_comid(dev, dev->comid); | ||
1204 | 1238 | ||
1205 | lr = setup->session.opal_key.lr; | 1239 | lr = setup->session.opal_key.lr; |
1206 | err = build_locking_range(uid, sizeof(uid), lr); | 1240 | err = build_locking_range(uid, sizeof(uid), lr); |
@@ -1210,40 +1244,34 @@ static int setup_locking_range(struct opal_dev *dev, void *data) | |||
1210 | if (lr == 0) | 1244 | if (lr == 0) |
1211 | err = enable_global_lr(dev, uid, setup); | 1245 | err = enable_global_lr(dev, uid, setup); |
1212 | else { | 1246 | else { |
1213 | add_token_u8(&err, dev, OPAL_CALL); | 1247 | err = cmd_start(dev, uid, opalmethod[OPAL_SET]); |
1214 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1215 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], | ||
1216 | OPAL_UID_LENGTH); | ||
1217 | 1248 | ||
1218 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1219 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1249 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1220 | add_token_u8(&err, dev, OPAL_VALUES); | 1250 | add_token_u8(&err, dev, OPAL_VALUES); |
1221 | add_token_u8(&err, dev, OPAL_STARTLIST); | 1251 | add_token_u8(&err, dev, OPAL_STARTLIST); |
1222 | 1252 | ||
1223 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1253 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1224 | add_token_u8(&err, dev, 3); /* Ranges Start */ | 1254 | add_token_u8(&err, dev, OPAL_RANGESTART); |
1225 | add_token_u64(&err, dev, setup->range_start); | 1255 | add_token_u64(&err, dev, setup->range_start); |
1226 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1256 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1227 | 1257 | ||
1228 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1258 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1229 | add_token_u8(&err, dev, 4); /* Ranges length */ | 1259 | add_token_u8(&err, dev, OPAL_RANGELENGTH); |
1230 | add_token_u64(&err, dev, setup->range_length); | 1260 | add_token_u64(&err, dev, setup->range_length); |
1231 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1261 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1232 | 1262 | ||
1233 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1263 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1234 | add_token_u8(&err, dev, 5); /*ReadLockEnabled */ | 1264 | add_token_u8(&err, dev, OPAL_READLOCKENABLED); |
1235 | add_token_u64(&err, dev, !!setup->RLE); | 1265 | add_token_u64(&err, dev, !!setup->RLE); |
1236 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1266 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1237 | 1267 | ||
1238 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1268 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1239 | add_token_u8(&err, dev, 6); /*WriteLockEnabled*/ | 1269 | add_token_u8(&err, dev, OPAL_WRITELOCKENABLED); |
1240 | add_token_u64(&err, dev, !!setup->WLE); | 1270 | add_token_u64(&err, dev, !!setup->WLE); |
1241 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1271 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1242 | 1272 | ||
1243 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1273 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1244 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1274 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1245 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1246 | |||
1247 | } | 1275 | } |
1248 | if (err) { | 1276 | if (err) { |
1249 | pr_debug("Error building Setup Locking range command.\n"); | 1277 | pr_debug("Error building Setup Locking range command.\n"); |
@@ -1261,29 +1289,21 @@ static int start_generic_opal_session(struct opal_dev *dev, | |||
1261 | u8 key_len) | 1289 | u8 key_len) |
1262 | { | 1290 | { |
1263 | u32 hsn; | 1291 | u32 hsn; |
1264 | int err = 0; | 1292 | int err; |
1265 | 1293 | ||
1266 | if (key == NULL && auth != OPAL_ANYBODY_UID) | 1294 | if (key == NULL && auth != OPAL_ANYBODY_UID) |
1267 | return OPAL_INVAL_PARAM; | 1295 | return OPAL_INVAL_PARAM; |
1268 | 1296 | ||
1269 | clear_opal_cmd(dev); | ||
1270 | |||
1271 | set_comid(dev, dev->comid); | ||
1272 | hsn = GENERIC_HOST_SESSION_NUM; | 1297 | hsn = GENERIC_HOST_SESSION_NUM; |
1298 | err = cmd_start(dev, opaluid[OPAL_SMUID_UID], | ||
1299 | opalmethod[OPAL_STARTSESSION]); | ||
1273 | 1300 | ||
1274 | add_token_u8(&err, dev, OPAL_CALL); | ||
1275 | add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID], | ||
1276 | OPAL_UID_LENGTH); | ||
1277 | add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION], | ||
1278 | OPAL_UID_LENGTH); | ||
1279 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1280 | add_token_u64(&err, dev, hsn); | 1301 | add_token_u64(&err, dev, hsn); |
1281 | add_token_bytestring(&err, dev, opaluid[sp_type], OPAL_UID_LENGTH); | 1302 | add_token_bytestring(&err, dev, opaluid[sp_type], OPAL_UID_LENGTH); |
1282 | add_token_u8(&err, dev, 1); | 1303 | add_token_u8(&err, dev, 1); |
1283 | 1304 | ||
1284 | switch (auth) { | 1305 | switch (auth) { |
1285 | case OPAL_ANYBODY_UID: | 1306 | case OPAL_ANYBODY_UID: |
1286 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1287 | break; | 1307 | break; |
1288 | case OPAL_ADMIN1_UID: | 1308 | case OPAL_ADMIN1_UID: |
1289 | case OPAL_SID_UID: | 1309 | case OPAL_SID_UID: |
@@ -1296,7 +1316,6 @@ static int start_generic_opal_session(struct opal_dev *dev, | |||
1296 | add_token_bytestring(&err, dev, opaluid[auth], | 1316 | add_token_bytestring(&err, dev, opaluid[auth], |
1297 | OPAL_UID_LENGTH); | 1317 | OPAL_UID_LENGTH); |
1298 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1318 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1299 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1300 | break; | 1319 | break; |
1301 | default: | 1320 | default: |
1302 | pr_debug("Cannot start Admin SP session with auth %d\n", auth); | 1321 | pr_debug("Cannot start Admin SP session with auth %d\n", auth); |
@@ -1324,6 +1343,7 @@ static int start_SIDASP_opal_session(struct opal_dev *dev, void *data) | |||
1324 | 1343 | ||
1325 | if (!key) { | 1344 | if (!key) { |
1326 | const struct opal_key *okey = data; | 1345 | const struct opal_key *okey = data; |
1346 | |||
1327 | ret = start_generic_opal_session(dev, OPAL_SID_UID, | 1347 | ret = start_generic_opal_session(dev, OPAL_SID_UID, |
1328 | OPAL_ADMINSP_UID, | 1348 | OPAL_ADMINSP_UID, |
1329 | okey->key, | 1349 | okey->key, |
@@ -1341,6 +1361,7 @@ static int start_SIDASP_opal_session(struct opal_dev *dev, void *data) | |||
1341 | static int start_admin1LSP_opal_session(struct opal_dev *dev, void *data) | 1361 | static int start_admin1LSP_opal_session(struct opal_dev *dev, void *data) |
1342 | { | 1362 | { |
1343 | struct opal_key *key = data; | 1363 | struct opal_key *key = data; |
1364 | |||
1344 | return start_generic_opal_session(dev, OPAL_ADMIN1_UID, | 1365 | return start_generic_opal_session(dev, OPAL_ADMIN1_UID, |
1345 | OPAL_LOCKINGSP_UID, | 1366 | OPAL_LOCKINGSP_UID, |
1346 | key->key, key->key_len); | 1367 | key->key, key->key_len); |
@@ -1356,30 +1377,21 @@ static int start_auth_opal_session(struct opal_dev *dev, void *data) | |||
1356 | u8 *key = session->opal_key.key; | 1377 | u8 *key = session->opal_key.key; |
1357 | u32 hsn = GENERIC_HOST_SESSION_NUM; | 1378 | u32 hsn = GENERIC_HOST_SESSION_NUM; |
1358 | 1379 | ||
1359 | clear_opal_cmd(dev); | 1380 | if (session->sum) |
1360 | set_comid(dev, dev->comid); | ||
1361 | |||
1362 | if (session->sum) { | ||
1363 | err = build_locking_user(lk_ul_user, sizeof(lk_ul_user), | 1381 | err = build_locking_user(lk_ul_user, sizeof(lk_ul_user), |
1364 | session->opal_key.lr); | 1382 | session->opal_key.lr); |
1365 | if (err) | 1383 | else if (session->who != OPAL_ADMIN1 && !session->sum) |
1366 | return err; | ||
1367 | |||
1368 | } else if (session->who != OPAL_ADMIN1 && !session->sum) { | ||
1369 | err = build_locking_user(lk_ul_user, sizeof(lk_ul_user), | 1384 | err = build_locking_user(lk_ul_user, sizeof(lk_ul_user), |
1370 | session->who - 1); | 1385 | session->who - 1); |
1371 | if (err) | 1386 | else |
1372 | return err; | ||
1373 | } else | ||
1374 | memcpy(lk_ul_user, opaluid[OPAL_ADMIN1_UID], OPAL_UID_LENGTH); | 1387 | memcpy(lk_ul_user, opaluid[OPAL_ADMIN1_UID], OPAL_UID_LENGTH); |
1375 | 1388 | ||
1376 | add_token_u8(&err, dev, OPAL_CALL); | 1389 | if (err) |
1377 | add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID], | 1390 | return err; |
1378 | OPAL_UID_LENGTH); | 1391 | |
1379 | add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION], | 1392 | err = cmd_start(dev, opaluid[OPAL_SMUID_UID], |
1380 | OPAL_UID_LENGTH); | 1393 | opalmethod[OPAL_STARTSESSION]); |
1381 | 1394 | ||
1382 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1383 | add_token_u64(&err, dev, hsn); | 1395 | add_token_u64(&err, dev, hsn); |
1384 | add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], | 1396 | add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], |
1385 | OPAL_UID_LENGTH); | 1397 | OPAL_UID_LENGTH); |
@@ -1392,7 +1404,6 @@ static int start_auth_opal_session(struct opal_dev *dev, void *data) | |||
1392 | add_token_u8(&err, dev, 3); | 1404 | add_token_u8(&err, dev, 3); |
1393 | add_token_bytestring(&err, dev, lk_ul_user, OPAL_UID_LENGTH); | 1405 | add_token_bytestring(&err, dev, lk_ul_user, OPAL_UID_LENGTH); |
1394 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1406 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1395 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1396 | 1407 | ||
1397 | if (err) { | 1408 | if (err) { |
1398 | pr_debug("Error building STARTSESSION command.\n"); | 1409 | pr_debug("Error building STARTSESSION command.\n"); |
@@ -1404,18 +1415,10 @@ static int start_auth_opal_session(struct opal_dev *dev, void *data) | |||
1404 | 1415 | ||
1405 | static int revert_tper(struct opal_dev *dev, void *data) | 1416 | static int revert_tper(struct opal_dev *dev, void *data) |
1406 | { | 1417 | { |
1407 | int err = 0; | 1418 | int err; |
1408 | |||
1409 | clear_opal_cmd(dev); | ||
1410 | set_comid(dev, dev->comid); | ||
1411 | 1419 | ||
1412 | add_token_u8(&err, dev, OPAL_CALL); | 1420 | err = cmd_start(dev, opaluid[OPAL_ADMINSP_UID], |
1413 | add_token_bytestring(&err, dev, opaluid[OPAL_ADMINSP_UID], | 1421 | opalmethod[OPAL_REVERT]); |
1414 | OPAL_UID_LENGTH); | ||
1415 | add_token_bytestring(&err, dev, opalmethod[OPAL_REVERT], | ||
1416 | OPAL_UID_LENGTH); | ||
1417 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1418 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1419 | if (err) { | 1422 | if (err) { |
1420 | pr_debug("Error building REVERT TPER command.\n"); | 1423 | pr_debug("Error building REVERT TPER command.\n"); |
1421 | return err; | 1424 | return err; |
@@ -1428,18 +1431,12 @@ static int internal_activate_user(struct opal_dev *dev, void *data) | |||
1428 | { | 1431 | { |
1429 | struct opal_session_info *session = data; | 1432 | struct opal_session_info *session = data; |
1430 | u8 uid[OPAL_UID_LENGTH]; | 1433 | u8 uid[OPAL_UID_LENGTH]; |
1431 | int err = 0; | 1434 | int err; |
1432 | |||
1433 | clear_opal_cmd(dev); | ||
1434 | set_comid(dev, dev->comid); | ||
1435 | 1435 | ||
1436 | memcpy(uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH); | 1436 | memcpy(uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH); |
1437 | uid[7] = session->who; | 1437 | uid[7] = session->who; |
1438 | 1438 | ||
1439 | add_token_u8(&err, dev, OPAL_CALL); | 1439 | err = cmd_start(dev, uid, opalmethod[OPAL_SET]); |
1440 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1441 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1442 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1443 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1440 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1444 | add_token_u8(&err, dev, OPAL_VALUES); | 1441 | add_token_u8(&err, dev, OPAL_VALUES); |
1445 | add_token_u8(&err, dev, OPAL_STARTLIST); | 1442 | add_token_u8(&err, dev, OPAL_STARTLIST); |
@@ -1449,7 +1446,6 @@ static int internal_activate_user(struct opal_dev *dev, void *data) | |||
1449 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1446 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1450 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1447 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1451 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1448 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1452 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1453 | 1449 | ||
1454 | if (err) { | 1450 | if (err) { |
1455 | pr_debug("Error building Activate UserN command.\n"); | 1451 | pr_debug("Error building Activate UserN command.\n"); |
@@ -1463,20 +1459,12 @@ static int erase_locking_range(struct opal_dev *dev, void *data) | |||
1463 | { | 1459 | { |
1464 | struct opal_session_info *session = data; | 1460 | struct opal_session_info *session = data; |
1465 | u8 uid[OPAL_UID_LENGTH]; | 1461 | u8 uid[OPAL_UID_LENGTH]; |
1466 | int err = 0; | 1462 | int err; |
1467 | |||
1468 | clear_opal_cmd(dev); | ||
1469 | set_comid(dev, dev->comid); | ||
1470 | 1463 | ||
1471 | if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0) | 1464 | if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0) |
1472 | return -ERANGE; | 1465 | return -ERANGE; |
1473 | 1466 | ||
1474 | add_token_u8(&err, dev, OPAL_CALL); | 1467 | err = cmd_start(dev, uid, opalmethod[OPAL_ERASE]); |
1475 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1476 | add_token_bytestring(&err, dev, opalmethod[OPAL_ERASE], | ||
1477 | OPAL_UID_LENGTH); | ||
1478 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1479 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1480 | 1468 | ||
1481 | if (err) { | 1469 | if (err) { |
1482 | pr_debug("Error building Erase Locking Range Command.\n"); | 1470 | pr_debug("Error building Erase Locking Range Command.\n"); |
@@ -1488,26 +1476,20 @@ static int erase_locking_range(struct opal_dev *dev, void *data) | |||
1488 | static int set_mbr_done(struct opal_dev *dev, void *data) | 1476 | static int set_mbr_done(struct opal_dev *dev, void *data) |
1489 | { | 1477 | { |
1490 | u8 *mbr_done_tf = data; | 1478 | u8 *mbr_done_tf = data; |
1491 | int err = 0; | 1479 | int err; |
1492 | 1480 | ||
1493 | clear_opal_cmd(dev); | 1481 | err = cmd_start(dev, opaluid[OPAL_MBRCONTROL], |
1494 | set_comid(dev, dev->comid); | 1482 | opalmethod[OPAL_SET]); |
1495 | 1483 | ||
1496 | add_token_u8(&err, dev, OPAL_CALL); | ||
1497 | add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL], | ||
1498 | OPAL_UID_LENGTH); | ||
1499 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1500 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1501 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1484 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1502 | add_token_u8(&err, dev, OPAL_VALUES); | 1485 | add_token_u8(&err, dev, OPAL_VALUES); |
1503 | add_token_u8(&err, dev, OPAL_STARTLIST); | 1486 | add_token_u8(&err, dev, OPAL_STARTLIST); |
1504 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1487 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1505 | add_token_u8(&err, dev, 2); /* Done */ | 1488 | add_token_u8(&err, dev, OPAL_MBRDONE); |
1506 | add_token_u8(&err, dev, *mbr_done_tf); /* Done T or F */ | 1489 | add_token_u8(&err, dev, *mbr_done_tf); /* Done T or F */ |
1507 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1490 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1508 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1491 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1509 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1492 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1510 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1511 | 1493 | ||
1512 | if (err) { | 1494 | if (err) { |
1513 | pr_debug("Error Building set MBR Done command\n"); | 1495 | pr_debug("Error Building set MBR Done command\n"); |
@@ -1520,26 +1502,20 @@ static int set_mbr_done(struct opal_dev *dev, void *data) | |||
1520 | static int set_mbr_enable_disable(struct opal_dev *dev, void *data) | 1502 | static int set_mbr_enable_disable(struct opal_dev *dev, void *data) |
1521 | { | 1503 | { |
1522 | u8 *mbr_en_dis = data; | 1504 | u8 *mbr_en_dis = data; |
1523 | int err = 0; | 1505 | int err; |
1524 | 1506 | ||
1525 | clear_opal_cmd(dev); | 1507 | err = cmd_start(dev, opaluid[OPAL_MBRCONTROL], |
1526 | set_comid(dev, dev->comid); | 1508 | opalmethod[OPAL_SET]); |
1527 | 1509 | ||
1528 | add_token_u8(&err, dev, OPAL_CALL); | ||
1529 | add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL], | ||
1530 | OPAL_UID_LENGTH); | ||
1531 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1532 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1533 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1510 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1534 | add_token_u8(&err, dev, OPAL_VALUES); | 1511 | add_token_u8(&err, dev, OPAL_VALUES); |
1535 | add_token_u8(&err, dev, OPAL_STARTLIST); | 1512 | add_token_u8(&err, dev, OPAL_STARTLIST); |
1536 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1513 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1537 | add_token_u8(&err, dev, 1); | 1514 | add_token_u8(&err, dev, OPAL_MBRENABLE); |
1538 | add_token_u8(&err, dev, *mbr_en_dis); | 1515 | add_token_u8(&err, dev, *mbr_en_dis); |
1539 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1516 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1540 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1517 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1541 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1518 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1542 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1543 | 1519 | ||
1544 | if (err) { | 1520 | if (err) { |
1545 | pr_debug("Error Building set MBR done command\n"); | 1521 | pr_debug("Error Building set MBR done command\n"); |
@@ -1552,26 +1528,19 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data) | |||
1552 | static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid, | 1528 | static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid, |
1553 | struct opal_dev *dev) | 1529 | struct opal_dev *dev) |
1554 | { | 1530 | { |
1555 | int err = 0; | 1531 | int err; |
1556 | 1532 | ||
1557 | clear_opal_cmd(dev); | 1533 | err = cmd_start(dev, cpin_uid, opalmethod[OPAL_SET]); |
1558 | set_comid(dev, dev->comid); | ||
1559 | 1534 | ||
1560 | add_token_u8(&err, dev, OPAL_CALL); | ||
1561 | add_token_bytestring(&err, dev, cpin_uid, OPAL_UID_LENGTH); | ||
1562 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], | ||
1563 | OPAL_UID_LENGTH); | ||
1564 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1565 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1535 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1566 | add_token_u8(&err, dev, OPAL_VALUES); | 1536 | add_token_u8(&err, dev, OPAL_VALUES); |
1567 | add_token_u8(&err, dev, OPAL_STARTLIST); | 1537 | add_token_u8(&err, dev, OPAL_STARTLIST); |
1568 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1538 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1569 | add_token_u8(&err, dev, 3); /* PIN */ | 1539 | add_token_u8(&err, dev, OPAL_PIN); |
1570 | add_token_bytestring(&err, dev, key, key_len); | 1540 | add_token_bytestring(&err, dev, key, key_len); |
1571 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1541 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1572 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1542 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1573 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1543 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1574 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1575 | 1544 | ||
1576 | return err; | 1545 | return err; |
1577 | } | 1546 | } |
@@ -1619,10 +1588,7 @@ static int add_user_to_lr(struct opal_dev *dev, void *data) | |||
1619 | u8 lr_buffer[OPAL_UID_LENGTH]; | 1588 | u8 lr_buffer[OPAL_UID_LENGTH]; |
1620 | u8 user_uid[OPAL_UID_LENGTH]; | 1589 | u8 user_uid[OPAL_UID_LENGTH]; |
1621 | struct opal_lock_unlock *lkul = data; | 1590 | struct opal_lock_unlock *lkul = data; |
1622 | int err = 0; | 1591 | int err; |
1623 | |||
1624 | clear_opal_cmd(dev); | ||
1625 | set_comid(dev, dev->comid); | ||
1626 | 1592 | ||
1627 | memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED], | 1593 | memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED], |
1628 | OPAL_UID_LENGTH); | 1594 | OPAL_UID_LENGTH); |
@@ -1637,12 +1603,8 @@ static int add_user_to_lr(struct opal_dev *dev, void *data) | |||
1637 | 1603 | ||
1638 | user_uid[7] = lkul->session.who; | 1604 | user_uid[7] = lkul->session.who; |
1639 | 1605 | ||
1640 | add_token_u8(&err, dev, OPAL_CALL); | 1606 | err = cmd_start(dev, lr_buffer, opalmethod[OPAL_SET]); |
1641 | add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH); | ||
1642 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], | ||
1643 | OPAL_UID_LENGTH); | ||
1644 | 1607 | ||
1645 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1646 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1608 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1647 | add_token_u8(&err, dev, OPAL_VALUES); | 1609 | add_token_u8(&err, dev, OPAL_VALUES); |
1648 | 1610 | ||
@@ -1680,7 +1642,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data) | |||
1680 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1642 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1681 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1643 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1682 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1644 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1683 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1684 | 1645 | ||
1685 | if (err) { | 1646 | if (err) { |
1686 | pr_debug("Error building add user to locking range command.\n"); | 1647 | pr_debug("Error building add user to locking range command.\n"); |
@@ -1697,9 +1658,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data) | |||
1697 | u8 read_locked = 1, write_locked = 1; | 1658 | u8 read_locked = 1, write_locked = 1; |
1698 | int err = 0; | 1659 | int err = 0; |
1699 | 1660 | ||
1700 | clear_opal_cmd(dev); | ||
1701 | set_comid(dev, dev->comid); | ||
1702 | |||
1703 | if (build_locking_range(lr_buffer, sizeof(lr_buffer), | 1661 | if (build_locking_range(lr_buffer, sizeof(lr_buffer), |
1704 | lkul->session.opal_key.lr) < 0) | 1662 | lkul->session.opal_key.lr) < 0) |
1705 | return -ERANGE; | 1663 | return -ERANGE; |
@@ -1714,17 +1672,15 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data) | |||
1714 | write_locked = 0; | 1672 | write_locked = 0; |
1715 | break; | 1673 | break; |
1716 | case OPAL_LK: | 1674 | case OPAL_LK: |
1717 | /* vars are initalized to locked */ | 1675 | /* vars are initialized to locked */ |
1718 | break; | 1676 | break; |
1719 | default: | 1677 | default: |
1720 | pr_debug("Tried to set an invalid locking state... returning to uland\n"); | 1678 | pr_debug("Tried to set an invalid locking state... returning to uland\n"); |
1721 | return OPAL_INVAL_PARAM; | 1679 | return OPAL_INVAL_PARAM; |
1722 | } | 1680 | } |
1723 | 1681 | ||
1724 | add_token_u8(&err, dev, OPAL_CALL); | 1682 | err = cmd_start(dev, lr_buffer, opalmethod[OPAL_SET]); |
1725 | add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH); | 1683 | |
1726 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1727 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1728 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1684 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1729 | add_token_u8(&err, dev, OPAL_VALUES); | 1685 | add_token_u8(&err, dev, OPAL_VALUES); |
1730 | add_token_u8(&err, dev, OPAL_STARTLIST); | 1686 | add_token_u8(&err, dev, OPAL_STARTLIST); |
@@ -1741,7 +1697,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data) | |||
1741 | 1697 | ||
1742 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1698 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1743 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1699 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1744 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1745 | 1700 | ||
1746 | if (err) { | 1701 | if (err) { |
1747 | pr_debug("Error building SET command.\n"); | 1702 | pr_debug("Error building SET command.\n"); |
@@ -1775,7 +1730,7 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data) | |||
1775 | write_locked = 0; | 1730 | write_locked = 0; |
1776 | break; | 1731 | break; |
1777 | case OPAL_LK: | 1732 | case OPAL_LK: |
1778 | /* vars are initalized to locked */ | 1733 | /* vars are initialized to locked */ |
1779 | break; | 1734 | break; |
1780 | default: | 1735 | default: |
1781 | pr_debug("Tried to set an invalid locking state.\n"); | 1736 | pr_debug("Tried to set an invalid locking state.\n"); |
@@ -1796,17 +1751,10 @@ static int activate_lsp(struct opal_dev *dev, void *data) | |||
1796 | struct opal_lr_act *opal_act = data; | 1751 | struct opal_lr_act *opal_act = data; |
1797 | u8 user_lr[OPAL_UID_LENGTH]; | 1752 | u8 user_lr[OPAL_UID_LENGTH]; |
1798 | u8 uint_3 = 0x83; | 1753 | u8 uint_3 = 0x83; |
1799 | int err = 0, i; | 1754 | int err, i; |
1800 | |||
1801 | clear_opal_cmd(dev); | ||
1802 | set_comid(dev, dev->comid); | ||
1803 | |||
1804 | add_token_u8(&err, dev, OPAL_CALL); | ||
1805 | add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], | ||
1806 | OPAL_UID_LENGTH); | ||
1807 | add_token_bytestring(&err, dev, opalmethod[OPAL_ACTIVATE], | ||
1808 | OPAL_UID_LENGTH); | ||
1809 | 1755 | ||
1756 | err = cmd_start(dev, opaluid[OPAL_LOCKINGSP_UID], | ||
1757 | opalmethod[OPAL_ACTIVATE]); | ||
1810 | 1758 | ||
1811 | if (opal_act->sum) { | 1759 | if (opal_act->sum) { |
1812 | err = build_locking_range(user_lr, sizeof(user_lr), | 1760 | err = build_locking_range(user_lr, sizeof(user_lr), |
@@ -1814,7 +1762,6 @@ static int activate_lsp(struct opal_dev *dev, void *data) | |||
1814 | if (err) | 1762 | if (err) |
1815 | return err; | 1763 | return err; |
1816 | 1764 | ||
1817 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1818 | add_token_u8(&err, dev, OPAL_STARTNAME); | 1765 | add_token_u8(&err, dev, OPAL_STARTNAME); |
1819 | add_token_u8(&err, dev, uint_3); | 1766 | add_token_u8(&err, dev, uint_3); |
1820 | add_token_u8(&err, dev, 6); | 1767 | add_token_u8(&err, dev, 6); |
@@ -1829,11 +1776,6 @@ static int activate_lsp(struct opal_dev *dev, void *data) | |||
1829 | } | 1776 | } |
1830 | add_token_u8(&err, dev, OPAL_ENDLIST); | 1777 | add_token_u8(&err, dev, OPAL_ENDLIST); |
1831 | add_token_u8(&err, dev, OPAL_ENDNAME); | 1778 | add_token_u8(&err, dev, OPAL_ENDNAME); |
1832 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1833 | |||
1834 | } else { | ||
1835 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1836 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1837 | } | 1779 | } |
1838 | 1780 | ||
1839 | if (err) { | 1781 | if (err) { |
@@ -1844,17 +1786,19 @@ static int activate_lsp(struct opal_dev *dev, void *data) | |||
1844 | return finalize_and_send(dev, parse_and_check_status); | 1786 | return finalize_and_send(dev, parse_and_check_status); |
1845 | } | 1787 | } |
1846 | 1788 | ||
1847 | static int get_lsp_lifecycle_cont(struct opal_dev *dev) | 1789 | /* Determine if we're in the Manufactured Inactive or Active state */ |
1790 | static int get_lsp_lifecycle(struct opal_dev *dev, void *data) | ||
1848 | { | 1791 | { |
1849 | u8 lc_status; | 1792 | u8 lc_status; |
1850 | int error = 0; | 1793 | int err; |
1851 | 1794 | ||
1852 | error = parse_and_check_status(dev); | 1795 | err = generic_get_column(dev, opaluid[OPAL_LOCKINGSP_UID], |
1853 | if (error) | 1796 | OPAL_LIFECYCLE); |
1854 | return error; | 1797 | if (err) |
1798 | return err; | ||
1855 | 1799 | ||
1856 | lc_status = response_get_u64(&dev->parsed, 4); | 1800 | lc_status = response_get_u64(&dev->parsed, 4); |
1857 | /* 0x08 is Manufacured Inactive */ | 1801 | /* 0x08 is Manufactured Inactive */ |
1858 | /* 0x09 is Manufactured */ | 1802 | /* 0x09 is Manufactured */ |
1859 | if (lc_status != OPAL_MANUFACTURED_INACTIVE) { | 1803 | if (lc_status != OPAL_MANUFACTURED_INACTIVE) { |
1860 | pr_debug("Couldn't determine the status of the Lifecycle state\n"); | 1804 | pr_debug("Couldn't determine the status of the Lifecycle state\n"); |
@@ -1864,56 +1808,19 @@ static int get_lsp_lifecycle_cont(struct opal_dev *dev) | |||
1864 | return 0; | 1808 | return 0; |
1865 | } | 1809 | } |
1866 | 1810 | ||
1867 | /* Determine if we're in the Manufactured Inactive or Active state */ | 1811 | static int get_msid_cpin_pin(struct opal_dev *dev, void *data) |
1868 | static int get_lsp_lifecycle(struct opal_dev *dev, void *data) | ||
1869 | { | ||
1870 | int err = 0; | ||
1871 | |||
1872 | clear_opal_cmd(dev); | ||
1873 | set_comid(dev, dev->comid); | ||
1874 | |||
1875 | add_token_u8(&err, dev, OPAL_CALL); | ||
1876 | add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], | ||
1877 | OPAL_UID_LENGTH); | ||
1878 | add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); | ||
1879 | |||
1880 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1881 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1882 | |||
1883 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1884 | add_token_u8(&err, dev, 3); /* Start Column */ | ||
1885 | add_token_u8(&err, dev, 6); /* Lifecycle Column */ | ||
1886 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1887 | |||
1888 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1889 | add_token_u8(&err, dev, 4); /* End Column */ | ||
1890 | add_token_u8(&err, dev, 6); /* Lifecycle Column */ | ||
1891 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1892 | |||
1893 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1894 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1895 | |||
1896 | if (err) { | ||
1897 | pr_debug("Error Building GET Lifecycle Status command\n"); | ||
1898 | return err; | ||
1899 | } | ||
1900 | |||
1901 | return finalize_and_send(dev, get_lsp_lifecycle_cont); | ||
1902 | } | ||
1903 | |||
1904 | static int get_msid_cpin_pin_cont(struct opal_dev *dev) | ||
1905 | { | 1812 | { |
1906 | const char *msid_pin; | 1813 | const char *msid_pin; |
1907 | size_t strlen; | 1814 | size_t strlen; |
1908 | int error = 0; | 1815 | int err; |
1909 | 1816 | ||
1910 | error = parse_and_check_status(dev); | 1817 | err = generic_get_column(dev, opaluid[OPAL_C_PIN_MSID], OPAL_PIN); |
1911 | if (error) | 1818 | if (err) |
1912 | return error; | 1819 | return err; |
1913 | 1820 | ||
1914 | strlen = response_get_string(&dev->parsed, 4, &msid_pin); | 1821 | strlen = response_get_string(&dev->parsed, 4, &msid_pin); |
1915 | if (!msid_pin) { | 1822 | if (!msid_pin) { |
1916 | pr_debug("%s: Couldn't extract PIN from response\n", __func__); | 1823 | pr_debug("Couldn't extract MSID_CPIN from response\n"); |
1917 | return OPAL_INVAL_PARAM; | 1824 | return OPAL_INVAL_PARAM; |
1918 | } | 1825 | } |
1919 | 1826 | ||
@@ -1926,42 +1833,6 @@ static int get_msid_cpin_pin_cont(struct opal_dev *dev) | |||
1926 | return 0; | 1833 | return 0; |
1927 | } | 1834 | } |
1928 | 1835 | ||
1929 | static int get_msid_cpin_pin(struct opal_dev *dev, void *data) | ||
1930 | { | ||
1931 | int err = 0; | ||
1932 | |||
1933 | clear_opal_cmd(dev); | ||
1934 | set_comid(dev, dev->comid); | ||
1935 | |||
1936 | add_token_u8(&err, dev, OPAL_CALL); | ||
1937 | add_token_bytestring(&err, dev, opaluid[OPAL_C_PIN_MSID], | ||
1938 | OPAL_UID_LENGTH); | ||
1939 | add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); | ||
1940 | |||
1941 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1942 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1943 | |||
1944 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1945 | add_token_u8(&err, dev, 3); /* Start Column */ | ||
1946 | add_token_u8(&err, dev, 3); /* PIN */ | ||
1947 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1948 | |||
1949 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1950 | add_token_u8(&err, dev, 4); /* End Column */ | ||
1951 | add_token_u8(&err, dev, 3); /* Lifecycle Column */ | ||
1952 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1953 | |||
1954 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1955 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1956 | |||
1957 | if (err) { | ||
1958 | pr_debug("Error building Get MSID CPIN PIN command.\n"); | ||
1959 | return err; | ||
1960 | } | ||
1961 | |||
1962 | return finalize_and_send(dev, get_msid_cpin_pin_cont); | ||
1963 | } | ||
1964 | |||
1965 | static int end_opal_session(struct opal_dev *dev, void *data) | 1836 | static int end_opal_session(struct opal_dev *dev, void *data) |
1966 | { | 1837 | { |
1967 | int err = 0; | 1838 | int err = 0; |
@@ -1977,18 +1848,14 @@ static int end_opal_session(struct opal_dev *dev, void *data) | |||
1977 | 1848 | ||
1978 | static int end_opal_session_error(struct opal_dev *dev) | 1849 | static int end_opal_session_error(struct opal_dev *dev) |
1979 | { | 1850 | { |
1980 | const struct opal_step error_end_session[] = { | 1851 | const struct opal_step error_end_session = { |
1981 | { end_opal_session, }, | 1852 | end_opal_session, |
1982 | { NULL, } | ||
1983 | }; | 1853 | }; |
1984 | dev->steps = error_end_session; | 1854 | return execute_step(dev, &error_end_session, 0); |
1985 | return next(dev); | ||
1986 | } | 1855 | } |
1987 | 1856 | ||
1988 | static inline void setup_opal_dev(struct opal_dev *dev, | 1857 | static inline void setup_opal_dev(struct opal_dev *dev) |
1989 | const struct opal_step *steps) | ||
1990 | { | 1858 | { |
1991 | dev->steps = steps; | ||
1992 | dev->tsn = 0; | 1859 | dev->tsn = 0; |
1993 | dev->hsn = 0; | 1860 | dev->hsn = 0; |
1994 | dev->prev_data = NULL; | 1861 | dev->prev_data = NULL; |
@@ -1996,15 +1863,11 @@ static inline void setup_opal_dev(struct opal_dev *dev, | |||
1996 | 1863 | ||
1997 | static int check_opal_support(struct opal_dev *dev) | 1864 | static int check_opal_support(struct opal_dev *dev) |
1998 | { | 1865 | { |
1999 | const struct opal_step steps[] = { | ||
2000 | { opal_discovery0, }, | ||
2001 | { NULL, } | ||
2002 | }; | ||
2003 | int ret; | 1866 | int ret; |
2004 | 1867 | ||
2005 | mutex_lock(&dev->dev_lock); | 1868 | mutex_lock(&dev->dev_lock); |
2006 | setup_opal_dev(dev, steps); | 1869 | setup_opal_dev(dev); |
2007 | ret = next(dev); | 1870 | ret = opal_discovery0_step(dev); |
2008 | dev->supported = !ret; | 1871 | dev->supported = !ret; |
2009 | mutex_unlock(&dev->dev_lock); | 1872 | mutex_unlock(&dev->dev_lock); |
2010 | return ret; | 1873 | return ret; |
@@ -2057,18 +1920,16 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev, | |||
2057 | struct opal_session_info *opal_session) | 1920 | struct opal_session_info *opal_session) |
2058 | { | 1921 | { |
2059 | const struct opal_step erase_steps[] = { | 1922 | const struct opal_step erase_steps[] = { |
2060 | { opal_discovery0, }, | ||
2061 | { start_auth_opal_session, opal_session }, | 1923 | { start_auth_opal_session, opal_session }, |
2062 | { get_active_key, &opal_session->opal_key.lr }, | 1924 | { get_active_key, &opal_session->opal_key.lr }, |
2063 | { gen_key, }, | 1925 | { gen_key, }, |
2064 | { end_opal_session, }, | 1926 | { end_opal_session, } |
2065 | { NULL, } | ||
2066 | }; | 1927 | }; |
2067 | int ret; | 1928 | int ret; |
2068 | 1929 | ||
2069 | mutex_lock(&dev->dev_lock); | 1930 | mutex_lock(&dev->dev_lock); |
2070 | setup_opal_dev(dev, erase_steps); | 1931 | setup_opal_dev(dev); |
2071 | ret = next(dev); | 1932 | ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps)); |
2072 | mutex_unlock(&dev->dev_lock); | 1933 | mutex_unlock(&dev->dev_lock); |
2073 | return ret; | 1934 | return ret; |
2074 | } | 1935 | } |
@@ -2077,17 +1938,15 @@ static int opal_erase_locking_range(struct opal_dev *dev, | |||
2077 | struct opal_session_info *opal_session) | 1938 | struct opal_session_info *opal_session) |
2078 | { | 1939 | { |
2079 | const struct opal_step erase_steps[] = { | 1940 | const struct opal_step erase_steps[] = { |
2080 | { opal_discovery0, }, | ||
2081 | { start_auth_opal_session, opal_session }, | 1941 | { start_auth_opal_session, opal_session }, |
2082 | { erase_locking_range, opal_session }, | 1942 | { erase_locking_range, opal_session }, |
2083 | { end_opal_session, }, | 1943 | { end_opal_session, } |
2084 | { NULL, } | ||
2085 | }; | 1944 | }; |
2086 | int ret; | 1945 | int ret; |
2087 | 1946 | ||
2088 | mutex_lock(&dev->dev_lock); | 1947 | mutex_lock(&dev->dev_lock); |
2089 | setup_opal_dev(dev, erase_steps); | 1948 | setup_opal_dev(dev); |
2090 | ret = next(dev); | 1949 | ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps)); |
2091 | mutex_unlock(&dev->dev_lock); | 1950 | mutex_unlock(&dev->dev_lock); |
2092 | return ret; | 1951 | return ret; |
2093 | } | 1952 | } |
@@ -2095,15 +1954,16 @@ static int opal_erase_locking_range(struct opal_dev *dev, | |||
2095 | static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, | 1954 | static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, |
2096 | struct opal_mbr_data *opal_mbr) | 1955 | struct opal_mbr_data *opal_mbr) |
2097 | { | 1956 | { |
1957 | u8 enable_disable = opal_mbr->enable_disable == OPAL_MBR_ENABLE ? | ||
1958 | OPAL_TRUE : OPAL_FALSE; | ||
1959 | |||
2098 | const struct opal_step mbr_steps[] = { | 1960 | const struct opal_step mbr_steps[] = { |
2099 | { opal_discovery0, }, | ||
2100 | { start_admin1LSP_opal_session, &opal_mbr->key }, | 1961 | { start_admin1LSP_opal_session, &opal_mbr->key }, |
2101 | { set_mbr_done, &opal_mbr->enable_disable }, | 1962 | { set_mbr_done, &enable_disable }, |
2102 | { end_opal_session, }, | 1963 | { end_opal_session, }, |
2103 | { start_admin1LSP_opal_session, &opal_mbr->key }, | 1964 | { start_admin1LSP_opal_session, &opal_mbr->key }, |
2104 | { set_mbr_enable_disable, &opal_mbr->enable_disable }, | 1965 | { set_mbr_enable_disable, &enable_disable }, |
2105 | { end_opal_session, }, | 1966 | { end_opal_session, } |
2106 | { NULL, } | ||
2107 | }; | 1967 | }; |
2108 | int ret; | 1968 | int ret; |
2109 | 1969 | ||
@@ -2112,8 +1972,8 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, | |||
2112 | return -EINVAL; | 1972 | return -EINVAL; |
2113 | 1973 | ||
2114 | mutex_lock(&dev->dev_lock); | 1974 | mutex_lock(&dev->dev_lock); |
2115 | setup_opal_dev(dev, mbr_steps); | 1975 | setup_opal_dev(dev); |
2116 | ret = next(dev); | 1976 | ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps)); |
2117 | mutex_unlock(&dev->dev_lock); | 1977 | mutex_unlock(&dev->dev_lock); |
2118 | return ret; | 1978 | return ret; |
2119 | } | 1979 | } |
@@ -2130,7 +1990,7 @@ static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) | |||
2130 | suspend->lr = lk_unlk->session.opal_key.lr; | 1990 | suspend->lr = lk_unlk->session.opal_key.lr; |
2131 | 1991 | ||
2132 | mutex_lock(&dev->dev_lock); | 1992 | mutex_lock(&dev->dev_lock); |
2133 | setup_opal_dev(dev, NULL); | 1993 | setup_opal_dev(dev); |
2134 | add_suspend_info(dev, suspend); | 1994 | add_suspend_info(dev, suspend); |
2135 | mutex_unlock(&dev->dev_lock); | 1995 | mutex_unlock(&dev->dev_lock); |
2136 | return 0; | 1996 | return 0; |
@@ -2140,11 +2000,9 @@ static int opal_add_user_to_lr(struct opal_dev *dev, | |||
2140 | struct opal_lock_unlock *lk_unlk) | 2000 | struct opal_lock_unlock *lk_unlk) |
2141 | { | 2001 | { |
2142 | const struct opal_step steps[] = { | 2002 | const struct opal_step steps[] = { |
2143 | { opal_discovery0, }, | ||
2144 | { start_admin1LSP_opal_session, &lk_unlk->session.opal_key }, | 2003 | { start_admin1LSP_opal_session, &lk_unlk->session.opal_key }, |
2145 | { add_user_to_lr, lk_unlk }, | 2004 | { add_user_to_lr, lk_unlk }, |
2146 | { end_opal_session, }, | 2005 | { end_opal_session, } |
2147 | { NULL, } | ||
2148 | }; | 2006 | }; |
2149 | int ret; | 2007 | int ret; |
2150 | 2008 | ||
@@ -2166,8 +2024,8 @@ static int opal_add_user_to_lr(struct opal_dev *dev, | |||
2166 | } | 2024 | } |
2167 | 2025 | ||
2168 | mutex_lock(&dev->dev_lock); | 2026 | mutex_lock(&dev->dev_lock); |
2169 | setup_opal_dev(dev, steps); | 2027 | setup_opal_dev(dev); |
2170 | ret = next(dev); | 2028 | ret = execute_steps(dev, steps, ARRAY_SIZE(steps)); |
2171 | mutex_unlock(&dev->dev_lock); | 2029 | mutex_unlock(&dev->dev_lock); |
2172 | return ret; | 2030 | return ret; |
2173 | } | 2031 | } |
@@ -2175,16 +2033,14 @@ static int opal_add_user_to_lr(struct opal_dev *dev, | |||
2175 | static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal) | 2033 | static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal) |
2176 | { | 2034 | { |
2177 | const struct opal_step revert_steps[] = { | 2035 | const struct opal_step revert_steps[] = { |
2178 | { opal_discovery0, }, | ||
2179 | { start_SIDASP_opal_session, opal }, | 2036 | { start_SIDASP_opal_session, opal }, |
2180 | { revert_tper, }, /* controller will terminate session */ | 2037 | { revert_tper, } /* controller will terminate session */ |
2181 | { NULL, } | ||
2182 | }; | 2038 | }; |
2183 | int ret; | 2039 | int ret; |
2184 | 2040 | ||
2185 | mutex_lock(&dev->dev_lock); | 2041 | mutex_lock(&dev->dev_lock); |
2186 | setup_opal_dev(dev, revert_steps); | 2042 | setup_opal_dev(dev); |
2187 | ret = next(dev); | 2043 | ret = execute_steps(dev, revert_steps, ARRAY_SIZE(revert_steps)); |
2188 | mutex_unlock(&dev->dev_lock); | 2044 | mutex_unlock(&dev->dev_lock); |
2189 | 2045 | ||
2190 | /* | 2046 | /* |
@@ -2201,37 +2057,34 @@ static int __opal_lock_unlock(struct opal_dev *dev, | |||
2201 | struct opal_lock_unlock *lk_unlk) | 2057 | struct opal_lock_unlock *lk_unlk) |
2202 | { | 2058 | { |
2203 | const struct opal_step unlock_steps[] = { | 2059 | const struct opal_step unlock_steps[] = { |
2204 | { opal_discovery0, }, | ||
2205 | { start_auth_opal_session, &lk_unlk->session }, | 2060 | { start_auth_opal_session, &lk_unlk->session }, |
2206 | { lock_unlock_locking_range, lk_unlk }, | 2061 | { lock_unlock_locking_range, lk_unlk }, |
2207 | { end_opal_session, }, | 2062 | { end_opal_session, } |
2208 | { NULL, } | ||
2209 | }; | 2063 | }; |
2210 | const struct opal_step unlock_sum_steps[] = { | 2064 | const struct opal_step unlock_sum_steps[] = { |
2211 | { opal_discovery0, }, | ||
2212 | { start_auth_opal_session, &lk_unlk->session }, | 2065 | { start_auth_opal_session, &lk_unlk->session }, |
2213 | { lock_unlock_locking_range_sum, lk_unlk }, | 2066 | { lock_unlock_locking_range_sum, lk_unlk }, |
2214 | { end_opal_session, }, | 2067 | { end_opal_session, } |
2215 | { NULL, } | ||
2216 | }; | 2068 | }; |
2217 | 2069 | ||
2218 | dev->steps = lk_unlk->session.sum ? unlock_sum_steps : unlock_steps; | 2070 | if (lk_unlk->session.sum) |
2219 | return next(dev); | 2071 | return execute_steps(dev, unlock_sum_steps, |
2072 | ARRAY_SIZE(unlock_sum_steps)); | ||
2073 | else | ||
2074 | return execute_steps(dev, unlock_steps, | ||
2075 | ARRAY_SIZE(unlock_steps)); | ||
2220 | } | 2076 | } |
2221 | 2077 | ||
2222 | static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key) | 2078 | static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key) |
2223 | { | 2079 | { |
2224 | u8 mbr_done_tf = 1; | 2080 | u8 mbr_done_tf = OPAL_TRUE; |
2225 | const struct opal_step mbrdone_step [] = { | 2081 | const struct opal_step mbrdone_step[] = { |
2226 | { opal_discovery0, }, | ||
2227 | { start_admin1LSP_opal_session, key }, | 2082 | { start_admin1LSP_opal_session, key }, |
2228 | { set_mbr_done, &mbr_done_tf }, | 2083 | { set_mbr_done, &mbr_done_tf }, |
2229 | { end_opal_session, }, | 2084 | { end_opal_session, } |
2230 | { NULL, } | ||
2231 | }; | 2085 | }; |
2232 | 2086 | ||
2233 | dev->steps = mbrdone_step; | 2087 | return execute_steps(dev, mbrdone_step, ARRAY_SIZE(mbrdone_step)); |
2234 | return next(dev); | ||
2235 | } | 2088 | } |
2236 | 2089 | ||
2237 | static int opal_lock_unlock(struct opal_dev *dev, | 2090 | static int opal_lock_unlock(struct opal_dev *dev, |
@@ -2252,14 +2105,12 @@ static int opal_lock_unlock(struct opal_dev *dev, | |||
2252 | static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) | 2105 | static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) |
2253 | { | 2106 | { |
2254 | const struct opal_step owner_steps[] = { | 2107 | const struct opal_step owner_steps[] = { |
2255 | { opal_discovery0, }, | ||
2256 | { start_anybodyASP_opal_session, }, | 2108 | { start_anybodyASP_opal_session, }, |
2257 | { get_msid_cpin_pin, }, | 2109 | { get_msid_cpin_pin, }, |
2258 | { end_opal_session, }, | 2110 | { end_opal_session, }, |
2259 | { start_SIDASP_opal_session, opal }, | 2111 | { start_SIDASP_opal_session, opal }, |
2260 | { set_sid_cpin_pin, opal }, | 2112 | { set_sid_cpin_pin, opal }, |
2261 | { end_opal_session, }, | 2113 | { end_opal_session, } |
2262 | { NULL, } | ||
2263 | }; | 2114 | }; |
2264 | int ret; | 2115 | int ret; |
2265 | 2116 | ||
@@ -2267,21 +2118,20 @@ static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) | |||
2267 | return -ENODEV; | 2118 | return -ENODEV; |
2268 | 2119 | ||
2269 | mutex_lock(&dev->dev_lock); | 2120 | mutex_lock(&dev->dev_lock); |
2270 | setup_opal_dev(dev, owner_steps); | 2121 | setup_opal_dev(dev); |
2271 | ret = next(dev); | 2122 | ret = execute_steps(dev, owner_steps, ARRAY_SIZE(owner_steps)); |
2272 | mutex_unlock(&dev->dev_lock); | 2123 | mutex_unlock(&dev->dev_lock); |
2273 | return ret; | 2124 | return ret; |
2274 | } | 2125 | } |
2275 | 2126 | ||
2276 | static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_act) | 2127 | static int opal_activate_lsp(struct opal_dev *dev, |
2128 | struct opal_lr_act *opal_lr_act) | ||
2277 | { | 2129 | { |
2278 | const struct opal_step active_steps[] = { | 2130 | const struct opal_step active_steps[] = { |
2279 | { opal_discovery0, }, | ||
2280 | { start_SIDASP_opal_session, &opal_lr_act->key }, | 2131 | { start_SIDASP_opal_session, &opal_lr_act->key }, |
2281 | { get_lsp_lifecycle, }, | 2132 | { get_lsp_lifecycle, }, |
2282 | { activate_lsp, opal_lr_act }, | 2133 | { activate_lsp, opal_lr_act }, |
2283 | { end_opal_session, }, | 2134 | { end_opal_session, } |
2284 | { NULL, } | ||
2285 | }; | 2135 | }; |
2286 | int ret; | 2136 | int ret; |
2287 | 2137 | ||
@@ -2289,8 +2139,8 @@ static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_a | |||
2289 | return -EINVAL; | 2139 | return -EINVAL; |
2290 | 2140 | ||
2291 | mutex_lock(&dev->dev_lock); | 2141 | mutex_lock(&dev->dev_lock); |
2292 | setup_opal_dev(dev, active_steps); | 2142 | setup_opal_dev(dev); |
2293 | ret = next(dev); | 2143 | ret = execute_steps(dev, active_steps, ARRAY_SIZE(active_steps)); |
2294 | mutex_unlock(&dev->dev_lock); | 2144 | mutex_unlock(&dev->dev_lock); |
2295 | return ret; | 2145 | return ret; |
2296 | } | 2146 | } |
@@ -2299,17 +2149,15 @@ static int opal_setup_locking_range(struct opal_dev *dev, | |||
2299 | struct opal_user_lr_setup *opal_lrs) | 2149 | struct opal_user_lr_setup *opal_lrs) |
2300 | { | 2150 | { |
2301 | const struct opal_step lr_steps[] = { | 2151 | const struct opal_step lr_steps[] = { |
2302 | { opal_discovery0, }, | ||
2303 | { start_auth_opal_session, &opal_lrs->session }, | 2152 | { start_auth_opal_session, &opal_lrs->session }, |
2304 | { setup_locking_range, opal_lrs }, | 2153 | { setup_locking_range, opal_lrs }, |
2305 | { end_opal_session, }, | 2154 | { end_opal_session, } |
2306 | { NULL, } | ||
2307 | }; | 2155 | }; |
2308 | int ret; | 2156 | int ret; |
2309 | 2157 | ||
2310 | mutex_lock(&dev->dev_lock); | 2158 | mutex_lock(&dev->dev_lock); |
2311 | setup_opal_dev(dev, lr_steps); | 2159 | setup_opal_dev(dev); |
2312 | ret = next(dev); | 2160 | ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps)); |
2313 | mutex_unlock(&dev->dev_lock); | 2161 | mutex_unlock(&dev->dev_lock); |
2314 | return ret; | 2162 | return ret; |
2315 | } | 2163 | } |
@@ -2317,11 +2165,9 @@ static int opal_setup_locking_range(struct opal_dev *dev, | |||
2317 | static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) | 2165 | static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) |
2318 | { | 2166 | { |
2319 | const struct opal_step pw_steps[] = { | 2167 | const struct opal_step pw_steps[] = { |
2320 | { opal_discovery0, }, | ||
2321 | { start_auth_opal_session, &opal_pw->session }, | 2168 | { start_auth_opal_session, &opal_pw->session }, |
2322 | { set_new_pw, &opal_pw->new_user_pw }, | 2169 | { set_new_pw, &opal_pw->new_user_pw }, |
2323 | { end_opal_session, }, | 2170 | { end_opal_session, } |
2324 | { NULL } | ||
2325 | }; | 2171 | }; |
2326 | int ret; | 2172 | int ret; |
2327 | 2173 | ||
@@ -2332,8 +2178,8 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) | |||
2332 | return -EINVAL; | 2178 | return -EINVAL; |
2333 | 2179 | ||
2334 | mutex_lock(&dev->dev_lock); | 2180 | mutex_lock(&dev->dev_lock); |
2335 | setup_opal_dev(dev, pw_steps); | 2181 | setup_opal_dev(dev); |
2336 | ret = next(dev); | 2182 | ret = execute_steps(dev, pw_steps, ARRAY_SIZE(pw_steps)); |
2337 | mutex_unlock(&dev->dev_lock); | 2183 | mutex_unlock(&dev->dev_lock); |
2338 | return ret; | 2184 | return ret; |
2339 | } | 2185 | } |
@@ -2342,11 +2188,9 @@ static int opal_activate_user(struct opal_dev *dev, | |||
2342 | struct opal_session_info *opal_session) | 2188 | struct opal_session_info *opal_session) |
2343 | { | 2189 | { |
2344 | const struct opal_step act_steps[] = { | 2190 | const struct opal_step act_steps[] = { |
2345 | { opal_discovery0, }, | ||
2346 | { start_admin1LSP_opal_session, &opal_session->opal_key }, | 2191 | { start_admin1LSP_opal_session, &opal_session->opal_key }, |
2347 | { internal_activate_user, opal_session }, | 2192 | { internal_activate_user, opal_session }, |
2348 | { end_opal_session, }, | 2193 | { end_opal_session, } |
2349 | { NULL, } | ||
2350 | }; | 2194 | }; |
2351 | int ret; | 2195 | int ret; |
2352 | 2196 | ||
@@ -2358,8 +2202,8 @@ static int opal_activate_user(struct opal_dev *dev, | |||
2358 | } | 2202 | } |
2359 | 2203 | ||
2360 | mutex_lock(&dev->dev_lock); | 2204 | mutex_lock(&dev->dev_lock); |
2361 | setup_opal_dev(dev, act_steps); | 2205 | setup_opal_dev(dev); |
2362 | ret = next(dev); | 2206 | ret = execute_steps(dev, act_steps, ARRAY_SIZE(act_steps)); |
2363 | mutex_unlock(&dev->dev_lock); | 2207 | mutex_unlock(&dev->dev_lock); |
2364 | return ret; | 2208 | return ret; |
2365 | } | 2209 | } |
@@ -2376,7 +2220,7 @@ bool opal_unlock_from_suspend(struct opal_dev *dev) | |||
2376 | return false; | 2220 | return false; |
2377 | 2221 | ||
2378 | mutex_lock(&dev->dev_lock); | 2222 | mutex_lock(&dev->dev_lock); |
2379 | setup_opal_dev(dev, NULL); | 2223 | setup_opal_dev(dev); |
2380 | 2224 | ||
2381 | list_for_each_entry(suspend, &dev->unlk_lst, node) { | 2225 | list_for_each_entry(suspend, &dev->unlk_lst, node) { |
2382 | dev->tsn = 0; | 2226 | dev->tsn = 0; |
diff --git a/block/t10-pi.c b/block/t10-pi.c index 62aed77d0bb9..0c0094609dd6 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c | |||
@@ -1,24 +1,7 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
1 | /* | 2 | /* |
2 | * t10_pi.c - Functions for generating and verifying T10 Protection | 3 | * t10_pi.c - Functions for generating and verifying T10 Protection |
3 | * Information. | 4 | * Information. |
4 | * | ||
5 | * Copyright (C) 2007, 2008, 2014 Oracle Corporation | ||
6 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version | ||
10 | * 2 as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; see the file COPYING. If not, write to | ||
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | ||
20 | * USA. | ||
21 | * | ||
22 | */ | 5 | */ |
23 | 6 | ||
24 | #include <linux/t10-pi.h> | 7 | #include <linux/t10-pi.h> |
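The t10-pi.c hunk is one instance of the SPDX conversion called out in the pull summary: the multi-paragraph GPLv2 notice collapses into a single machine-readable tag on the first line of the file. For a C file the convention is simply (illustrative file, not from this series):

// SPDX-License-Identifier: GPL-2.0
/*
 * example.c - the license is expressed entirely by the SPDX tag above;
 * no further boilerplate text is carried in the file body.
 */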
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 0903e0803ec8..92b930cb3b72 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c | |||
@@ -1829,6 +1829,7 @@ static int __init fd_probe_drives(void) | |||
1829 | disk->major = FLOPPY_MAJOR; | 1829 | disk->major = FLOPPY_MAJOR; |
1830 | disk->first_minor = drive; | 1830 | disk->first_minor = drive; |
1831 | disk->fops = &floppy_fops; | 1831 | disk->fops = &floppy_fops; |
1832 | disk->events = DISK_EVENT_MEDIA_CHANGE; | ||
1832 | sprintf(disk->disk_name, "fd%d", drive); | 1833 | sprintf(disk->disk_name, "fd%d", drive); |
1833 | disk->private_data = &unit[drive]; | 1834 | disk->private_data = &unit[drive]; |
1834 | set_capacity(disk, 880*2); | 1835 | set_capacity(disk, 880*2); |
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index b0dbbdfeb33e..c7b5c4671f05 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c | |||
@@ -2028,6 +2028,7 @@ static int __init atari_floppy_init (void) | |||
2028 | unit[i].disk->first_minor = i; | 2028 | unit[i].disk->first_minor = i; |
2029 | sprintf(unit[i].disk->disk_name, "fd%d", i); | 2029 | sprintf(unit[i].disk->disk_name, "fd%d", i); |
2030 | unit[i].disk->fops = &floppy_fops; | 2030 | unit[i].disk->fops = &floppy_fops; |
2031 | unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE; | ||
2031 | unit[i].disk->private_data = &unit[i]; | 2032 | unit[i].disk->private_data = &unit[i]; |
2032 | set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); | 2033 | set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); |
2033 | add_disk(unit[i].disk); | 2034 | add_disk(unit[i].disk); |
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c18586fccb6f..17defbf4f332 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c | |||
@@ -96,13 +96,8 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) | |||
96 | /* | 96 | /* |
97 | * Must use NOIO because we don't want to recurse back into the | 97 | * Must use NOIO because we don't want to recurse back into the |
98 | * block or filesystem layers from page reclaim. | 98 | * block or filesystem layers from page reclaim. |
99 | * | ||
100 | * Cannot support DAX and highmem, because our ->direct_access | ||
101 | * routine for DAX must return memory that is always addressable. | ||
102 | * If DAX was reworked to use pfns and kmap throughout, this | ||
103 | * restriction might be able to be lifted. | ||
104 | */ | 99 | */ |
105 | gfp_flags = GFP_NOIO | __GFP_ZERO; | 100 | gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM; |
106 | page = alloc_page(gfp_flags); | 101 | page = alloc_page(gfp_flags); |
107 | if (!page) | 102 | if (!page) |
108 | return NULL; | 103 | return NULL; |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f070f7200fc0..549c64df9708 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -1317,10 +1317,6 @@ struct bm_extent { | |||
1317 | 1317 | ||
1318 | #define DRBD_MAX_SECTORS_FIXED_BM \ | 1318 | #define DRBD_MAX_SECTORS_FIXED_BM \ |
1319 | ((MD_128MB_SECT - MD_32kB_SECT - MD_4kB_SECT) * (1LL<<(BM_EXT_SHIFT-9))) | 1319 | ((MD_128MB_SECT - MD_32kB_SECT - MD_4kB_SECT) * (1LL<<(BM_EXT_SHIFT-9))) |
1320 | #if !defined(CONFIG_LBDAF) && BITS_PER_LONG == 32 | ||
1321 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_32 | ||
1322 | #define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32 | ||
1323 | #else | ||
1324 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_FIXED_BM | 1320 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_FIXED_BM |
1325 | /* 16 TB in units of sectors */ | 1321 | /* 16 TB in units of sectors */ |
1326 | #if BITS_PER_LONG == 32 | 1322 | #if BITS_PER_LONG == 32 |
@@ -1333,7 +1329,6 @@ struct bm_extent { | |||
1333 | #define DRBD_MAX_SECTORS_FLEX (1UL << 51) | 1329 | #define DRBD_MAX_SECTORS_FLEX (1UL << 51) |
1334 | /* corresponds to (1UL << 38) bits right now. */ | 1330 | /* corresponds to (1UL << 38) bits right now. */ |
1335 | #endif | 1331 | #endif |
1336 | #endif | ||
1337 | 1332 | ||
1338 | /* Estimate max bio size as 256 * PAGE_SIZE, | 1333 | /* Estimate max bio size as 256 * PAGE_SIZE, |
1339 | * so for typical PAGE_SIZE of 4k, that is (1<<20) Byte. | 1334 | * so for typical PAGE_SIZE of 4k, that is (1<<20) Byte. |
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 49f89db0766f..b8998abd86a5 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c | |||
@@ -4540,6 +4540,7 @@ static int __init do_floppy_init(void) | |||
4540 | disks[drive]->major = FLOPPY_MAJOR; | 4540 | disks[drive]->major = FLOPPY_MAJOR; |
4541 | disks[drive]->first_minor = TOMINOR(drive); | 4541 | disks[drive]->first_minor = TOMINOR(drive); |
4542 | disks[drive]->fops = &floppy_fops; | 4542 | disks[drive]->fops = &floppy_fops; |
4543 | disks[drive]->events = DISK_EVENT_MEDIA_CHANGE; | ||
4543 | sprintf(disks[drive]->disk_name, "fd%d", drive); | 4544 | sprintf(disks[drive]->disk_name, "fd%d", drive); |
4544 | 4545 | ||
4545 | timer_setup(&motor_off_timer[drive], motor_off_callback, 0); | 4546 | timer_setup(&motor_off_timer[drive], motor_off_callback, 0); |
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index bf1c61cab8eb..102d79575895 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -264,12 +264,20 @@ lo_do_transfer(struct loop_device *lo, int cmd, | |||
264 | return ret; | 264 | return ret; |
265 | } | 265 | } |
266 | 266 | ||
267 | static inline void loop_iov_iter_bvec(struct iov_iter *i, | ||
268 | unsigned int direction, const struct bio_vec *bvec, | ||
269 | unsigned long nr_segs, size_t count) | ||
270 | { | ||
271 | iov_iter_bvec(i, direction, bvec, nr_segs, count); | ||
272 | i->type |= ITER_BVEC_FLAG_NO_REF; | ||
273 | } | ||
274 | |||
267 | static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) | 275 | static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) |
268 | { | 276 | { |
269 | struct iov_iter i; | 277 | struct iov_iter i; |
270 | ssize_t bw; | 278 | ssize_t bw; |
271 | 279 | ||
272 | iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len); | 280 | loop_iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len); |
273 | 281 | ||
274 | file_start_write(file); | 282 | file_start_write(file); |
275 | bw = vfs_iter_write(file, &i, ppos, 0); | 283 | bw = vfs_iter_write(file, &i, ppos, 0); |
@@ -347,7 +355,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq, | |||
347 | ssize_t len; | 355 | ssize_t len; |
348 | 356 | ||
349 | rq_for_each_segment(bvec, rq, iter) { | 357 | rq_for_each_segment(bvec, rq, iter) { |
350 | iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len); | 358 | loop_iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len); |
351 | len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); | 359 | len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); |
352 | if (len < 0) | 360 | if (len < 0) |
353 | return len; | 361 | return len; |
@@ -388,7 +396,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq, | |||
388 | b.bv_offset = 0; | 396 | b.bv_offset = 0; |
389 | b.bv_len = bvec.bv_len; | 397 | b.bv_len = bvec.bv_len; |
390 | 398 | ||
391 | iov_iter_bvec(&i, READ, &b, 1, b.bv_len); | 399 | loop_iov_iter_bvec(&i, READ, &b, 1, b.bv_len); |
392 | len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); | 400 | len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); |
393 | if (len < 0) { | 401 | if (len < 0) { |
394 | ret = len; | 402 | ret = len; |
@@ -555,7 +563,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, | |||
555 | } | 563 | } |
556 | atomic_set(&cmd->ref, 2); | 564 | atomic_set(&cmd->ref, 2); |
557 | 565 | ||
558 | iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq)); | 566 | loop_iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq)); |
559 | iter.iov_offset = offset; | 567 | iter.iov_offset = offset; |
560 | 568 | ||
561 | cmd->iocb.ki_pos = pos; | 569 | cmd->iocb.ki_pos = pos; |
@@ -900,6 +908,24 @@ static int loop_prepare_queue(struct loop_device *lo) | |||
900 | return 0; | 908 | return 0; |
901 | } | 909 | } |
902 | 910 | ||
911 | static void loop_update_rotational(struct loop_device *lo) | ||
912 | { | ||
913 | struct file *file = lo->lo_backing_file; | ||
914 | struct inode *file_inode = file->f_mapping->host; | ||
915 | struct block_device *file_bdev = file_inode->i_sb->s_bdev; | ||
916 | struct request_queue *q = lo->lo_queue; | ||
917 | bool nonrot = true; | ||
918 | |||
919 | /* not all filesystems (e.g. tmpfs) have a sb->s_bdev */ | ||
920 | if (file_bdev) | ||
921 | nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev)); | ||
922 | |||
923 | if (nonrot) | ||
924 | blk_queue_flag_set(QUEUE_FLAG_NONROT, q); | ||
925 | else | ||
926 | blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); | ||
927 | } | ||
928 | |||
903 | static int loop_set_fd(struct loop_device *lo, fmode_t mode, | 929 | static int loop_set_fd(struct loop_device *lo, fmode_t mode, |
904 | struct block_device *bdev, unsigned int arg) | 930 | struct block_device *bdev, unsigned int arg) |
905 | { | 931 | { |
@@ -963,6 +989,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, | |||
963 | if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) | 989 | if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) |
964 | blk_queue_write_cache(lo->lo_queue, true, false); | 990 | blk_queue_write_cache(lo->lo_queue, true, false); |
965 | 991 | ||
992 | loop_update_rotational(lo); | ||
966 | loop_update_dio(lo); | 993 | loop_update_dio(lo); |
967 | set_capacity(lo->lo_disk, size); | 994 | set_capacity(lo->lo_disk, size); |
968 | bd_set_size(bdev, size << 9); | 995 | bd_set_size(bdev, size << 9); |
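loop_update_rotational() above propagates the backing device's rotational flag to the loop queue, so a loop device on an SSD-backed file no longer advertises itself as rotational. The same flag is exposed to user space as /sys/block/<dev>/queue/rotational; a small sketch that reads it (the device name is hard-coded purely as an example):

/*
 * Read the rotational attribute of a block device from sysfs:
 * "1" means rotational (spinning disk), "0" means non-rotational (SSD).
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/block/sda/queue/rotational", "r");
	int rot;

	if (!f) {
		perror("open rotational attribute");
		return 1;
	}
	if (fscanf(f, "%d", &rot) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	printf("sda is %s\n", rot ? "rotational" : "non-rotational");
	return 0;
}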
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 83302ecdc8db..f0105d118056 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c | |||
@@ -1192,14 +1192,6 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) | |||
1192 | else | 1192 | else |
1193 | clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); | 1193 | clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); |
1194 | 1194 | ||
1195 | #ifdef MTIP_TRIM /* Disabling TRIM support temporarily */ | ||
1196 | /* Demux ID.DRAT & ID.RZAT to determine trim support */ | ||
1197 | if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) | ||
1198 | port->dd->trim_supp = true; | ||
1199 | else | ||
1200 | #endif | ||
1201 | port->dd->trim_supp = false; | ||
1202 | |||
1203 | /* Set the identify buffer as valid. */ | 1195 | /* Set the identify buffer as valid. */ |
1204 | port->identify_valid = 1; | 1196 | port->identify_valid = 1; |
1205 | 1197 | ||
@@ -1387,77 +1379,6 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, | |||
1387 | } | 1379 | } |
1388 | 1380 | ||
1389 | /* | 1381 | /* |
1390 | * Trim unused sectors | ||
1391 | * | ||
1392 | * @dd pointer to driver_data structure | ||
1393 | * @lba starting lba | ||
1394 | * @len # of 512b sectors to trim | ||
1395 | */ | ||
1396 | static blk_status_t mtip_send_trim(struct driver_data *dd, unsigned int lba, | ||
1397 | unsigned int len) | ||
1398 | { | ||
1399 | u64 tlba, tlen, sect_left; | ||
1400 | struct mtip_trim_entry *buf; | ||
1401 | dma_addr_t dma_addr; | ||
1402 | struct host_to_dev_fis fis; | ||
1403 | blk_status_t ret = BLK_STS_OK; | ||
1404 | int i; | ||
1405 | |||
1406 | if (!len || dd->trim_supp == false) | ||
1407 | return BLK_STS_IOERR; | ||
1408 | |||
1409 | /* Trim request too big */ | ||
1410 | WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES)); | ||
1411 | |||
1412 | /* Trim request not aligned on 4k boundary */ | ||
1413 | WARN_ON(len % 8 != 0); | ||
1414 | |||
1415 | /* Warn if vu_trim structure is too big */ | ||
1416 | WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE); | ||
1417 | |||
1418 | /* Allocate a DMA buffer for the trim structure */ | ||
1419 | buf = dma_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr, | ||
1420 | GFP_KERNEL); | ||
1421 | if (!buf) | ||
1422 | return BLK_STS_RESOURCE; | ||
1423 | memset(buf, 0, ATA_SECT_SIZE); | ||
1424 | |||
1425 | for (i = 0, sect_left = len, tlba = lba; | ||
1426 | i < MTIP_MAX_TRIM_ENTRIES && sect_left; | ||
1427 | i++) { | ||
1428 | tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ? | ||
1429 | MTIP_MAX_TRIM_ENTRY_LEN : | ||
1430 | sect_left); | ||
1431 | buf[i].lba = cpu_to_le32(tlba); | ||
1432 | buf[i].range = cpu_to_le16(tlen); | ||
1433 | tlba += tlen; | ||
1434 | sect_left -= tlen; | ||
1435 | } | ||
1436 | WARN_ON(sect_left != 0); | ||
1437 | |||
1438 | /* Build the fis */ | ||
1439 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1440 | fis.type = 0x27; | ||
1441 | fis.opts = 1 << 7; | ||
1442 | fis.command = 0xfb; | ||
1443 | fis.features = 0x60; | ||
1444 | fis.sect_count = 1; | ||
1445 | fis.device = ATA_DEVICE_OBS; | ||
1446 | |||
1447 | if (mtip_exec_internal_command(dd->port, | ||
1448 | &fis, | ||
1449 | 5, | ||
1450 | dma_addr, | ||
1451 | ATA_SECT_SIZE, | ||
1452 | 0, | ||
1453 | MTIP_TRIM_TIMEOUT_MS) < 0) | ||
1454 | ret = BLK_STS_IOERR; | ||
1455 | |||
1456 | dma_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr); | ||
1457 | return ret; | ||
1458 | } | ||
1459 | |||
1460 | /* | ||
1461 | * Get the drive capacity. | 1382 | * Get the drive capacity. |
1462 | * | 1383 | * |
1463 | * @dd Pointer to the device data structure. | 1384 | * @dd Pointer to the device data structure. |
@@ -3590,8 +3511,6 @@ static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
3590 | 3511 | ||
3591 | blk_mq_start_request(rq); | 3512 | blk_mq_start_request(rq); |
3592 | 3513 | ||
3593 | if (req_op(rq) == REQ_OP_DISCARD) | ||
3594 | return mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); | ||
3595 | mtip_hw_submit_io(dd, rq, cmd, hctx); | 3514 | mtip_hw_submit_io(dd, rq, cmd, hctx); |
3596 | return BLK_STS_OK; | 3515 | return BLK_STS_OK; |
3597 | } | 3516 | } |
@@ -3769,14 +3688,6 @@ skip_create_disk: | |||
3769 | blk_queue_max_segment_size(dd->queue, 0x400000); | 3688 | blk_queue_max_segment_size(dd->queue, 0x400000); |
3770 | blk_queue_io_min(dd->queue, 4096); | 3689 | blk_queue_io_min(dd->queue, 4096); |
3771 | 3690 | ||
3772 | /* Signal trim support */ | ||
3773 | if (dd->trim_supp == true) { | ||
3774 | blk_queue_flag_set(QUEUE_FLAG_DISCARD, dd->queue); | ||
3775 | dd->queue->limits.discard_granularity = 4096; | ||
3776 | blk_queue_max_discard_sectors(dd->queue, | ||
3777 | MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); | ||
3778 | } | ||
3779 | |||
3780 | /* Set the capacity of the device in 512 byte sectors. */ | 3691 | /* Set the capacity of the device in 512 byte sectors. */ |
3781 | if (!(mtip_hw_get_capacity(dd, &capacity))) { | 3692 | if (!(mtip_hw_get_capacity(dd, &capacity))) { |
3782 | dev_warn(&dd->pdev->dev, | 3693 | dev_warn(&dd->pdev->dev, |
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index abce25f27f57..91c1cb5b1532 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h | |||
@@ -193,21 +193,6 @@ struct mtip_work { | |||
193 | mtip_workq_sdbfx(w->port, group, w->completed); \ | 193 | mtip_workq_sdbfx(w->port, group, w->completed); \ |
194 | } | 194 | } |
195 | 195 | ||
196 | #define MTIP_TRIM_TIMEOUT_MS 240000 | ||
197 | #define MTIP_MAX_TRIM_ENTRIES 8 | ||
198 | #define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 | ||
199 | |||
200 | struct mtip_trim_entry { | ||
201 | __le32 lba; /* starting lba of region */ | ||
202 | __le16 rsvd; /* unused */ | ||
203 | __le16 range; /* # of 512b blocks to trim */ | ||
204 | } __packed; | ||
205 | |||
206 | struct mtip_trim { | ||
207 | /* Array of regions to trim */ | ||
208 | struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES]; | ||
209 | } __packed; | ||
210 | |||
211 | /* Register Frame Information Structure (FIS), host to device. */ | 196 | /* Register Frame Information Structure (FIS), host to device. */ |
212 | struct host_to_dev_fis { | 197 | struct host_to_dev_fis { |
213 | /* | 198 | /* |
@@ -474,8 +459,6 @@ struct driver_data { | |||
474 | 459 | ||
475 | struct dentry *dfs_node; | 460 | struct dentry *dfs_node; |
476 | 461 | ||
477 | bool trim_supp; /* flag indicating trim support */ | ||
478 | |||
479 | bool sr; | 462 | bool sr; |
480 | 463 | ||
481 | int numa_node; /* NUMA support */ | 464 | int numa_node; /* NUMA support */ |
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 6d415b20fb70..001dbdcbf355 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c | |||
@@ -343,6 +343,7 @@ static void pcd_init_units(void) | |||
343 | strcpy(disk->disk_name, cd->name); /* umm... */ | 343 | strcpy(disk->disk_name, cd->name); /* umm... */ |
344 | disk->fops = &pcd_bdops; | 344 | disk->fops = &pcd_bdops; |
345 | disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; | 345 | disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; |
346 | disk->events = DISK_EVENT_MEDIA_CHANGE; | ||
346 | } | 347 | } |
347 | } | 348 | } |
348 | 349 | ||
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 0ff9b12d0e35..6f9ad3fc716f 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c | |||
@@ -897,6 +897,7 @@ static void pd_probe_drive(struct pd_unit *disk) | |||
897 | p->fops = &pd_fops; | 897 | p->fops = &pd_fops; |
898 | p->major = major; | 898 | p->major = major; |
899 | p->first_minor = (disk - pd) << PD_BITS; | 899 | p->first_minor = (disk - pd) << PD_BITS; |
900 | p->events = DISK_EVENT_MEDIA_CHANGE; | ||
900 | disk->gd = p; | 901 | disk->gd = p; |
901 | p->private_data = disk; | 902 | p->private_data = disk; |
902 | 903 | ||
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 35e6e271b219..1e9c50a7256c 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c | |||
@@ -319,6 +319,7 @@ static void __init pf_init_units(void) | |||
319 | disk->first_minor = unit; | 319 | disk->first_minor = unit; |
320 | strcpy(disk->disk_name, pf->name); | 320 | strcpy(disk->disk_name, pf->name); |
321 | disk->fops = &pf_fops; | 321 | disk->fops = &pf_fops; |
322 | disk->events = DISK_EVENT_MEDIA_CHANGE; | ||
322 | if (!(*drives[unit])[D_PRT]) | 323 | if (!(*drives[unit])[D_PRT]) |
323 | pf_drive_count++; | 324 | pf_drive_count++; |
324 | } | 325 | } |
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index f5a71023f76c..024060165afa 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c | |||
@@ -2761,7 +2761,6 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) | |||
2761 | 2761 | ||
2762 | /* inherit events of the host device */ | 2762 | /* inherit events of the host device */ |
2763 | disk->events = pd->bdev->bd_disk->events; | 2763 | disk->events = pd->bdev->bd_disk->events; |
2764 | disk->async_events = pd->bdev->bd_disk->async_events; | ||
2765 | 2764 | ||
2766 | add_disk(disk); | 2765 | add_disk(disk); |
2767 | 2766 | ||
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 4e1d9b31f60c..cc61c5ce3ad5 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c | |||
@@ -102,7 +102,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, | |||
102 | 102 | ||
103 | rq_for_each_segment(bvec, req, iter) { | 103 | rq_for_each_segment(bvec, req, iter) { |
104 | unsigned long flags; | 104 | unsigned long flags; |
105 | dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %lu\n", | 105 | dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %llu\n", |
106 | __func__, __LINE__, i, bio_sectors(iter.bio), | 106 | __func__, __LINE__, i, bio_sectors(iter.bio), |
107 | iter.bio->bi_iter.bi_sector); | 107 | iter.bio->bi_iter.bi_sector); |
108 | 108 | ||
@@ -496,7 +496,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) | |||
496 | dev->regions[dev->region_idx].size*priv->blocking_factor); | 496 | dev->regions[dev->region_idx].size*priv->blocking_factor); |
497 | 497 | ||
498 | dev_info(&dev->sbd.core, | 498 | dev_info(&dev->sbd.core, |
499 | "%s is a %s (%llu MiB total, %lu MiB for OtherOS)\n", | 499 | "%s is a %s (%llu MiB total, %llu MiB for OtherOS)\n", |
500 | gendisk->disk_name, priv->model, priv->raw_capacity >> 11, | 500 | gendisk->disk_name, priv->model, priv->raw_capacity >> 11, |
501 | get_capacity(gendisk) >> 11); | 501 | get_capacity(gendisk) >> 11); |
502 | 502 | ||
diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 3fa6fcc34790..67b5ec281c6d 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c | |||
@@ -862,6 +862,7 @@ static int swim_floppy_init(struct swim_priv *swd) | |||
862 | swd->unit[drive].disk->first_minor = drive; | 862 | swd->unit[drive].disk->first_minor = drive; |
863 | sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive); | 863 | sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive); |
864 | swd->unit[drive].disk->fops = &floppy_fops; | 864 | swd->unit[drive].disk->fops = &floppy_fops; |
865 | swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE; | ||
865 | swd->unit[drive].disk->private_data = &swd->unit[drive]; | 866 | swd->unit[drive].disk->private_data = &swd->unit[drive]; |
866 | set_capacity(swd->unit[drive].disk, 2880); | 867 | set_capacity(swd->unit[drive].disk, 2880); |
867 | add_disk(swd->unit[drive].disk); | 868 | add_disk(swd->unit[drive].disk); |
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 1e2ae90d7715..cf42729c788e 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c | |||
@@ -1216,6 +1216,7 @@ static int swim3_attach(struct macio_dev *mdev, | |||
1216 | disk->first_minor = floppy_count; | 1216 | disk->first_minor = floppy_count; |
1217 | disk->fops = &floppy_fops; | 1217 | disk->fops = &floppy_fops; |
1218 | disk->private_data = fs; | 1218 | disk->private_data = fs; |
1219 | disk->events = DISK_EVENT_MEDIA_CHANGE; | ||
1219 | disk->flags |= GENHD_FL_REMOVABLE; | 1220 | disk->flags |= GENHD_FL_REMOVABLE; |
1220 | sprintf(disk->disk_name, "fd%d", floppy_count); | 1221 | sprintf(disk->disk_name, "fd%d", floppy_count); |
1221 | set_capacity(disk, 2880); | 1222 | set_capacity(disk, 2880); |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2a7ca4a1e6f7..f1d90cd3dc47 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -693,7 +693,8 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set) | |||
693 | { | 693 | { |
694 | struct virtio_blk *vblk = set->driver_data; | 694 | struct virtio_blk *vblk = set->driver_data; |
695 | 695 | ||
696 | return blk_mq_virtio_map_queues(&set->map[0], vblk->vdev, 0); | 696 | return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], |
697 | vblk->vdev, 0); | ||
697 | } | 698 | } |
698 | 699 | ||
699 | #ifdef CONFIG_VIRTIO_BLK_SCSI | 700 | #ifdef CONFIG_VIRTIO_BLK_SCSI |
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 32a21b8d1d85..464c9092bc8b 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c | |||
@@ -1032,6 +1032,7 @@ static int ace_setup(struct ace_device *ace) | |||
1032 | ace->gd->major = ace_major; | 1032 | ace->gd->major = ace_major; |
1033 | ace->gd->first_minor = ace->id * ACE_NUM_MINORS; | 1033 | ace->gd->first_minor = ace->id * ACE_NUM_MINORS; |
1034 | ace->gd->fops = &ace_fops; | 1034 | ace->gd->fops = &ace_fops; |
1035 | ace->gd->events = DISK_EVENT_MEDIA_CHANGE; | ||
1035 | ace->gd->queue = ace->queue; | 1036 | ace->gd->queue = ace->queue; |
1036 | ace->gd->private_data = ace; | 1037 | ace->gd->private_data = ace; |
1037 | snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a'); | 1038 | snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a'); |
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index f8b7345fe1cb..5cf3bade0d57 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c | |||
@@ -786,6 +786,7 @@ static int probe_gdrom(struct platform_device *devptr) | |||
786 | goto probe_fail_cdrom_register; | 786 | goto probe_fail_cdrom_register; |
787 | } | 787 | } |
788 | gd.disk->fops = &gdrom_bdops; | 788 | gd.disk->fops = &gdrom_bdops; |
789 | gd.disk->events = DISK_EVENT_MEDIA_CHANGE; | ||
789 | /* latch on to the interrupt */ | 790 | /* latch on to the interrupt */ |
790 | err = gdrom_set_interrupt_handlers(); | 791 | err = gdrom_set_interrupt_handlers(); |
791 | if (err) | 792 | if (err) |
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 1f03884a6808..3b15adc6ce98 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c | |||
@@ -1797,6 +1797,7 @@ static int ide_cd_probe(ide_drive_t *drive) | |||
1797 | ide_cd_read_toc(drive); | 1797 | ide_cd_read_toc(drive); |
1798 | g->fops = &idecd_ops; | 1798 | g->fops = &idecd_ops; |
1799 | g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; | 1799 | g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; |
1800 | g->events = DISK_EVENT_MEDIA_CHANGE; | ||
1800 | device_add_disk(&drive->gendev, g, NULL); | 1801 | device_add_disk(&drive->gendev, g, NULL); |
1801 | return 0; | 1802 | return 0; |
1802 | 1803 | ||
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 4a6e1a413ead..46f2df288c6a 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c | |||
@@ -82,8 +82,9 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr) | |||
82 | 82 | ||
83 | /* | 83 | /* |
84 | * ide-cd always generates media changed event if media is missing, which | 84 | * ide-cd always generates media changed event if media is missing, which |
85 | * makes it impossible to use for proper event reporting, so disk->events | 85 | * makes it impossible to use for proper event reporting, so |
86 | * is cleared to 0 and the following function is used only to trigger | 86 | * DISK_EVENT_FLAG_UEVENT is cleared in disk->event_flags |
87 | * and the following function is used only to trigger | ||
87 | * revalidation and never propagated to userland. | 88 | * revalidation and never propagated to userland. |
88 | */ | 89 | */ |
89 | unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *cdi, | 90 | unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *cdi, |
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 04e008e8f6f9..f233b34ea0c0 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c | |||
@@ -299,8 +299,9 @@ static unsigned int ide_gd_check_events(struct gendisk *disk, | |||
299 | /* | 299 | /* |
300 | * The following is used to force revalidation on the first open on | 300 | * The following is used to force revalidation on the first open on |
301 | * removeable devices, and never gets reported to userland as | 301 | * removeable devices, and never gets reported to userland as |
302 | * genhd->events is 0. This is intended as removeable ide disk | 302 | * DISK_EVENT_FLAG_UEVENT isn't set in genhd->event_flags. |
303 | * can't really detect MEDIA_CHANGE events. | 303 | * This is intended as removable ide disk can't really detect |
304 | * MEDIA_CHANGE events. | ||
304 | */ | 305 | */ |
305 | ret = drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED; | 306 | ret = drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED; |
306 | drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED; | 307 | drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED; |
@@ -416,6 +417,7 @@ static int ide_gd_probe(ide_drive_t *drive) | |||
416 | if (drive->dev_flags & IDE_DFLAG_REMOVABLE) | 417 | if (drive->dev_flags & IDE_DFLAG_REMOVABLE) |
417 | g->flags = GENHD_FL_REMOVABLE; | 418 | g->flags = GENHD_FL_REMOVABLE; |
418 | g->fops = &ide_gd_ops; | 419 | g->fops = &ide_gd_ops; |
420 | g->events = DISK_EVENT_MEDIA_CHANGE; | ||
419 | device_add_disk(&drive->gendev, g, NULL); | 421 | device_add_disk(&drive->gendev, g, NULL); |
420 | return 0; | 422 | return 0; |
421 | 423 | ||
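A common thread in the driver hunks above (pd, pf, swim, swim3, xsysace, gdrom, ide-cd, ide-gd) is that each removable-media driver now states explicitly which events it can report by setting disk->events = DISK_EVENT_MEDIA_CHANGE before the disk is registered; whether those events are also forwarded to userspace is controlled separately, as the DISK_EVENT_FLAG_UEVENT comments in the ide hunks note. The fragment below is a hedged, driver-shaped sketch of that pattern only, not a buildable driver; my_probe(), my_major and my_fops are invented names.

	/* Sketch of the opt-in pattern applied in the hunks above. */
	static int my_probe(void)
	{
		struct gendisk *gd = alloc_disk(1);

		if (!gd)
			return -ENOMEM;

		gd->major = my_major;
		gd->first_minor = 0;
		gd->fops = &my_fops;
		gd->events = DISK_EVENT_MEDIA_CHANGE;	/* declare supported events */
		add_disk(gd);
		return 0;
	}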
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 5002838ea476..f8986effcb50 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c | |||
@@ -327,10 +327,11 @@ static int bch_allocator_thread(void *arg) | |||
327 | * possibly issue discards to them, then we add the bucket to | 327 | * possibly issue discards to them, then we add the bucket to |
328 | * the free list: | 328 | * the free list: |
329 | */ | 329 | */ |
330 | while (!fifo_empty(&ca->free_inc)) { | 330 | while (1) { |
331 | long bucket; | 331 | long bucket; |
332 | 332 | ||
333 | fifo_pop(&ca->free_inc, bucket); | 333 | if (!fifo_pop(&ca->free_inc, bucket)) |
334 | break; | ||
334 | 335 | ||
335 | if (ca->discard) { | 336 | if (ca->discard) { |
336 | mutex_unlock(&ca->set->bucket_lock); | 337 | mutex_unlock(&ca->set->bucket_lock); |
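The allocator loop above no longer tests fifo_empty() before popping; it pops unconditionally and stops when fifo_pop() itself reports an empty FIFO, so the emptiness check and the dequeue cannot disagree. A minimal user-space sketch of the same pop-and-test shape, using an invented int_queue type rather than bcache's fifo macros:

	#include <stdbool.h>
	#include <stdio.h>

	struct int_queue { int buf[8]; unsigned head, tail; };

	/* Pop one element; return false when the queue is empty. */
	static bool queue_pop(struct int_queue *q, int *out)
	{
		if (q->head == q->tail)
			return false;
		*out = q->buf[q->head++ % 8];
		return true;
	}

	int main(void)
	{
		struct int_queue q = { .buf = { 1, 2, 3 }, .head = 0, .tail = 3 };
		int bucket;

		while (1) {			/* mirrors the reworked allocator loop */
			if (!queue_pop(&q, &bucket))
				break;		/* empty: nothing left to process */
			printf("got %d\n", bucket);
		}
		return 0;
	}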
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 64def336f053..773f5fdad25f 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
@@ -429,14 +429,14 @@ static void do_btree_node_write(struct btree *b) | |||
429 | bset_sector_offset(&b->keys, i)); | 429 | bset_sector_offset(&b->keys, i)); |
430 | 430 | ||
431 | if (!bch_bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) { | 431 | if (!bch_bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) { |
432 | int j; | ||
433 | struct bio_vec *bv; | 432 | struct bio_vec *bv; |
434 | void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); | 433 | void *addr = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); |
435 | struct bvec_iter_all iter_all; | 434 | struct bvec_iter_all iter_all; |
436 | 435 | ||
437 | bio_for_each_segment_all(bv, b->bio, j, iter_all) | 436 | bio_for_each_segment_all(bv, b->bio, iter_all) { |
438 | memcpy(page_address(bv->bv_page), | 437 | memcpy(page_address(bv->bv_page), addr, PAGE_SIZE); |
439 | base + j * PAGE_SIZE, PAGE_SIZE); | 438 | addr += PAGE_SIZE; |
439 | } | ||
440 | 440 | ||
441 | bch_submit_bbio(b->bio, b->c, &k.key, 0); | 441 | bch_submit_bbio(b->bio, b->c, &k.key, 0); |
442 | 442 | ||
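In do_btree_node_write() the fallback copy path now uses the three-argument bio_for_each_segment_all() (the explicit integer index is gone; iteration state lives in struct bvec_iter_all) and advances a running addr pointer by PAGE_SIZE per segment instead of computing base + j * PAGE_SIZE. Both forms copy the same bytes; the sketch below shows the pointer-advance style on plain buffers, with the page size and data invented for illustration:

	#include <stdio.h>
	#include <string.h>

	#define PAGE_SIZE 16		/* illustrative, not the kernel's page size */
	#define NPAGES    3

	int main(void)
	{
		char src[NPAGES * PAGE_SIZE];
		char pages[NPAGES][PAGE_SIZE];
		char *addr = src;
		int i;

		memset(src, 'x', sizeof(src));

		/* Equivalent to the old "src + i * PAGE_SIZE" indexing:
		 * advance a cursor one page per iteration instead. */
		for (i = 0; i < NPAGES; i++) {
			memcpy(pages[i], addr, PAGE_SIZE);
			addr += PAGE_SIZE;
		}

		printf("copied %d pages of %d bytes\n", NPAGES, PAGE_SIZE);
		return 0;
	}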
@@ -1476,11 +1476,11 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1476 | 1476 | ||
1477 | out_nocoalesce: | 1477 | out_nocoalesce: |
1478 | closure_sync(&cl); | 1478 | closure_sync(&cl); |
1479 | bch_keylist_free(&keylist); | ||
1480 | 1479 | ||
1481 | while ((k = bch_keylist_pop(&keylist))) | 1480 | while ((k = bch_keylist_pop(&keylist))) |
1482 | if (!bkey_cmp(k, &ZERO_KEY)) | 1481 | if (!bkey_cmp(k, &ZERO_KEY)) |
1483 | atomic_dec(&b->c->prio_blocked); | 1482 | atomic_dec(&b->c->prio_blocked); |
1483 | bch_keylist_free(&keylist); | ||
1484 | 1484 | ||
1485 | for (i = 0; i < nodes; i++) | 1485 | for (i = 0; i < nodes; i++) |
1486 | if (!IS_ERR_OR_NULL(new_nodes[i])) { | 1486 | if (!IS_ERR_OR_NULL(new_nodes[i])) { |
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index b2fd412715b1..12dae9348147 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c | |||
@@ -147,7 +147,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) | |||
147 | { | 147 | { |
148 | #define read_bucket(b) \ | 148 | #define read_bucket(b) \ |
149 | ({ \ | 149 | ({ \ |
150 | int ret = journal_read_bucket(ca, list, b); \ | 150 | ret = journal_read_bucket(ca, list, b); \ |
151 | __set_bit(b, bitmap); \ | 151 | __set_bit(b, bitmap); \ |
152 | if (ret < 0) \ | 152 | if (ret < 0) \ |
153 | return ret; \ | 153 | return ret; \ |
@@ -156,6 +156,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) | |||
156 | 156 | ||
157 | struct cache *ca; | 157 | struct cache *ca; |
158 | unsigned int iter; | 158 | unsigned int iter; |
159 | int ret = 0; | ||
159 | 160 | ||
160 | for_each_cache(ca, c, iter) { | 161 | for_each_cache(ca, c, iter) { |
161 | struct journal_device *ja = &ca->journal; | 162 | struct journal_device *ja = &ca->journal; |
@@ -267,7 +268,7 @@ bsearch: | |||
267 | struct journal_replay, | 268 | struct journal_replay, |
268 | list)->j.seq; | 269 | list)->j.seq; |
269 | 270 | ||
270 | return 0; | 271 | return ret; |
271 | #undef read_bucket | 272 | #undef read_bucket |
272 | } | 273 | } |
273 | 274 | ||
@@ -317,6 +318,18 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) | |||
317 | } | 318 | } |
318 | } | 319 | } |
319 | 320 | ||
321 | static bool is_discard_enabled(struct cache_set *s) | ||
322 | { | ||
323 | struct cache *ca; | ||
324 | unsigned int i; | ||
325 | |||
326 | for_each_cache(ca, s, i) | ||
327 | if (ca->discard) | ||
328 | return true; | ||
329 | |||
330 | return false; | ||
331 | } | ||
332 | |||
320 | int bch_journal_replay(struct cache_set *s, struct list_head *list) | 333 | int bch_journal_replay(struct cache_set *s, struct list_head *list) |
321 | { | 334 | { |
322 | int ret = 0, keys = 0, entries = 0; | 335 | int ret = 0, keys = 0, entries = 0; |
@@ -330,9 +343,17 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) | |||
330 | list_for_each_entry(i, list, list) { | 343 | list_for_each_entry(i, list, list) { |
331 | BUG_ON(i->pin && atomic_read(i->pin) != 1); | 344 | BUG_ON(i->pin && atomic_read(i->pin) != 1); |
332 | 345 | ||
333 | cache_set_err_on(n != i->j.seq, s, | 346 | if (n != i->j.seq) { |
334 | "bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)", | 347 | if (n == start && is_discard_enabled(s)) |
335 | n, i->j.seq - 1, start, end); | 348 | pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)", |
349 | n, i->j.seq - 1, start, end); | ||
350 | else { | ||
351 | pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)", | ||
352 | n, i->j.seq - 1, start, end); | ||
353 | ret = -EIO; | ||
354 | goto err; | ||
355 | } | ||
356 | } | ||
336 | 357 | ||
337 | for (k = i->j.start; | 358 | for (k = i->j.start; |
338 | k < bset_bkey_last(&i->j); | 359 | k < bset_bkey_last(&i->j); |
@@ -540,11 +561,11 @@ static void journal_reclaim(struct cache_set *c) | |||
540 | ca->sb.nr_this_dev); | 561 | ca->sb.nr_this_dev); |
541 | } | 562 | } |
542 | 563 | ||
543 | bkey_init(k); | 564 | if (n) { |
544 | SET_KEY_PTRS(k, n); | 565 | bkey_init(k); |
545 | 566 | SET_KEY_PTRS(k, n); | |
546 | if (n) | ||
547 | c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; | 567 | c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; |
568 | } | ||
548 | out: | 569 | out: |
549 | if (!journal_full(&c->journal)) | 570 | if (!journal_full(&c->journal)) |
550 | __closure_wake_up(&c->journal.wait); | 571 | __closure_wake_up(&c->journal.wait); |
@@ -671,6 +692,9 @@ static void journal_write_unlocked(struct closure *cl) | |||
671 | ca->journal.seq[ca->journal.cur_idx] = w->data->seq; | 692 | ca->journal.seq[ca->journal.cur_idx] = w->data->seq; |
672 | } | 693 | } |
673 | 694 | ||
695 | /* If KEY_PTRS(k) == 0, this jset gets lost in air */ | ||
696 | BUG_ON(i == 0); | ||
697 | |||
674 | atomic_dec_bug(&fifo_back(&c->journal.pin)); | 698 | atomic_dec_bug(&fifo_back(&c->journal.pin)); |
675 | bch_journal_next(&c->journal); | 699 | bch_journal_next(&c->journal); |
676 | journal_reclaim(c); | 700 | journal_reclaim(c); |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index f101bfe8657a..41adcd1546f1 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
@@ -329,12 +329,13 @@ void bch_data_insert(struct closure *cl) | |||
329 | bch_data_insert_start(cl); | 329 | bch_data_insert_start(cl); |
330 | } | 330 | } |
331 | 331 | ||
332 | /* Congested? */ | 332 | /* |
333 | 333 | * Congested? Return 0 (not congested) or the limit (in sectors) | |
334 | unsigned int bch_get_congested(struct cache_set *c) | 334 | * beyond which we should bypass the cache due to congestion. |
335 | */ | ||
336 | unsigned int bch_get_congested(const struct cache_set *c) | ||
335 | { | 337 | { |
336 | int i; | 338 | int i; |
337 | long rand; | ||
338 | 339 | ||
339 | if (!c->congested_read_threshold_us && | 340 | if (!c->congested_read_threshold_us && |
340 | !c->congested_write_threshold_us) | 341 | !c->congested_write_threshold_us) |
@@ -353,8 +354,7 @@ unsigned int bch_get_congested(struct cache_set *c) | |||
353 | if (i > 0) | 354 | if (i > 0) |
354 | i = fract_exp_two(i, 6); | 355 | i = fract_exp_two(i, 6); |
355 | 356 | ||
356 | rand = get_random_int(); | 357 | i -= hweight32(get_random_u32()); |
357 | i -= bitmap_weight(&rand, BITS_PER_LONG); | ||
358 | 358 | ||
359 | return i > 0 ? i : 1; | 359 | return i > 0 ? i : 1; |
360 | } | 360 | } |
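bch_get_congested() now takes a const cache_set and derives its random jitter from hweight32(get_random_u32()): the population count of a uniformly random 32-bit word, whose mean is 16, is subtracted from the scaled congestion value. The old code stored get_random_int() in a long and ran bitmap_weight() over BITS_PER_LONG bits, which on 64-bit only ever had 32 random bits set anyway. A user-space sketch of the new form, with rand() standing in for get_random_u32():

	#include <stdio.h>
	#include <stdlib.h>
	#include <time.h>

	/* Population count of a 32-bit word, like the kernel's hweight32(). */
	static unsigned int hweight32(unsigned int w)
	{
		return (unsigned int)__builtin_popcount(w);
	}

	int main(void)
	{
		int i = 100;	/* pretend scaled congestion value */

		srand((unsigned int)time(NULL));
		/* Subtract a small random amount (mean ~16) so the bypass
		 * threshold dithers instead of being a hard cutoff. */
		i -= (int)hweight32((unsigned int)rand());

		printf("congested limit: %d\n", i > 0 ? i : 1);
		return 0;
	}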
@@ -376,7 +376,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) | |||
376 | { | 376 | { |
377 | struct cache_set *c = dc->disk.c; | 377 | struct cache_set *c = dc->disk.c; |
378 | unsigned int mode = cache_mode(dc); | 378 | unsigned int mode = cache_mode(dc); |
379 | unsigned int sectors, congested = bch_get_congested(c); | 379 | unsigned int sectors, congested; |
380 | struct task_struct *task = current; | 380 | struct task_struct *task = current; |
381 | struct io *i; | 381 | struct io *i; |
382 | 382 | ||
@@ -412,6 +412,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) | |||
412 | goto rescale; | 412 | goto rescale; |
413 | } | 413 | } |
414 | 414 | ||
415 | congested = bch_get_congested(c); | ||
415 | if (!congested && !dc->sequential_cutoff) | 416 | if (!congested && !dc->sequential_cutoff) |
416 | goto rescale; | 417 | goto rescale; |
417 | 418 | ||
@@ -706,14 +707,14 @@ static void search_free(struct closure *cl) | |||
706 | { | 707 | { |
707 | struct search *s = container_of(cl, struct search, cl); | 708 | struct search *s = container_of(cl, struct search, cl); |
708 | 709 | ||
709 | atomic_dec(&s->d->c->search_inflight); | 710 | atomic_dec(&s->iop.c->search_inflight); |
710 | 711 | ||
711 | if (s->iop.bio) | 712 | if (s->iop.bio) |
712 | bio_put(s->iop.bio); | 713 | bio_put(s->iop.bio); |
713 | 714 | ||
714 | bio_complete(s); | 715 | bio_complete(s); |
715 | closure_debug_destroy(cl); | 716 | closure_debug_destroy(cl); |
716 | mempool_free(s, &s->d->c->search); | 717 | mempool_free(s, &s->iop.c->search); |
717 | } | 718 | } |
718 | 719 | ||
719 | static inline struct search *search_alloc(struct bio *bio, | 720 | static inline struct search *search_alloc(struct bio *bio, |
@@ -756,13 +757,13 @@ static void cached_dev_bio_complete(struct closure *cl) | |||
756 | struct search *s = container_of(cl, struct search, cl); | 757 | struct search *s = container_of(cl, struct search, cl); |
757 | struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); | 758 | struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); |
758 | 759 | ||
759 | search_free(cl); | ||
760 | cached_dev_put(dc); | 760 | cached_dev_put(dc); |
761 | search_free(cl); | ||
761 | } | 762 | } |
762 | 763 | ||
763 | /* Process reads */ | 764 | /* Process reads */ |
764 | 765 | ||
765 | static void cached_dev_cache_miss_done(struct closure *cl) | 766 | static void cached_dev_read_error_done(struct closure *cl) |
766 | { | 767 | { |
767 | struct search *s = container_of(cl, struct search, cl); | 768 | struct search *s = container_of(cl, struct search, cl); |
768 | 769 | ||
@@ -800,7 +801,22 @@ static void cached_dev_read_error(struct closure *cl) | |||
800 | closure_bio_submit(s->iop.c, bio, cl); | 801 | closure_bio_submit(s->iop.c, bio, cl); |
801 | } | 802 | } |
802 | 803 | ||
803 | continue_at(cl, cached_dev_cache_miss_done, NULL); | 804 | continue_at(cl, cached_dev_read_error_done, NULL); |
805 | } | ||
806 | |||
807 | static void cached_dev_cache_miss_done(struct closure *cl) | ||
808 | { | ||
809 | struct search *s = container_of(cl, struct search, cl); | ||
810 | struct bcache_device *d = s->d; | ||
811 | |||
812 | if (s->iop.replace_collision) | ||
813 | bch_mark_cache_miss_collision(s->iop.c, s->d); | ||
814 | |||
815 | if (s->iop.bio) | ||
816 | bio_free_pages(s->iop.bio); | ||
817 | |||
818 | cached_dev_bio_complete(cl); | ||
819 | closure_put(&d->cl); | ||
804 | } | 820 | } |
805 | 821 | ||
806 | static void cached_dev_read_done(struct closure *cl) | 822 | static void cached_dev_read_done(struct closure *cl) |
@@ -833,6 +849,7 @@ static void cached_dev_read_done(struct closure *cl) | |||
833 | if (verify(dc) && s->recoverable && !s->read_dirty_data) | 849 | if (verify(dc) && s->recoverable && !s->read_dirty_data) |
834 | bch_data_verify(dc, s->orig_bio); | 850 | bch_data_verify(dc, s->orig_bio); |
835 | 851 | ||
852 | closure_get(&dc->disk.cl); | ||
836 | bio_complete(s); | 853 | bio_complete(s); |
837 | 854 | ||
838 | if (s->iop.bio && | 855 | if (s->iop.bio && |
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 721bf336ed1a..c64dbd7a91aa 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h | |||
@@ -33,7 +33,7 @@ struct data_insert_op { | |||
33 | BKEY_PADDED(replace_key); | 33 | BKEY_PADDED(replace_key); |
34 | }; | 34 | }; |
35 | 35 | ||
36 | unsigned int bch_get_congested(struct cache_set *c); | 36 | unsigned int bch_get_congested(const struct cache_set *c); |
37 | void bch_data_insert(struct closure *cl); | 37 | void bch_data_insert(struct closure *cl); |
38 | 38 | ||
39 | void bch_cached_dev_request_init(struct cached_dev *dc); | 39 | void bch_cached_dev_request_init(struct cached_dev *dc); |
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a697a3a923cd..1b63ac876169 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c | |||
@@ -662,6 +662,11 @@ static const struct block_device_operations bcache_ops = { | |||
662 | void bcache_device_stop(struct bcache_device *d) | 662 | void bcache_device_stop(struct bcache_device *d) |
663 | { | 663 | { |
664 | if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags)) | 664 | if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags)) |
665 | /* | ||
666 | * closure_fn set to | ||
667 | * - cached device: cached_dev_flush() | ||
668 | * - flash dev: flash_dev_flush() | ||
669 | */ | ||
665 | closure_queue(&d->cl); | 670 | closure_queue(&d->cl); |
666 | } | 671 | } |
667 | 672 | ||
@@ -906,21 +911,18 @@ static int cached_dev_status_update(void *arg) | |||
906 | void bch_cached_dev_run(struct cached_dev *dc) | 911 | void bch_cached_dev_run(struct cached_dev *dc) |
907 | { | 912 | { |
908 | struct bcache_device *d = &dc->disk; | 913 | struct bcache_device *d = &dc->disk; |
909 | char buf[SB_LABEL_SIZE + 1]; | 914 | char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL); |
910 | char *env[] = { | 915 | char *env[] = { |
911 | "DRIVER=bcache", | 916 | "DRIVER=bcache", |
912 | kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid), | 917 | kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid), |
913 | NULL, | 918 | kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf ? : ""), |
914 | NULL, | 919 | NULL, |
915 | }; | 920 | }; |
916 | 921 | ||
917 | memcpy(buf, dc->sb.label, SB_LABEL_SIZE); | ||
918 | buf[SB_LABEL_SIZE] = '\0'; | ||
919 | env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); | ||
920 | |||
921 | if (atomic_xchg(&dc->running, 1)) { | 922 | if (atomic_xchg(&dc->running, 1)) { |
922 | kfree(env[1]); | 923 | kfree(env[1]); |
923 | kfree(env[2]); | 924 | kfree(env[2]); |
925 | kfree(buf); | ||
924 | return; | 926 | return; |
925 | } | 927 | } |
926 | 928 | ||
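bch_cached_dev_run() previously copied the fixed-size, not necessarily NUL-terminated superblock label into an on-stack buffer and terminated it by hand; it now uses kmemdup_nul(), which allocates len + 1 bytes, copies the label and appends the terminator, and the buffer is freed together with the uevent strings on every exit path. A user-space stand-in for the helper (the kernel version also takes a gfp_t); the label contents are invented:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define LABEL_SIZE 32	/* stands in for bcache's SB_LABEL_SIZE */

	/* Copy len bytes and NUL-terminate, like the kernel's kmemdup_nul(). */
	static char *kmemdup_nul(const char *s, size_t len)
	{
		char *buf = malloc(len + 1);

		if (!buf)
			return NULL;
		memcpy(buf, s, len);
		buf[len] = '\0';
		return buf;
	}

	int main(void)
	{
		char label[LABEL_SIZE] = "fast-ssd";	/* rest of the array stays zeroed */
		char *buf = kmemdup_nul(label, LABEL_SIZE);

		if (buf) {
			printf("CACHED_LABEL=%s\n", buf);
			free(buf);
		}
		return 0;
	}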
@@ -944,6 +946,7 @@ void bch_cached_dev_run(struct cached_dev *dc) | |||
944 | kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); | 946 | kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); |
945 | kfree(env[1]); | 947 | kfree(env[1]); |
946 | kfree(env[2]); | 948 | kfree(env[2]); |
949 | kfree(buf); | ||
947 | 950 | ||
948 | if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || | 951 | if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || |
949 | sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) | 952 | sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) |
@@ -1174,6 +1177,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, | |||
1174 | return 0; | 1177 | return 0; |
1175 | } | 1178 | } |
1176 | 1179 | ||
1180 | /* when dc->disk.kobj released */ | ||
1177 | void bch_cached_dev_release(struct kobject *kobj) | 1181 | void bch_cached_dev_release(struct kobject *kobj) |
1178 | { | 1182 | { |
1179 | struct cached_dev *dc = container_of(kobj, struct cached_dev, | 1183 | struct cached_dev *dc = container_of(kobj, struct cached_dev, |
@@ -1280,7 +1284,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) | |||
1280 | 1284 | ||
1281 | /* Cached device - bcache superblock */ | 1285 | /* Cached device - bcache superblock */ |
1282 | 1286 | ||
1283 | static void register_bdev(struct cache_sb *sb, struct page *sb_page, | 1287 | static int register_bdev(struct cache_sb *sb, struct page *sb_page, |
1284 | struct block_device *bdev, | 1288 | struct block_device *bdev, |
1285 | struct cached_dev *dc) | 1289 | struct cached_dev *dc) |
1286 | { | 1290 | { |
@@ -1318,14 +1322,16 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, | |||
1318 | BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) | 1322 | BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) |
1319 | bch_cached_dev_run(dc); | 1323 | bch_cached_dev_run(dc); |
1320 | 1324 | ||
1321 | return; | 1325 | return 0; |
1322 | err: | 1326 | err: |
1323 | pr_notice("error %s: %s", dc->backing_dev_name, err); | 1327 | pr_notice("error %s: %s", dc->backing_dev_name, err); |
1324 | bcache_device_stop(&dc->disk); | 1328 | bcache_device_stop(&dc->disk); |
1329 | return -EIO; | ||
1325 | } | 1330 | } |
1326 | 1331 | ||
1327 | /* Flash only volumes */ | 1332 | /* Flash only volumes */ |
1328 | 1333 | ||
1334 | /* When d->kobj released */ | ||
1329 | void bch_flash_dev_release(struct kobject *kobj) | 1335 | void bch_flash_dev_release(struct kobject *kobj) |
1330 | { | 1336 | { |
1331 | struct bcache_device *d = container_of(kobj, struct bcache_device, | 1337 | struct bcache_device *d = container_of(kobj, struct bcache_device, |
@@ -1496,6 +1502,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) | |||
1496 | return true; | 1502 | return true; |
1497 | } | 1503 | } |
1498 | 1504 | ||
1505 | /* When c->kobj released */ | ||
1499 | void bch_cache_set_release(struct kobject *kobj) | 1506 | void bch_cache_set_release(struct kobject *kobj) |
1500 | { | 1507 | { |
1501 | struct cache_set *c = container_of(kobj, struct cache_set, kobj); | 1508 | struct cache_set *c = container_of(kobj, struct cache_set, kobj); |
@@ -1516,6 +1523,7 @@ static void cache_set_free(struct closure *cl) | |||
1516 | bch_btree_cache_free(c); | 1523 | bch_btree_cache_free(c); |
1517 | bch_journal_free(c); | 1524 | bch_journal_free(c); |
1518 | 1525 | ||
1526 | mutex_lock(&bch_register_lock); | ||
1519 | for_each_cache(ca, c, i) | 1527 | for_each_cache(ca, c, i) |
1520 | if (ca) { | 1528 | if (ca) { |
1521 | ca->set = NULL; | 1529 | ca->set = NULL; |
@@ -1534,7 +1542,6 @@ static void cache_set_free(struct closure *cl) | |||
1534 | mempool_exit(&c->search); | 1542 | mempool_exit(&c->search); |
1535 | kfree(c->devices); | 1543 | kfree(c->devices); |
1536 | 1544 | ||
1537 | mutex_lock(&bch_register_lock); | ||
1538 | list_del(&c->list); | 1545 | list_del(&c->list); |
1539 | mutex_unlock(&bch_register_lock); | 1546 | mutex_unlock(&bch_register_lock); |
1540 | 1547 | ||
@@ -1673,6 +1680,7 @@ static void __cache_set_unregister(struct closure *cl) | |||
1673 | void bch_cache_set_stop(struct cache_set *c) | 1680 | void bch_cache_set_stop(struct cache_set *c) |
1674 | { | 1681 | { |
1675 | if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags)) | 1682 | if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags)) |
1683 | /* closure_fn set to __cache_set_unregister() */ | ||
1676 | closure_queue(&c->caching); | 1684 | closure_queue(&c->caching); |
1677 | } | 1685 | } |
1678 | 1686 | ||
@@ -1775,13 +1783,15 @@ err: | |||
1775 | return NULL; | 1783 | return NULL; |
1776 | } | 1784 | } |
1777 | 1785 | ||
1778 | static void run_cache_set(struct cache_set *c) | 1786 | static int run_cache_set(struct cache_set *c) |
1779 | { | 1787 | { |
1780 | const char *err = "cannot allocate memory"; | 1788 | const char *err = "cannot allocate memory"; |
1781 | struct cached_dev *dc, *t; | 1789 | struct cached_dev *dc, *t; |
1782 | struct cache *ca; | 1790 | struct cache *ca; |
1783 | struct closure cl; | 1791 | struct closure cl; |
1784 | unsigned int i; | 1792 | unsigned int i; |
1793 | LIST_HEAD(journal); | ||
1794 | struct journal_replay *l; | ||
1785 | 1795 | ||
1786 | closure_init_stack(&cl); | 1796 | closure_init_stack(&cl); |
1787 | 1797 | ||
@@ -1790,7 +1800,6 @@ static void run_cache_set(struct cache_set *c) | |||
1790 | set_gc_sectors(c); | 1800 | set_gc_sectors(c); |
1791 | 1801 | ||
1792 | if (CACHE_SYNC(&c->sb)) { | 1802 | if (CACHE_SYNC(&c->sb)) { |
1793 | LIST_HEAD(journal); | ||
1794 | struct bkey *k; | 1803 | struct bkey *k; |
1795 | struct jset *j; | 1804 | struct jset *j; |
1796 | 1805 | ||
@@ -1869,7 +1878,9 @@ static void run_cache_set(struct cache_set *c) | |||
1869 | if (j->version < BCACHE_JSET_VERSION_UUID) | 1878 | if (j->version < BCACHE_JSET_VERSION_UUID) |
1870 | __uuid_write(c); | 1879 | __uuid_write(c); |
1871 | 1880 | ||
1872 | bch_journal_replay(c, &journal); | 1881 | err = "bcache: replay journal failed"; |
1882 | if (bch_journal_replay(c, &journal)) | ||
1883 | goto err; | ||
1873 | } else { | 1884 | } else { |
1874 | pr_notice("invalidating existing data"); | 1885 | pr_notice("invalidating existing data"); |
1875 | 1886 | ||
@@ -1937,11 +1948,19 @@ static void run_cache_set(struct cache_set *c) | |||
1937 | flash_devs_run(c); | 1948 | flash_devs_run(c); |
1938 | 1949 | ||
1939 | set_bit(CACHE_SET_RUNNING, &c->flags); | 1950 | set_bit(CACHE_SET_RUNNING, &c->flags); |
1940 | return; | 1951 | return 0; |
1941 | err: | 1952 | err: |
1953 | while (!list_empty(&journal)) { | ||
1954 | l = list_first_entry(&journal, struct journal_replay, list); | ||
1955 | list_del(&l->list); | ||
1956 | kfree(l); | ||
1957 | } | ||
1958 | |||
1942 | closure_sync(&cl); | 1959 | closure_sync(&cl); |
1943 | /* XXX: test this, it's broken */ | 1960 | /* XXX: test this, it's broken */ |
1944 | bch_cache_set_error(c, "%s", err); | 1961 | bch_cache_set_error(c, "%s", err); |
1962 | |||
1963 | return -EIO; | ||
1945 | } | 1964 | } |
1946 | 1965 | ||
1947 | static bool can_attach_cache(struct cache *ca, struct cache_set *c) | 1966 | static bool can_attach_cache(struct cache *ca, struct cache_set *c) |
@@ -2005,8 +2024,11 @@ found: | |||
2005 | ca->set->cache[ca->sb.nr_this_dev] = ca; | 2024 | ca->set->cache[ca->sb.nr_this_dev] = ca; |
2006 | c->cache_by_alloc[c->caches_loaded++] = ca; | 2025 | c->cache_by_alloc[c->caches_loaded++] = ca; |
2007 | 2026 | ||
2008 | if (c->caches_loaded == c->sb.nr_in_set) | 2027 | if (c->caches_loaded == c->sb.nr_in_set) { |
2009 | run_cache_set(c); | 2028 | err = "failed to run cache set"; |
2029 | if (run_cache_set(c) < 0) | ||
2030 | goto err; | ||
2031 | } | ||
2010 | 2032 | ||
2011 | return NULL; | 2033 | return NULL; |
2012 | err: | 2034 | err: |
@@ -2016,6 +2038,7 @@ err: | |||
2016 | 2038 | ||
2017 | /* Cache device */ | 2039 | /* Cache device */ |
2018 | 2040 | ||
2041 | /* When ca->kobj released */ | ||
2019 | void bch_cache_release(struct kobject *kobj) | 2042 | void bch_cache_release(struct kobject *kobj) |
2020 | { | 2043 | { |
2021 | struct cache *ca = container_of(kobj, struct cache, kobj); | 2044 | struct cache *ca = container_of(kobj, struct cache, kobj); |
@@ -2179,6 +2202,12 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, | |||
2179 | 2202 | ||
2180 | ret = cache_alloc(ca); | 2203 | ret = cache_alloc(ca); |
2181 | if (ret != 0) { | 2204 | if (ret != 0) { |
2205 | /* | ||
2206 | * If we failed here, it means ca->kobj is not initialized yet, | ||
2207 | * kobject_put() won't be called and there is no chance to | ||
2208 | * call blkdev_put() to bdev in bch_cache_release(). So we | ||
2209 | * explicitly call blkdev_put() here. | ||
2210 | */ | ||
2182 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | 2211 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
2183 | if (ret == -ENOMEM) | 2212 | if (ret == -ENOMEM) |
2184 | err = "cache_alloc(): -ENOMEM"; | 2213 | err = "cache_alloc(): -ENOMEM"; |
@@ -2262,7 +2291,7 @@ static bool bch_is_open(struct block_device *bdev) | |||
2262 | static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, | 2291 | static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, |
2263 | const char *buffer, size_t size) | 2292 | const char *buffer, size_t size) |
2264 | { | 2293 | { |
2265 | ssize_t ret = size; | 2294 | ssize_t ret = -EINVAL; |
2266 | const char *err = "cannot allocate memory"; | 2295 | const char *err = "cannot allocate memory"; |
2267 | char *path = NULL; | 2296 | char *path = NULL; |
2268 | struct cache_sb *sb = NULL; | 2297 | struct cache_sb *sb = NULL; |
@@ -2296,7 +2325,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, | |||
2296 | if (!IS_ERR(bdev)) | 2325 | if (!IS_ERR(bdev)) |
2297 | bdput(bdev); | 2326 | bdput(bdev); |
2298 | if (attr == &ksysfs_register_quiet) | 2327 | if (attr == &ksysfs_register_quiet) |
2299 | goto out; | 2328 | goto quiet_out; |
2300 | } | 2329 | } |
2301 | goto err; | 2330 | goto err; |
2302 | } | 2331 | } |
@@ -2317,17 +2346,23 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, | |||
2317 | goto err_close; | 2346 | goto err_close; |
2318 | 2347 | ||
2319 | mutex_lock(&bch_register_lock); | 2348 | mutex_lock(&bch_register_lock); |
2320 | register_bdev(sb, sb_page, bdev, dc); | 2349 | ret = register_bdev(sb, sb_page, bdev, dc); |
2321 | mutex_unlock(&bch_register_lock); | 2350 | mutex_unlock(&bch_register_lock); |
2351 | /* blkdev_put() will be called in cached_dev_free() */ | ||
2352 | if (ret < 0) | ||
2353 | goto err; | ||
2322 | } else { | 2354 | } else { |
2323 | struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 2355 | struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
2324 | 2356 | ||
2325 | if (!ca) | 2357 | if (!ca) |
2326 | goto err_close; | 2358 | goto err_close; |
2327 | 2359 | ||
2360 | /* blkdev_put() will be called in bch_cache_release() */ | ||
2328 | if (register_cache(sb, sb_page, bdev, ca) != 0) | 2361 | if (register_cache(sb, sb_page, bdev, ca) != 0) |
2329 | goto err; | 2362 | goto err; |
2330 | } | 2363 | } |
2364 | quiet_out: | ||
2365 | ret = size; | ||
2331 | out: | 2366 | out: |
2332 | if (sb_page) | 2367 | if (sb_page) |
2333 | put_page(sb_page); | 2368 | put_page(sb_page); |
@@ -2340,7 +2375,6 @@ err_close: | |||
2340 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | 2375 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
2341 | err: | 2376 | err: |
2342 | pr_info("error %s: %s", path, err); | 2377 | pr_info("error %s: %s", path, err); |
2343 | ret = -EINVAL; | ||
2344 | goto out; | 2378 | goto out; |
2345 | } | 2379 | } |
2346 | 2380 | ||
@@ -2370,10 +2404,19 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) | |||
2370 | list_for_each_entry_safe(dc, tdc, &uncached_devices, list) | 2404 | list_for_each_entry_safe(dc, tdc, &uncached_devices, list) |
2371 | bcache_device_stop(&dc->disk); | 2405 | bcache_device_stop(&dc->disk); |
2372 | 2406 | ||
2407 | mutex_unlock(&bch_register_lock); | ||
2408 | |||
2409 | /* | ||
2410 | * Give an early chance for other kthreads and | ||
2411 | * kworkers to stop themselves | ||
2412 | */ | ||
2413 | schedule(); | ||
2414 | |||
2373 | /* What's a condition variable? */ | 2415 | /* What's a condition variable? */ |
2374 | while (1) { | 2416 | while (1) { |
2375 | long timeout = start + 2 * HZ - jiffies; | 2417 | long timeout = start + 10 * HZ - jiffies; |
2376 | 2418 | ||
2419 | mutex_lock(&bch_register_lock); | ||
2377 | stopped = list_empty(&bch_cache_sets) && | 2420 | stopped = list_empty(&bch_cache_sets) && |
2378 | list_empty(&uncached_devices); | 2421 | list_empty(&uncached_devices); |
2379 | 2422 | ||
@@ -2385,7 +2428,6 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) | |||
2385 | 2428 | ||
2386 | mutex_unlock(&bch_register_lock); | 2429 | mutex_unlock(&bch_register_lock); |
2387 | schedule_timeout(timeout); | 2430 | schedule_timeout(timeout); |
2388 | mutex_lock(&bch_register_lock); | ||
2389 | } | 2431 | } |
2390 | 2432 | ||
2391 | finish_wait(&unregister_wait, &wait); | 2433 | finish_wait(&unregister_wait, &wait); |
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 17bae9c14ca0..6cd44d3cf906 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c | |||
@@ -996,8 +996,6 @@ SHOW(__bch_cache) | |||
996 | !cached[n - 1]) | 996 | !cached[n - 1]) |
997 | --n; | 997 | --n; |
998 | 998 | ||
999 | unused = ca->sb.nbuckets - n; | ||
1000 | |||
1001 | while (cached < p + n && | 999 | while (cached < p + n && |
1002 | *cached == BTREE_PRIO) | 1000 | *cached == BTREE_PRIO) |
1003 | cached++, n--; | 1001 | cached++, n--; |
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 00aab6abcfe4..1fbced94e4cc 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h | |||
@@ -560,17 +560,29 @@ static inline uint64_t bch_crc64_update(uint64_t crc, | |||
560 | return crc; | 560 | return crc; |
561 | } | 561 | } |
562 | 562 | ||
563 | /* Does linear interpolation between powers of two */ | 563 | /* |
564 | * A stepwise-linear pseudo-exponential. This returns 1 << (x >> | ||
565 | * frac_bits), with the less-significant bits filled in by linear | ||
566 | * interpolation. | ||
567 | * | ||
568 | * This can also be interpreted as a floating-point number format, | ||
569 | * where the low frac_bits are the mantissa (with implicit leading | ||
570 | * 1 bit), and the more significant bits are the exponent. | ||
571 | * The return value is 1.mantissa * 2^exponent. | ||
572 | * | ||
573 | * The way this is used, fract_bits is 6 and the largest possible | ||
574 | * input is CONGESTED_MAX-1 = 1023 (exponent 16, mantissa 0x1.fc), | ||
575 | * so the maximum output is 0x1fc00. | ||
576 | */ | ||
564 | static inline unsigned int fract_exp_two(unsigned int x, | 577 | static inline unsigned int fract_exp_two(unsigned int x, |
565 | unsigned int fract_bits) | 578 | unsigned int fract_bits) |
566 | { | 579 | { |
567 | unsigned int fract = x & ~(~0 << fract_bits); | 580 | unsigned int mantissa = 1 << fract_bits; /* Implicit bit */ |
568 | |||
569 | x >>= fract_bits; | ||
570 | x = 1 << x; | ||
571 | x += (x * fract) >> fract_bits; | ||
572 | 581 | ||
573 | return x; | 582 | mantissa += x & (mantissa - 1); |
583 | x >>= fract_bits; /* The exponent */ | ||
584 | /* Largest intermediate value 0x7f0000 */ | ||
585 | return mantissa << x >> fract_bits; | ||
574 | } | 586 | } |
575 | 587 | ||
576 | void bch_bio_map(struct bio *bio, void *base); | 588 | void bch_bio_map(struct bio *bio, void *base); |
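The new fract_exp_two() above is a drop-in rewrite of the old four-line version: treat the low fract_bits of x as a mantissa with an implicit leading 1 and the remaining bits as an exponent, i.e. return roughly 2^(x / 2^fract_bits) with linear interpolation between powers of two. Both variants are lifted into the small user-space program below so they can be compared directly over the input range the comment mentions (0 .. CONGESTED_MAX - 1 = 1023, fract_bits = 6); the mask in the old version is written with an unsigned literal here to keep the shift well-defined.

	#include <assert.h>
	#include <stdio.h>

	static unsigned int fract_exp_two_old(unsigned int x, unsigned int fract_bits)
	{
		unsigned int fract = x & ~(~0U << fract_bits);

		x >>= fract_bits;
		x = 1 << x;
		x += (x * fract) >> fract_bits;
		return x;
	}

	static unsigned int fract_exp_two_new(unsigned int x, unsigned int fract_bits)
	{
		unsigned int mantissa = 1 << fract_bits;	/* implicit bit */

		mantissa += x & (mantissa - 1);
		x >>= fract_bits;				/* the exponent */
		return mantissa << x >> fract_bits;
	}

	int main(void)
	{
		unsigned int x;

		/* Exhaustive check over the documented input range. */
		for (x = 0; x < 1024; x++)
			assert(fract_exp_two_old(x, 6) == fract_exp_two_new(x, 6));

		printf("fract_exp_two(90, 6) = %u\n", fract_exp_two_new(90, 6));
		return 0;
	}

For example, fract_exp_two(90, 6) is 2 in both versions: exponent 90 >> 6 = 1, mantissa 64 + 26 = 90, and (90 << 1) >> 6 = 2.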
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9faed1c92b52..7f6462f74ac8 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
@@ -1442,11 +1442,10 @@ out: | |||
1442 | 1442 | ||
1443 | static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) | 1443 | static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) |
1444 | { | 1444 | { |
1445 | unsigned int i; | ||
1446 | struct bio_vec *bv; | 1445 | struct bio_vec *bv; |
1447 | struct bvec_iter_all iter_all; | 1446 | struct bvec_iter_all iter_all; |
1448 | 1447 | ||
1449 | bio_for_each_segment_all(bv, clone, i, iter_all) { | 1448 | bio_for_each_segment_all(bv, clone, iter_all) { |
1450 | BUG_ON(!bv->bv_page); | 1449 | BUG_ON(!bv->bv_page); |
1451 | mempool_free(bv->bv_page, &cc->page_pool); | 1450 | mempool_free(bv->bv_page, &cc->page_pool); |
1452 | } | 1451 | } |
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 12b5216c2cfe..721efc493942 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h | |||
@@ -135,9 +135,8 @@ struct dm_dev *dm_snap_cow(struct dm_snapshot *snap); | |||
135 | /* | 135 | /* |
136 | * Funtions to manipulate consecutive chunks | 136 | * Funtions to manipulate consecutive chunks |
137 | */ | 137 | */ |
138 | # if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) | 138 | #define DM_CHUNK_CONSECUTIVE_BITS 8 |
139 | # define DM_CHUNK_CONSECUTIVE_BITS 8 | 139 | #define DM_CHUNK_NUMBER_BITS 56 |
140 | # define DM_CHUNK_NUMBER_BITS 56 | ||
141 | 140 | ||
142 | static inline chunk_t dm_chunk_number(chunk_t chunk) | 141 | static inline chunk_t dm_chunk_number(chunk_t chunk) |
143 | { | 142 | { |
@@ -163,29 +162,6 @@ static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e) | |||
163 | e->new_chunk -= (1ULL << DM_CHUNK_NUMBER_BITS); | 162 | e->new_chunk -= (1ULL << DM_CHUNK_NUMBER_BITS); |
164 | } | 163 | } |
165 | 164 | ||
166 | # else | ||
167 | # define DM_CHUNK_CONSECUTIVE_BITS 0 | ||
168 | |||
169 | static inline chunk_t dm_chunk_number(chunk_t chunk) | ||
170 | { | ||
171 | return chunk; | ||
172 | } | ||
173 | |||
174 | static inline unsigned dm_consecutive_chunk_count(struct dm_exception *e) | ||
175 | { | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e) | ||
180 | { | ||
181 | } | ||
182 | |||
183 | static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e) | ||
184 | { | ||
185 | } | ||
186 | |||
187 | # endif | ||
188 | |||
189 | /* | 165 | /* |
190 | * Return the number of sectors in the device. | 166 | * Return the number of sectors in the device. |
191 | */ | 167 | */ |
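With CONFIG_LBDAF gone, dm-snapshot no longer needs the fallback in which consecutive-chunk tracking was compiled out; chunk_t is always 64 bits, so the packing above is unconditional: the low 56 bits of e->new_chunk hold the chunk number and the top 8 bits count consecutive chunks, which is why dm_consecutive_chunk_count_dec() subtracts 1ULL << DM_CHUNK_NUMBER_BITS. A small user-space model of that encoding, operating on a bare chunk_t rather than struct dm_exception:

	#include <stdint.h>
	#include <stdio.h>

	#define DM_CHUNK_CONSECUTIVE_BITS 8
	#define DM_CHUNK_NUMBER_BITS      56

	typedef uint64_t chunk_t;

	static chunk_t dm_chunk_number(chunk_t chunk)
	{
		return chunk & ((1ULL << DM_CHUNK_NUMBER_BITS) - 1);
	}

	static unsigned int dm_consecutive_chunk_count(chunk_t chunk)
	{
		return (unsigned int)(chunk >> DM_CHUNK_NUMBER_BITS);
	}

	int main(void)
	{
		chunk_t e = 12345;			/* chunk number */

		e += 1ULL << DM_CHUNK_NUMBER_BITS;	/* record one consecutive chunk */
		e += 1ULL << DM_CHUNK_NUMBER_BITS;	/* ...and another */

		printf("chunk %llu, %u consecutive\n",
		       (unsigned long long)dm_chunk_number(e),
		       dm_consecutive_chunk_count(e));
		return 0;
	}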
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 95ae4bf34203..c27c32cf4a30 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c | |||
@@ -88,14 +88,10 @@ struct journal_entry { | |||
88 | 88 | ||
89 | #if BITS_PER_LONG == 64 | 89 | #if BITS_PER_LONG == 64 |
90 | #define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0) | 90 | #define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0) |
91 | #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) | ||
92 | #elif defined(CONFIG_LBDAF) | ||
93 | #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0) | ||
94 | #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) | ||
95 | #else | 91 | #else |
96 | #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32(0)); } while (0) | 92 | #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0) |
97 | #define journal_entry_get_sector(je) le32_to_cpu((je)->u.s.sector_lo) | ||
98 | #endif | 93 | #endif |
94 | #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) | ||
99 | #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1)) | 95 | #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1)) |
100 | #define journal_entry_set_unused(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0) | 96 | #define journal_entry_set_unused(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0) |
101 | #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2)) | 97 | #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2)) |
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 1cd4f991792c..3a62a46b75c7 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c | |||
@@ -490,10 +490,10 @@ void md_bitmap_print_sb(struct bitmap *bitmap) | |||
490 | pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); | 490 | pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); |
491 | pr_debug(" version: %d\n", le32_to_cpu(sb->version)); | 491 | pr_debug(" version: %d\n", le32_to_cpu(sb->version)); |
492 | pr_debug(" uuid: %08x.%08x.%08x.%08x\n", | 492 | pr_debug(" uuid: %08x.%08x.%08x.%08x\n", |
493 | le32_to_cpu(*(__u32 *)(sb->uuid+0)), | 493 | le32_to_cpu(*(__le32 *)(sb->uuid+0)), |
494 | le32_to_cpu(*(__u32 *)(sb->uuid+4)), | 494 | le32_to_cpu(*(__le32 *)(sb->uuid+4)), |
495 | le32_to_cpu(*(__u32 *)(sb->uuid+8)), | 495 | le32_to_cpu(*(__le32 *)(sb->uuid+8)), |
496 | le32_to_cpu(*(__u32 *)(sb->uuid+12))); | 496 | le32_to_cpu(*(__le32 *)(sb->uuid+12))); |
497 | pr_debug(" events: %llu\n", | 497 | pr_debug(" events: %llu\n", |
498 | (unsigned long long) le64_to_cpu(sb->events)); | 498 | (unsigned long long) le64_to_cpu(sb->events)); |
499 | pr_debug("events cleared: %llu\n", | 499 | pr_debug("events cleared: %llu\n", |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 05ffffb8b769..45ffa23fa85d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -88,8 +88,7 @@ static struct kobj_type md_ktype; | |||
88 | 88 | ||
89 | struct md_cluster_operations *md_cluster_ops; | 89 | struct md_cluster_operations *md_cluster_ops; |
90 | EXPORT_SYMBOL(md_cluster_ops); | 90 | EXPORT_SYMBOL(md_cluster_ops); |
91 | struct module *md_cluster_mod; | 91 | static struct module *md_cluster_mod; |
92 | EXPORT_SYMBOL(md_cluster_mod); | ||
93 | 92 | ||
94 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | 93 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); |
95 | static struct workqueue_struct *md_wq; | 94 | static struct workqueue_struct *md_wq; |
@@ -132,24 +131,6 @@ static inline int speed_max(struct mddev *mddev) | |||
132 | mddev->sync_speed_max : sysctl_speed_limit_max; | 131 | mddev->sync_speed_max : sysctl_speed_limit_max; |
133 | } | 132 | } |
134 | 133 | ||
135 | static void * flush_info_alloc(gfp_t gfp_flags, void *data) | ||
136 | { | ||
137 | return kzalloc(sizeof(struct flush_info), gfp_flags); | ||
138 | } | ||
139 | static void flush_info_free(void *flush_info, void *data) | ||
140 | { | ||
141 | kfree(flush_info); | ||
142 | } | ||
143 | |||
144 | static void * flush_bio_alloc(gfp_t gfp_flags, void *data) | ||
145 | { | ||
146 | return kzalloc(sizeof(struct flush_bio), gfp_flags); | ||
147 | } | ||
148 | static void flush_bio_free(void *flush_bio, void *data) | ||
149 | { | ||
150 | kfree(flush_bio); | ||
151 | } | ||
152 | |||
153 | static struct ctl_table_header *raid_table_header; | 134 | static struct ctl_table_header *raid_table_header; |
154 | 135 | ||
155 | static struct ctl_table raid_table[] = { | 136 | static struct ctl_table raid_table[] = { |
@@ -423,54 +404,31 @@ static int md_congested(void *data, int bits) | |||
423 | /* | 404 | /* |
424 | * Generic flush handling for md | 405 | * Generic flush handling for md |
425 | */ | 406 | */ |
426 | static void submit_flushes(struct work_struct *ws) | ||
427 | { | ||
428 | struct flush_info *fi = container_of(ws, struct flush_info, flush_work); | ||
429 | struct mddev *mddev = fi->mddev; | ||
430 | struct bio *bio = fi->bio; | ||
431 | |||
432 | bio->bi_opf &= ~REQ_PREFLUSH; | ||
433 | md_handle_request(mddev, bio); | ||
434 | |||
435 | mempool_free(fi, mddev->flush_pool); | ||
436 | } | ||
437 | 407 | ||
438 | static void md_end_flush(struct bio *fbio) | 408 | static void md_end_flush(struct bio *bio) |
439 | { | 409 | { |
440 | struct flush_bio *fb = fbio->bi_private; | 410 | struct md_rdev *rdev = bio->bi_private; |
441 | struct md_rdev *rdev = fb->rdev; | 411 | struct mddev *mddev = rdev->mddev; |
442 | struct flush_info *fi = fb->fi; | ||
443 | struct bio *bio = fi->bio; | ||
444 | struct mddev *mddev = fi->mddev; | ||
445 | 412 | ||
446 | rdev_dec_pending(rdev, mddev); | 413 | rdev_dec_pending(rdev, mddev); |
447 | 414 | ||
448 | if (atomic_dec_and_test(&fi->flush_pending)) { | 415 | if (atomic_dec_and_test(&mddev->flush_pending)) { |
449 | if (bio->bi_iter.bi_size == 0) { | 416 | /* The pre-request flush has finished */ |
450 | /* an empty barrier - all done */ | 417 | queue_work(md_wq, &mddev->flush_work); |
451 | bio_endio(bio); | ||
452 | mempool_free(fi, mddev->flush_pool); | ||
453 | } else { | ||
454 | INIT_WORK(&fi->flush_work, submit_flushes); | ||
455 | queue_work(md_wq, &fi->flush_work); | ||
456 | } | ||
457 | } | 418 | } |
458 | 419 | bio_put(bio); | |
459 | mempool_free(fb, mddev->flush_bio_pool); | ||
460 | bio_put(fbio); | ||
461 | } | 420 | } |
462 | 421 | ||
463 | void md_flush_request(struct mddev *mddev, struct bio *bio) | 422 | static void md_submit_flush_data(struct work_struct *ws); |
423 | |||
424 | static void submit_flushes(struct work_struct *ws) | ||
464 | { | 425 | { |
426 | struct mddev *mddev = container_of(ws, struct mddev, flush_work); | ||
465 | struct md_rdev *rdev; | 427 | struct md_rdev *rdev; |
466 | struct flush_info *fi; | ||
467 | |||
468 | fi = mempool_alloc(mddev->flush_pool, GFP_NOIO); | ||
469 | |||
470 | fi->bio = bio; | ||
471 | fi->mddev = mddev; | ||
472 | atomic_set(&fi->flush_pending, 1); | ||
473 | 428 | ||
429 | mddev->start_flush = ktime_get_boottime(); | ||
430 | INIT_WORK(&mddev->flush_work, md_submit_flush_data); | ||
431 | atomic_set(&mddev->flush_pending, 1); | ||
474 | rcu_read_lock(); | 432 | rcu_read_lock(); |
475 | rdev_for_each_rcu(rdev, mddev) | 433 | rdev_for_each_rcu(rdev, mddev) |
476 | if (rdev->raid_disk >= 0 && | 434 | if (rdev->raid_disk >= 0 && |
@@ -480,37 +438,74 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) | |||
480 | * we reclaim rcu_read_lock | 438 | * we reclaim rcu_read_lock |
481 | */ | 439 | */ |
482 | struct bio *bi; | 440 | struct bio *bi; |
483 | struct flush_bio *fb; | ||
484 | atomic_inc(&rdev->nr_pending); | 441 | atomic_inc(&rdev->nr_pending); |
485 | atomic_inc(&rdev->nr_pending); | 442 | atomic_inc(&rdev->nr_pending); |
486 | rcu_read_unlock(); | 443 | rcu_read_unlock(); |
487 | |||
488 | fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO); | ||
489 | fb->fi = fi; | ||
490 | fb->rdev = rdev; | ||
491 | |||
492 | bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); | 444 | bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); |
493 | bio_set_dev(bi, rdev->bdev); | ||
494 | bi->bi_end_io = md_end_flush; | 445 | bi->bi_end_io = md_end_flush; |
495 | bi->bi_private = fb; | 446 | bi->bi_private = rdev; |
447 | bio_set_dev(bi, rdev->bdev); | ||
496 | bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; | 448 | bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; |
497 | 449 | atomic_inc(&mddev->flush_pending); | |
498 | atomic_inc(&fi->flush_pending); | ||
499 | submit_bio(bi); | 450 | submit_bio(bi); |
500 | |||
501 | rcu_read_lock(); | 451 | rcu_read_lock(); |
502 | rdev_dec_pending(rdev, mddev); | 452 | rdev_dec_pending(rdev, mddev); |
503 | } | 453 | } |
504 | rcu_read_unlock(); | 454 | rcu_read_unlock(); |
455 | if (atomic_dec_and_test(&mddev->flush_pending)) | ||
456 | queue_work(md_wq, &mddev->flush_work); | ||
457 | } | ||
458 | |||
459 | static void md_submit_flush_data(struct work_struct *ws) | ||
460 | { | ||
461 | struct mddev *mddev = container_of(ws, struct mddev, flush_work); | ||
462 | struct bio *bio = mddev->flush_bio; | ||
463 | |||
464 | /* | ||
465 | * must reset flush_bio before calling into md_handle_request to avoid a | ||
466 | * deadlock, because other bios passed md_handle_request suspend check | ||
467 | * could wait for this and below md_handle_request could wait for those | ||
468 | * bios because of suspend check | ||
469 | */ | ||
470 | mddev->last_flush = mddev->start_flush; | ||
471 | mddev->flush_bio = NULL; | ||
472 | wake_up(&mddev->sb_wait); | ||
473 | |||
474 | if (bio->bi_iter.bi_size == 0) { | ||
475 | /* an empty barrier - all done */ | ||
476 | bio_endio(bio); | ||
477 | } else { | ||
478 | bio->bi_opf &= ~REQ_PREFLUSH; | ||
479 | md_handle_request(mddev, bio); | ||
480 | } | ||
481 | } | ||
505 | 482 | ||
506 | if (atomic_dec_and_test(&fi->flush_pending)) { | 483 | void md_flush_request(struct mddev *mddev, struct bio *bio) |
507 | if (bio->bi_iter.bi_size == 0) { | 484 | { |
485 | ktime_t start = ktime_get_boottime(); | ||
486 | spin_lock_irq(&mddev->lock); | ||
487 | wait_event_lock_irq(mddev->sb_wait, | ||
488 | !mddev->flush_bio || | ||
489 | ktime_after(mddev->last_flush, start), | ||
490 | mddev->lock); | ||
491 | if (!ktime_after(mddev->last_flush, start)) { | ||
492 | WARN_ON(mddev->flush_bio); | ||
493 | mddev->flush_bio = bio; | ||
494 | bio = NULL; | ||
495 | } | ||
496 | spin_unlock_irq(&mddev->lock); | ||
497 | |||
498 | if (!bio) { | ||
499 | INIT_WORK(&mddev->flush_work, submit_flushes); | ||
500 | queue_work(md_wq, &mddev->flush_work); | ||
501 | } else { | ||
502 | /* flush was performed for some other bio while we waited. */ | ||
503 | if (bio->bi_iter.bi_size == 0) | ||
508 | /* an empty barrier - all done */ | 504 | /* an empty barrier - all done */ |
509 | bio_endio(bio); | 505 | bio_endio(bio); |
510 | mempool_free(fi, mddev->flush_pool); | 506 | else { |
511 | } else { | 507 | bio->bi_opf &= ~REQ_PREFLUSH; |
512 | INIT_WORK(&fi->flush_work, submit_flushes); | 508 | mddev->pers->make_request(mddev, bio); |
513 | queue_work(md_wq, &fi->flush_work); | ||
514 | } | 509 | } |
515 | } | 510 | } |
516 | } | 511 | } |
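The md flush rework replaces the per-request flush_info/flush_bio mempools with a single in-flight flush per mddev: md_flush_request() records when it started waiting, and if a flush that was issued after that point has already completed (ktime_after(mddev->last_flush, start)), its PREFLUSH covers this request too, so the bio skips straight to its data phase (or completes, for an empty barrier); otherwise the bio becomes mddev->flush_bio and submit_flushes()/md_submit_flush_data() run from the workqueue. The decision itself is simple enough to model in user space; the struct and timestamps below are invented stand-ins:

	#include <stdbool.h>
	#include <stdio.h>

	typedef long long ktime_t;	/* stand-in for the kernel's ktime_t */

	struct mddev_model {
		ktime_t last_flush;	/* issue time of the most recently completed flush */
	};

	/*
	 * Model of the decision in md_flush_request(): a request that started
	 * waiting at @start only needs its own flush if no flush issued after
	 * @start has already completed.
	 */
	static bool needs_own_flush(const struct mddev_model *m, ktime_t start)
	{
		return !(m->last_flush > start);	/* ktime_after(last_flush, start) */
	}

	int main(void)
	{
		struct mddev_model m = { .last_flush = 100 };

		printf("request at t=50:  %s\n",
		       needs_own_flush(&m, 50) ? "issue flush" : "piggyback on completed flush");
		printf("request at t=150: %s\n",
		       needs_own_flush(&m, 150) ? "issue flush" : "piggyback on completed flush");
		return 0;
	}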
@@ -560,6 +555,7 @@ void mddev_init(struct mddev *mddev) | |||
560 | atomic_set(&mddev->openers, 0); | 555 | atomic_set(&mddev->openers, 0); |
561 | atomic_set(&mddev->active_io, 0); | 556 | atomic_set(&mddev->active_io, 0); |
562 | spin_lock_init(&mddev->lock); | 557 | spin_lock_init(&mddev->lock); |
558 | atomic_set(&mddev->flush_pending, 0); | ||
563 | init_waitqueue_head(&mddev->sb_wait); | 559 | init_waitqueue_head(&mddev->sb_wait); |
564 | init_waitqueue_head(&mddev->recovery_wait); | 560 | init_waitqueue_head(&mddev->recovery_wait); |
565 | mddev->reshape_position = MaxSector; | 561 | mddev->reshape_position = MaxSector; |
@@ -1109,8 +1105,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor | |||
1109 | * (not needed for Linear and RAID0 as metadata doesn't | 1105 | * (not needed for Linear and RAID0 as metadata doesn't |
1110 | * record this size) | 1106 | * record this size) |
1111 | */ | 1107 | */ |
1112 | if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) && | 1108 | if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1) |
1113 | sb->level >= 1) | ||
1114 | rdev->sectors = (sector_t)(2ULL << 32) - 2; | 1109 | rdev->sectors = (sector_t)(2ULL << 32) - 2; |
1115 | 1110 | ||
1116 | if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) | 1111 | if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) |
@@ -1408,8 +1403,7 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) | |||
1408 | /* Limit to 4TB as metadata cannot record more than that. | 1403 | /* Limit to 4TB as metadata cannot record more than that. |
1409 | * 4TB == 2^32 KB, or 2*2^32 sectors. | 1404 | * 4TB == 2^32 KB, or 2*2^32 sectors. |
1410 | */ | 1405 | */ |
1411 | if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) && | 1406 | if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1) |
1412 | rdev->mddev->level >= 1) | ||
1413 | num_sectors = (sector_t)(2ULL << 32) - 2; | 1407 | num_sectors = (sector_t)(2ULL << 32) - 2; |
1414 | do { | 1408 | do { |
1415 | md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, | 1409 | md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, |
@@ -1553,7 +1547,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ | |||
1553 | */ | 1547 | */ |
1554 | s32 offset; | 1548 | s32 offset; |
1555 | sector_t bb_sector; | 1549 | sector_t bb_sector; |
1556 | u64 *bbp; | 1550 | __le64 *bbp; |
1557 | int i; | 1551 | int i; |
1558 | int sectors = le16_to_cpu(sb->bblog_size); | 1552 | int sectors = le16_to_cpu(sb->bblog_size); |
1559 | if (sectors > (PAGE_SIZE / 512)) | 1553 | if (sectors > (PAGE_SIZE / 512)) |
@@ -1565,7 +1559,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ | |||
1565 | if (!sync_page_io(rdev, bb_sector, sectors << 9, | 1559 | if (!sync_page_io(rdev, bb_sector, sectors << 9, |
1566 | rdev->bb_page, REQ_OP_READ, 0, true)) | 1560 | rdev->bb_page, REQ_OP_READ, 0, true)) |
1567 | return -EIO; | 1561 | return -EIO; |
1568 | bbp = (u64 *)page_address(rdev->bb_page); | 1562 | bbp = (__le64 *)page_address(rdev->bb_page); |
1569 | rdev->badblocks.shift = sb->bblog_shift; | 1563 | rdev->badblocks.shift = sb->bblog_shift; |
1570 | for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) { | 1564 | for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) { |
1571 | u64 bb = le64_to_cpu(*bbp); | 1565 | u64 bb = le64_to_cpu(*bbp); |
@@ -1877,7 +1871,7 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) | |||
1877 | md_error(mddev, rdev); | 1871 | md_error(mddev, rdev); |
1878 | else { | 1872 | else { |
1879 | struct badblocks *bb = &rdev->badblocks; | 1873 | struct badblocks *bb = &rdev->badblocks; |
1880 | u64 *bbp = (u64 *)page_address(rdev->bb_page); | 1874 | __le64 *bbp = (__le64 *)page_address(rdev->bb_page); |
1881 | u64 *p = bb->page; | 1875 | u64 *p = bb->page; |
1882 | sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS); | 1876 | sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS); |
1883 | if (bb->changed) { | 1877 | if (bb->changed) { |
@@ -2855,8 +2849,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2855 | err = 0; | 2849 | err = 0; |
2856 | } | 2850 | } |
2857 | } else if (cmd_match(buf, "re-add")) { | 2851 | } else if (cmd_match(buf, "re-add")) { |
2858 | if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) && | 2852 | if (!rdev->mddev->pers) |
2859 | rdev->saved_raid_disk >= 0) { | 2853 | err = -EINVAL; |
2854 | else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) && | ||
2855 | rdev->saved_raid_disk >= 0) { | ||
2860 | /* clear_bit is performed _after_ all the devices | 2856 | /* clear_bit is performed _after_ all the devices |
2861 | * have their local Faulty bit cleared. If any writes | 2857 | * have their local Faulty bit cleared. If any writes |
2862 | * happen in the meantime in the local node, they | 2858 | * happen in the meantime in the local node, they |
@@ -3384,10 +3380,10 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, | |||
3384 | return -EIO; | 3380 | return -EIO; |
3385 | if (!capable(CAP_SYS_ADMIN)) | 3381 | if (!capable(CAP_SYS_ADMIN)) |
3386 | return -EACCES; | 3382 | return -EACCES; |
3387 | rv = mddev ? mddev_lock(mddev): -EBUSY; | 3383 | rv = mddev ? mddev_lock(mddev) : -ENODEV; |
3388 | if (!rv) { | 3384 | if (!rv) { |
3389 | if (rdev->mddev == NULL) | 3385 | if (rdev->mddev == NULL) |
3390 | rv = -EBUSY; | 3386 | rv = -ENODEV; |
3391 | else | 3387 | else |
3392 | rv = entry->store(rdev, page, length); | 3388 | rv = entry->store(rdev, page, length); |
3393 | mddev_unlock(mddev); | 3389 | mddev_unlock(mddev); |
@@ -5511,22 +5507,6 @@ int md_run(struct mddev *mddev) | |||
5511 | if (err) | 5507 | if (err) |
5512 | return err; | 5508 | return err; |
5513 | } | 5509 | } |
5514 | if (mddev->flush_pool == NULL) { | ||
5515 | mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc, | ||
5516 | flush_info_free, mddev); | ||
5517 | if (!mddev->flush_pool) { | ||
5518 | err = -ENOMEM; | ||
5519 | goto abort; | ||
5520 | } | ||
5521 | } | ||
5522 | if (mddev->flush_bio_pool == NULL) { | ||
5523 | mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc, | ||
5524 | flush_bio_free, mddev); | ||
5525 | if (!mddev->flush_bio_pool) { | ||
5526 | err = -ENOMEM; | ||
5527 | goto abort; | ||
5528 | } | ||
5529 | } | ||
5530 | 5510 | ||
5531 | spin_lock(&pers_lock); | 5511 | spin_lock(&pers_lock); |
5532 | pers = find_pers(mddev->level, mddev->clevel); | 5512 | pers = find_pers(mddev->level, mddev->clevel); |
@@ -5686,11 +5666,8 @@ int md_run(struct mddev *mddev) | |||
5686 | return 0; | 5666 | return 0; |
5687 | 5667 | ||
5688 | abort: | 5668 | abort: |
5689 | mempool_destroy(mddev->flush_bio_pool); | 5669 | bioset_exit(&mddev->bio_set); |
5690 | mddev->flush_bio_pool = NULL; | 5670 | bioset_exit(&mddev->sync_set); |
5691 | mempool_destroy(mddev->flush_pool); | ||
5692 | mddev->flush_pool = NULL; | ||
5693 | |||
5694 | return err; | 5671 | return err; |
5695 | } | 5672 | } |
5696 | EXPORT_SYMBOL_GPL(md_run); | 5673 | EXPORT_SYMBOL_GPL(md_run); |
@@ -5894,14 +5871,6 @@ static void __md_stop(struct mddev *mddev) | |||
5894 | mddev->to_remove = &md_redundancy_group; | 5871 | mddev->to_remove = &md_redundancy_group; |
5895 | module_put(pers->owner); | 5872 | module_put(pers->owner); |
5896 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 5873 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
5897 | if (mddev->flush_bio_pool) { | ||
5898 | mempool_destroy(mddev->flush_bio_pool); | ||
5899 | mddev->flush_bio_pool = NULL; | ||
5900 | } | ||
5901 | if (mddev->flush_pool) { | ||
5902 | mempool_destroy(mddev->flush_pool); | ||
5903 | mddev->flush_pool = NULL; | ||
5904 | } | ||
5905 | } | 5874 | } |
5906 | 5875 | ||
5907 | void md_stop(struct mddev *mddev) | 5876 | void md_stop(struct mddev *mddev) |
@@ -9257,7 +9226,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) | |||
9257 | * reshape is happening in the remote node, we need to | 9226 | * reshape is happening in the remote node, we need to |
9258 | * update reshape_position and call start_reshape. | 9227 | * update reshape_position and call start_reshape. |
9259 | */ | 9228 | */ |
9260 | mddev->reshape_position = sb->reshape_position; | 9229 | mddev->reshape_position = le64_to_cpu(sb->reshape_position); |
9261 | if (mddev->pers->update_reshape_pos) | 9230 | if (mddev->pers->update_reshape_pos) |
9262 | mddev->pers->update_reshape_pos(mddev); | 9231 | mddev->pers->update_reshape_pos(mddev); |
9263 | if (mddev->pers->start_reshape) | 9232 | if (mddev->pers->start_reshape) |
diff --git a/drivers/md/md.h b/drivers/md/md.h index c52afb52c776..257cb4c9e22b 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -252,19 +252,6 @@ enum mddev_sb_flags { | |||
252 | MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */ | 252 | MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */ |
253 | }; | 253 | }; |
254 | 254 | ||
255 | #define NR_FLUSH_INFOS 8 | ||
256 | #define NR_FLUSH_BIOS 64 | ||
257 | struct flush_info { | ||
258 | struct bio *bio; | ||
259 | struct mddev *mddev; | ||
260 | struct work_struct flush_work; | ||
261 | atomic_t flush_pending; | ||
262 | }; | ||
263 | struct flush_bio { | ||
264 | struct flush_info *fi; | ||
265 | struct md_rdev *rdev; | ||
266 | }; | ||
267 | |||
268 | struct mddev { | 255 | struct mddev { |
269 | void *private; | 256 | void *private; |
270 | struct md_personality *pers; | 257 | struct md_personality *pers; |
@@ -470,8 +457,16 @@ struct mddev { | |||
470 | * metadata and bitmap writes | 457 | * metadata and bitmap writes |
471 | */ | 458 | */ |
472 | 459 | ||
473 | mempool_t *flush_pool; | 460 | /* Generic flush handling. |
474 | mempool_t *flush_bio_pool; | 461 | * The last to finish preflush schedules a worker to submit |
462 | * the rest of the request (without the REQ_PREFLUSH flag). | ||
463 | */ | ||
464 | struct bio *flush_bio; | ||
465 | atomic_t flush_pending; | ||
466 | ktime_t start_flush, last_flush; /* last_flush is when the last completed | ||
467 | * flush was started. | ||
468 | */ | ||
469 | struct work_struct flush_work; | ||
475 | struct work_struct event_work; /* used by dm to report failure event */ | 470 | struct work_struct event_work; /* used by dm to report failure event */ |
476 | void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); | 471 | void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); |
477 | struct md_cluster_info *cluster_info; | 472 | struct md_cluster_info *cluster_info; |
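The per-mddev flush_bio/flush_pending/start_flush/last_flush fields replace the two mempools removed above: instead of allocating a flush_info per request, md now batches flushes and can skip one entirely when a flush that started after the request arrived has already completed. A rough userspace sketch of that skip test, with monotonic nanosecond timestamps standing in for ktime_t and all locking omitted (the policy is paraphrased here, not copied from the kernel):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Monotonic timestamp in ns, standing in for ktime_get_boottime(). */
static uint64_t now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Start time of the most recently *completed* flush (last_flush). */
static uint64_t last_flush;

/* A flush request that arrived at 'arrival' only needs to be sent to the
 * member devices if no flush started after it has already finished.
 */
static bool flush_needed(uint64_t arrival)
{
	return last_flush <= arrival;
}

int main(void)
{
	uint64_t arrival = now_ns();

	/* Pretend another CPU started and completed a flush 1 us later. */
	last_flush = arrival + 1000;

	printf("flush needed: %s\n", flush_needed(arrival) ? "yes" : "no");
	return 0;
}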
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fdf451aac369..0c8a098d220e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -2110,7 +2110,7 @@ static void process_checks(struct r1bio *r1_bio) | |||
2110 | } | 2110 | } |
2111 | r1_bio->read_disk = primary; | 2111 | r1_bio->read_disk = primary; |
2112 | for (i = 0; i < conf->raid_disks * 2; i++) { | 2112 | for (i = 0; i < conf->raid_disks * 2; i++) { |
2113 | int j; | 2113 | int j = 0; |
2114 | struct bio *pbio = r1_bio->bios[primary]; | 2114 | struct bio *pbio = r1_bio->bios[primary]; |
2115 | struct bio *sbio = r1_bio->bios[i]; | 2115 | struct bio *sbio = r1_bio->bios[i]; |
2116 | blk_status_t status = sbio->bi_status; | 2116 | blk_status_t status = sbio->bi_status; |
@@ -2125,8 +2125,8 @@ static void process_checks(struct r1bio *r1_bio) | |||
2125 | /* Now we can 'fixup' the error value */ | 2125 | /* Now we can 'fixup' the error value */ |
2126 | sbio->bi_status = 0; | 2126 | sbio->bi_status = 0; |
2127 | 2127 | ||
2128 | bio_for_each_segment_all(bi, sbio, j, iter_all) | 2128 | bio_for_each_segment_all(bi, sbio, iter_all) |
2129 | page_len[j] = bi->bv_len; | 2129 | page_len[j++] = bi->bv_len; |
2130 | 2130 | ||
2131 | if (!status) { | 2131 | if (!status) { |
2132 | for (j = vcnt; j-- ; ) { | 2132 | for (j = vcnt; j-- ; ) { |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c033bfcb209e..7fde645d2e90 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -711,6 +711,8 @@ static bool is_full_stripe_write(struct stripe_head *sh) | |||
711 | } | 711 | } |
712 | 712 | ||
713 | static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) | 713 | static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) |
714 | __acquires(&sh1->stripe_lock) | ||
715 | __acquires(&sh2->stripe_lock) | ||
714 | { | 716 | { |
715 | if (sh1 > sh2) { | 717 | if (sh1 > sh2) { |
716 | spin_lock_irq(&sh2->stripe_lock); | 718 | spin_lock_irq(&sh2->stripe_lock); |
@@ -722,6 +724,8 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) | |||
722 | } | 724 | } |
723 | 725 | ||
724 | static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) | 726 | static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) |
727 | __releases(&sh1->stripe_lock) | ||
728 | __releases(&sh2->stripe_lock) | ||
725 | { | 729 | { |
726 | spin_unlock(&sh1->stripe_lock); | 730 | spin_unlock(&sh1->stripe_lock); |
727 | spin_unlock_irq(&sh2->stripe_lock); | 731 | spin_unlock_irq(&sh2->stripe_lock); |
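The __acquires()/__releases() markers added to lock_two_stripes()/unlock_two_stripes() generate no code; they are sparse annotations that let "make C=1" verify that a function's lock context on exit matches what it claims. A minimal userspace sketch of the same idea with pthread locks; the empty stubs below mirror what the kernel uses for a non-sparse build (under sparse they expand to context attributes instead):

#include <pthread.h>
#include <stdio.h>

/* For a normal compile these annotations expand to nothing. */
#define __acquires(x)
#define __releases(x)

static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

/* Declares that the function returns with both locks held... */
static void lock_both(void)
	__acquires(&a) __acquires(&b)
{
	pthread_mutex_lock(&a);
	pthread_mutex_lock(&b);
}

/* ...and that this one must be entered with both locks held. */
static void unlock_both(void)
	__releases(&a) __releases(&b)
{
	pthread_mutex_unlock(&b);
	pthread_mutex_unlock(&a);
}

int main(void)
{
	lock_both();
	puts("critical section");
	unlock_both();
	return 0;
}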
@@ -4187,7 +4191,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, | |||
4187 | /* now write out any block on a failed drive, | 4191 | /* now write out any block on a failed drive, |
4188 | * or P or Q if they were recomputed | 4192 | * or P or Q if they were recomputed |
4189 | */ | 4193 | */ |
4190 | BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ | 4194 | dev = NULL; |
4191 | if (s->failed == 2) { | 4195 | if (s->failed == 2) { |
4192 | dev = &sh->dev[s->failed_num[1]]; | 4196 | dev = &sh->dev[s->failed_num[1]]; |
4193 | s->locked++; | 4197 | s->locked++; |
@@ -4212,6 +4216,14 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, | |||
4212 | set_bit(R5_LOCKED, &dev->flags); | 4216 | set_bit(R5_LOCKED, &dev->flags); |
4213 | set_bit(R5_Wantwrite, &dev->flags); | 4217 | set_bit(R5_Wantwrite, &dev->flags); |
4214 | } | 4218 | } |
4219 | if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags), | ||
4220 | "%s: disk%td not up to date\n", | ||
4221 | mdname(conf->mddev), | ||
4222 | dev - (struct r5dev *) &sh->dev)) { | ||
4223 | clear_bit(R5_LOCKED, &dev->flags); | ||
4224 | clear_bit(R5_Wantwrite, &dev->flags); | ||
4225 | s->locked--; | ||
4226 | } | ||
4215 | clear_bit(STRIPE_DEGRADED, &sh->state); | 4227 | clear_bit(STRIPE_DEGRADED, &sh->state); |
4216 | 4228 | ||
4217 | set_bit(STRIPE_INSYNC, &sh->state); | 4229 | set_bit(STRIPE_INSYNC, &sh->state); |
@@ -6166,6 +6178,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, | |||
6166 | static int handle_active_stripes(struct r5conf *conf, int group, | 6178 | static int handle_active_stripes(struct r5conf *conf, int group, |
6167 | struct r5worker *worker, | 6179 | struct r5worker *worker, |
6168 | struct list_head *temp_inactive_list) | 6180 | struct list_head *temp_inactive_list) |
6181 | __releases(&conf->device_lock) | ||
6182 | __acquires(&conf->device_lock) | ||
6169 | { | 6183 | { |
6170 | struct stripe_head *batch[MAX_STRIPE_BATCH], *sh; | 6184 | struct stripe_head *batch[MAX_STRIPE_BATCH], *sh; |
6171 | int i, batch_size = 0, hash; | 6185 | int i, batch_size = 0, hash; |
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index d271bd731af7..01f40672507f 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c | |||
@@ -391,7 +391,7 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) | |||
391 | bb_present = badblocks_check(&nd_region->bb, meta_start, | 391 | bb_present = badblocks_check(&nd_region->bb, meta_start, |
392 | meta_num, &first_bad, &num_bad); | 392 | meta_num, &first_bad, &num_bad); |
393 | if (bb_present) { | 393 | if (bb_present) { |
394 | dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %lx\n", | 394 | dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n", |
395 | num_bad, first_bad); | 395 | num_bad, first_bad); |
396 | nsoff = ALIGN_DOWN((nd_region->ndr_start | 396 | nsoff = ALIGN_DOWN((nd_region->ndr_start |
397 | + (first_bad << 9)) - nsio->res.start, | 397 | + (first_bad << 9)) - nsio->res.start, |
@@ -410,7 +410,7 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) | |||
410 | } | 410 | } |
411 | if (rc) { | 411 | if (rc) { |
412 | dev_err(&nd_pfn->dev, | 412 | dev_err(&nd_pfn->dev, |
413 | "error clearing %x badblocks at %lx\n", | 413 | "error clearing %x badblocks at %llx\n", |
414 | num_bad, first_bad); | 414 | num_bad, first_bad); |
415 | return rc; | 415 | return rc; |
416 | } | 416 | } |
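The format-string fixes above matter because first_bad is a 64-bit sector count, and %lx only matches it where long happens to be 64 bits; %llx (or PRIx64) is correct on both 32-bit and 64-bit builds. A quick userspace illustration with a made-up offset:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Stand-in for a sector_t bad-block offset; the value is made up. */
	uint64_t first_bad = 0x123456789abULL;
	unsigned int num_bad = 4;

	/* Two portable ways to print a 64-bit value. Plain %lx would be
	 * wrong wherever long is only 32 bits wide.
	 */
	printf("meta: %x badblocks at %" PRIx64 "\n", num_bad, first_bad);
	printf("meta: %x badblocks at %llx\n", num_bad,
	       (unsigned long long)first_bad);
	return 0;
}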
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6265d9225ec8..a6644a2c3ef7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c | |||
@@ -1105,7 +1105,7 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, | |||
1105 | 1105 | ||
1106 | error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); | 1106 | error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); |
1107 | if (error) { | 1107 | if (error) { |
1108 | dev_warn(ctrl->device, "Identify namespace failed\n"); | 1108 | dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); |
1109 | kfree(id); | 1109 | kfree(id); |
1110 | return NULL; | 1110 | return NULL; |
1111 | } | 1111 | } |
@@ -1588,9 +1588,13 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) | |||
1588 | static void nvme_update_disk_info(struct gendisk *disk, | 1588 | static void nvme_update_disk_info(struct gendisk *disk, |
1589 | struct nvme_ns *ns, struct nvme_id_ns *id) | 1589 | struct nvme_ns *ns, struct nvme_id_ns *id) |
1590 | { | 1590 | { |
1591 | sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); | 1591 | sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9); |
1592 | unsigned short bs = 1 << ns->lba_shift; | 1592 | unsigned short bs = 1 << ns->lba_shift; |
1593 | 1593 | ||
1594 | if (ns->lba_shift > PAGE_SHIFT) { | ||
1595 | /* unsupported block size, set capacity to 0 later */ | ||
1596 | bs = (1 << 9); | ||
1597 | } | ||
1594 | blk_mq_freeze_queue(disk->queue); | 1598 | blk_mq_freeze_queue(disk->queue); |
1595 | blk_integrity_unregister(disk); | 1599 | blk_integrity_unregister(disk); |
1596 | 1600 | ||
@@ -1601,7 +1605,8 @@ static void nvme_update_disk_info(struct gendisk *disk, | |||
1601 | if (ns->ms && !ns->ext && | 1605 | if (ns->ms && !ns->ext && |
1602 | (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) | 1606 | (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) |
1603 | nvme_init_integrity(disk, ns->ms, ns->pi_type); | 1607 | nvme_init_integrity(disk, ns->ms, ns->pi_type); |
1604 | if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) | 1608 | if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || |
1609 | ns->lba_shift > PAGE_SHIFT) | ||
1605 | capacity = 0; | 1610 | capacity = 0; |
1606 | 1611 | ||
1607 | set_capacity(disk, capacity); | 1612 | set_capacity(disk, capacity); |
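When a namespace is formatted with an LBA size larger than the kernel page size, the driver cannot do I/O to it, so the hunk above keeps a sane 512-byte queue block size and reports zero capacity instead of failing the namespace scan. A sketch of just that capacity arithmetic, with hypothetical values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned int page_shift = 12;	/* 4 KiB pages assumed */
	unsigned int lba_shift = 13;		/* 8 KiB formatted namespace */
	uint64_t nsze = 1 << 20;		/* namespace size in LBAs (made up) */

	/* Capacity is always reported to the block layer in 512B sectors. */
	uint64_t capacity = nsze << (lba_shift - 9);
	unsigned int bs = 1u << lba_shift;

	if (lba_shift > page_shift) {
		/* Unsupported block size: expose the disk, but empty. */
		bs = 512;
		capacity = 0;
	}

	printf("block size %u, capacity %llu sectors\n",
	       bs, (unsigned long long)capacity);
	return 0;
}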
@@ -2549,7 +2554,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) | |||
2549 | ctrl->crdt[2] = le16_to_cpu(id->crdt3); | 2554 | ctrl->crdt[2] = le16_to_cpu(id->crdt3); |
2550 | 2555 | ||
2551 | ctrl->oacs = le16_to_cpu(id->oacs); | 2556 | ctrl->oacs = le16_to_cpu(id->oacs); |
2552 | ctrl->oncs = le16_to_cpup(&id->oncs); | 2557 | ctrl->oncs = le16_to_cpu(id->oncs); |
2553 | ctrl->oaes = le32_to_cpu(id->oaes); | 2558 | ctrl->oaes = le32_to_cpu(id->oaes); |
2554 | atomic_set(&ctrl->abort_limit, id->acl + 1); | 2559 | atomic_set(&ctrl->abort_limit, id->acl + 1); |
2555 | ctrl->vwc = id->vwc; | 2560 | ctrl->vwc = id->vwc; |
@@ -3874,10 +3879,37 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) | |||
3874 | } | 3879 | } |
3875 | EXPORT_SYMBOL_GPL(nvme_start_queues); | 3880 | EXPORT_SYMBOL_GPL(nvme_start_queues); |
3876 | 3881 | ||
3877 | int __init nvme_core_init(void) | 3882 | /* |
3883 | * Check we didn't inadvertently grow the command structure sizes: | ||
3884 | */ | ||
3885 | static inline void _nvme_check_size(void) | ||
3886 | { | ||
3887 | BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64); | ||
3888 | BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64); | ||
3889 | BUILD_BUG_ON(sizeof(struct nvme_identify) != 64); | ||
3890 | BUILD_BUG_ON(sizeof(struct nvme_features) != 64); | ||
3891 | BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64); | ||
3892 | BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); | ||
3893 | BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64); | ||
3894 | BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64); | ||
3895 | BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); | ||
3896 | BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64); | ||
3897 | BUILD_BUG_ON(sizeof(struct nvme_command) != 64); | ||
3898 | BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); | ||
3899 | BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); | ||
3900 | BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); | ||
3901 | BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); | ||
3902 | BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); | ||
3903 | BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64); | ||
3904 | } | ||
3905 | |||
3906 | |||
3907 | static int __init nvme_core_init(void) | ||
3878 | { | 3908 | { |
3879 | int result = -ENOMEM; | 3909 | int result = -ENOMEM; |
3880 | 3910 | ||
3911 | _nvme_check_size(); | ||
3912 | |||
3881 | nvme_wq = alloc_workqueue("nvme-wq", | 3913 | nvme_wq = alloc_workqueue("nvme-wq", |
3882 | WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); | 3914 | WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); |
3883 | if (!nvme_wq) | 3915 | if (!nvme_wq) |
@@ -3924,7 +3956,7 @@ out: | |||
3924 | return result; | 3956 | return result; |
3925 | } | 3957 | } |
3926 | 3958 | ||
3927 | void __exit nvme_core_exit(void) | 3959 | static void __exit nvme_core_exit(void) |
3928 | { | 3960 | { |
3929 | ida_destroy(&nvme_subsystems_ida); | 3961 | ida_destroy(&nvme_subsystems_ida); |
3930 | class_destroy(nvme_subsys_class); | 3962 | class_destroy(nvme_subsys_class); |
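BUILD_BUG_ON() fails the build when its condition is true, so moving the command-size checks from the PCI driver into the always-built core makes every transport enforce the spec-mandated 64-byte command layout at compile time, at zero run-time cost. A userspace analogue with C11 static_assert; the struct below is a stand-in with the same size, not the real nvme_common_command layout:

#include <assert.h>	/* static_assert */
#include <stdint.h>
#include <stdio.h>

/* Illustrative 64-byte command; the real structs live in include/linux/nvme.h. */
struct example_command {
	uint8_t  opcode;
	uint8_t  flags;
	uint16_t command_id;
	uint32_t nsid;
	uint32_t cdw2[2];
	uint64_t metadata;
	uint64_t prp1;
	uint64_t prp2;
	uint32_t cdw10[6];
};

/* Same idea as BUILD_BUG_ON(sizeof(...) != 64): refuse to compile if the
 * on-the-wire size ever drifts.
 */
static_assert(sizeof(struct example_command) == 64, "command must be 64 bytes");

int main(void)
{
	printf("sizeof(struct example_command) = %zu\n",
	       sizeof(struct example_command));
	return 0;
}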
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index d4cb826f58ff..592d1e61ef7e 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c | |||
@@ -1188,6 +1188,7 @@ static void __exit nvmf_exit(void) | |||
1188 | class_destroy(nvmf_class); | 1188 | class_destroy(nvmf_class); |
1189 | nvmf_host_put(nvmf_default_host); | 1189 | nvmf_host_put(nvmf_default_host); |
1190 | 1190 | ||
1191 | BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64); | ||
1191 | BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64); | 1192 | BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64); |
1192 | BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64); | 1193 | BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64); |
1193 | BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64); | 1194 | BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64); |
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index f0716f6ce41f..5c9429d41120 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c | |||
@@ -232,6 +232,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, | |||
232 | blk_qc_t ret = BLK_QC_T_NONE; | 232 | blk_qc_t ret = BLK_QC_T_NONE; |
233 | int srcu_idx; | 233 | int srcu_idx; |
234 | 234 | ||
235 | /* | ||
236 | * The namespace might be going away and the bio might | ||
237 | * be moved to a different queue via blk_steal_bios(), | ||
238 | * so we need to use the bio_split pool from the original | ||
239 | * queue to allocate the bvecs from. | ||
240 | */ | ||
241 | blk_queue_split(q, &bio); | ||
242 | |||
235 | srcu_idx = srcu_read_lock(&head->srcu); | 243 | srcu_idx = srcu_read_lock(&head->srcu); |
236 | ns = nvme_find_path(head); | 244 | ns = nvme_find_path(head); |
237 | if (likely(ns)) { | 245 | if (likely(ns)) { |
@@ -421,7 +429,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, | |||
421 | unsigned *nr_change_groups = data; | 429 | unsigned *nr_change_groups = data; |
422 | struct nvme_ns *ns; | 430 | struct nvme_ns *ns; |
423 | 431 | ||
424 | dev_info(ctrl->device, "ANA group %d: %s.\n", | 432 | dev_dbg(ctrl->device, "ANA group %d: %s.\n", |
425 | le32_to_cpu(desc->grpid), | 433 | le32_to_cpu(desc->grpid), |
426 | nvme_ana_state_names[desc->state]); | 434 | nvme_ana_state_names[desc->state]); |
427 | 435 | ||
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 527d64545023..5ee75b5ff83f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h | |||
@@ -577,7 +577,4 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) | |||
577 | return dev_to_disk(dev)->private_data; | 577 | return dev_to_disk(dev)->private_data; |
578 | } | 578 | } |
579 | 579 | ||
580 | int __init nvme_core_init(void); | ||
581 | void __exit nvme_core_exit(void); | ||
582 | |||
583 | #endif /* _NVME_H */ | 580 | #endif /* _NVME_H */ |
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a90cf5d63aac..3e4fb891a95a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c | |||
@@ -146,7 +146,7 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp) | |||
146 | 146 | ||
147 | static int queue_count_set(const char *val, const struct kernel_param *kp) | 147 | static int queue_count_set(const char *val, const struct kernel_param *kp) |
148 | { | 148 | { |
149 | int n = 0, ret; | 149 | int n, ret; |
150 | 150 | ||
151 | ret = kstrtoint(val, 10, &n); | 151 | ret = kstrtoint(val, 10, &n); |
152 | if (ret) | 152 | if (ret) |
@@ -177,7 +177,6 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) | |||
177 | * commands and one for I/O commands). | 177 | * commands and one for I/O commands). |
178 | */ | 178 | */ |
179 | struct nvme_queue { | 179 | struct nvme_queue { |
180 | struct device *q_dmadev; | ||
181 | struct nvme_dev *dev; | 180 | struct nvme_dev *dev; |
182 | spinlock_t sq_lock; | 181 | spinlock_t sq_lock; |
183 | struct nvme_command *sq_cmds; | 182 | struct nvme_command *sq_cmds; |
@@ -189,7 +188,7 @@ struct nvme_queue { | |||
189 | dma_addr_t cq_dma_addr; | 188 | dma_addr_t cq_dma_addr; |
190 | u32 __iomem *q_db; | 189 | u32 __iomem *q_db; |
191 | u16 q_depth; | 190 | u16 q_depth; |
192 | s16 cq_vector; | 191 | u16 cq_vector; |
193 | u16 sq_tail; | 192 | u16 sq_tail; |
194 | u16 last_sq_tail; | 193 | u16 last_sq_tail; |
195 | u16 cq_head; | 194 | u16 cq_head; |
@@ -200,6 +199,7 @@ struct nvme_queue { | |||
200 | #define NVMEQ_ENABLED 0 | 199 | #define NVMEQ_ENABLED 0 |
201 | #define NVMEQ_SQ_CMB 1 | 200 | #define NVMEQ_SQ_CMB 1 |
202 | #define NVMEQ_DELETE_ERROR 2 | 201 | #define NVMEQ_DELETE_ERROR 2 |
202 | #define NVMEQ_POLLED 3 | ||
203 | u32 *dbbuf_sq_db; | 203 | u32 *dbbuf_sq_db; |
204 | u32 *dbbuf_cq_db; | 204 | u32 *dbbuf_cq_db; |
205 | u32 *dbbuf_sq_ei; | 205 | u32 *dbbuf_sq_ei; |
@@ -208,10 +208,10 @@ struct nvme_queue { | |||
208 | }; | 208 | }; |
209 | 209 | ||
210 | /* | 210 | /* |
211 | * The nvme_iod describes the data in an I/O, including the list of PRP | 211 | * The nvme_iod describes the data in an I/O. |
212 | * entries. You can't see it in this data structure because C doesn't let | 212 | * |
213 | * me express that. Use nvme_init_iod to ensure there's enough space | 213 | * The sg pointer contains the list of PRP/SGL chunk allocations in addition |
214 | * allocated to store the PRP list. | 214 | * to the actual struct scatterlist. |
215 | */ | 215 | */ |
216 | struct nvme_iod { | 216 | struct nvme_iod { |
217 | struct nvme_request req; | 217 | struct nvme_request req; |
@@ -220,33 +220,12 @@ struct nvme_iod { | |||
220 | int aborted; | 220 | int aborted; |
221 | int npages; /* In the PRP list. 0 means small pool in use */ | 221 | int npages; /* In the PRP list. 0 means small pool in use */ |
222 | int nents; /* Used in scatterlist */ | 222 | int nents; /* Used in scatterlist */ |
223 | int length; /* Of data, in bytes */ | ||
224 | dma_addr_t first_dma; | 223 | dma_addr_t first_dma; |
225 | struct scatterlist meta_sg; /* metadata requires single contiguous buffer */ | 224 | unsigned int dma_len; /* length of single DMA segment mapping */ |
225 | dma_addr_t meta_dma; | ||
226 | struct scatterlist *sg; | 226 | struct scatterlist *sg; |
227 | struct scatterlist inline_sg[0]; | ||
228 | }; | 227 | }; |
229 | 228 | ||
230 | /* | ||
231 | * Check we didin't inadvertently grow the command struct | ||
232 | */ | ||
233 | static inline void _nvme_check_size(void) | ||
234 | { | ||
235 | BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64); | ||
236 | BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); | ||
237 | BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); | ||
238 | BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); | ||
239 | BUILD_BUG_ON(sizeof(struct nvme_features) != 64); | ||
240 | BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); | ||
241 | BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); | ||
242 | BUILD_BUG_ON(sizeof(struct nvme_command) != 64); | ||
243 | BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); | ||
244 | BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); | ||
245 | BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); | ||
246 | BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); | ||
247 | BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); | ||
248 | } | ||
249 | |||
250 | static unsigned int max_io_queues(void) | 229 | static unsigned int max_io_queues(void) |
251 | { | 230 | { |
252 | return num_possible_cpus() + write_queues + poll_queues; | 231 | return num_possible_cpus() + write_queues + poll_queues; |
@@ -372,12 +351,6 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, | |||
372 | } | 351 | } |
373 | 352 | ||
374 | /* | 353 | /* |
375 | * Max size of iod being embedded in the request payload | ||
376 | */ | ||
377 | #define NVME_INT_PAGES 2 | ||
378 | #define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->ctrl.page_size) | ||
379 | |||
380 | /* | ||
381 | * Will slightly overestimate the number of pages needed. This is OK | 354 | * Will slightly overestimate the number of pages needed. This is OK |
382 | * as it only leads to a small amount of wasted memory for the lifetime of | 355 | * as it only leads to a small amount of wasted memory for the lifetime of |
383 | * the I/O. | 356 | * the I/O. |
@@ -411,15 +384,6 @@ static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev, | |||
411 | return alloc_size + sizeof(struct scatterlist) * nseg; | 384 | return alloc_size + sizeof(struct scatterlist) * nseg; |
412 | } | 385 | } |
413 | 386 | ||
414 | static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl) | ||
415 | { | ||
416 | unsigned int alloc_size = nvme_pci_iod_alloc_size(dev, | ||
417 | NVME_INT_BYTES(dev), NVME_INT_PAGES, | ||
418 | use_sgl); | ||
419 | |||
420 | return sizeof(struct nvme_iod) + alloc_size; | ||
421 | } | ||
422 | |||
423 | static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, | 387 | static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, |
424 | unsigned int hctx_idx) | 388 | unsigned int hctx_idx) |
425 | { | 389 | { |
@@ -584,37 +548,26 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) | |||
584 | return true; | 548 | return true; |
585 | } | 549 | } |
586 | 550 | ||
587 | static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) | 551 | static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) |
588 | { | 552 | { |
589 | struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); | 553 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
590 | int nseg = blk_rq_nr_phys_segments(rq); | 554 | enum dma_data_direction dma_dir = rq_data_dir(req) ? |
591 | unsigned int size = blk_rq_payload_bytes(rq); | 555 | DMA_TO_DEVICE : DMA_FROM_DEVICE; |
592 | 556 | const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1; | |
593 | iod->use_sgl = nvme_pci_use_sgls(dev, rq); | 557 | dma_addr_t dma_addr = iod->first_dma, next_dma_addr; |
558 | int i; | ||
594 | 559 | ||
595 | if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { | 560 | if (iod->dma_len) { |
596 | iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); | 561 | dma_unmap_page(dev->dev, dma_addr, iod->dma_len, dma_dir); |
597 | if (!iod->sg) | 562 | return; |
598 | return BLK_STS_RESOURCE; | ||
599 | } else { | ||
600 | iod->sg = iod->inline_sg; | ||
601 | } | 563 | } |
602 | 564 | ||
603 | iod->aborted = 0; | 565 | WARN_ON_ONCE(!iod->nents); |
604 | iod->npages = -1; | ||
605 | iod->nents = 0; | ||
606 | iod->length = size; | ||
607 | |||
608 | return BLK_STS_OK; | ||
609 | } | ||
610 | 566 | ||
611 | static void nvme_free_iod(struct nvme_dev *dev, struct request *req) | 567 | /* P2PDMA requests do not need to be unmapped */ |
612 | { | 568 | if (!is_pci_p2pdma_page(sg_page(iod->sg))) |
613 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | 569 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); |
614 | const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1; | ||
615 | dma_addr_t dma_addr = iod->first_dma, next_dma_addr; | ||
616 | 570 | ||
617 | int i; | ||
618 | 571 | ||
619 | if (iod->npages == 0) | 572 | if (iod->npages == 0) |
620 | dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], | 573 | dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], |
@@ -638,8 +591,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req) | |||
638 | dma_addr = next_dma_addr; | 591 | dma_addr = next_dma_addr; |
639 | } | 592 | } |
640 | 593 | ||
641 | if (iod->sg != iod->inline_sg) | 594 | mempool_free(iod->sg, dev->iod_mempool); |
642 | mempool_free(iod->sg, dev->iod_mempool); | ||
643 | } | 595 | } |
644 | 596 | ||
645 | static void nvme_print_sgl(struct scatterlist *sgl, int nents) | 597 | static void nvme_print_sgl(struct scatterlist *sgl, int nents) |
@@ -829,80 +781,104 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, | |||
829 | return BLK_STS_OK; | 781 | return BLK_STS_OK; |
830 | } | 782 | } |
831 | 783 | ||
784 | static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, | ||
785 | struct request *req, struct nvme_rw_command *cmnd, | ||
786 | struct bio_vec *bv) | ||
787 | { | ||
788 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | ||
789 | unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset; | ||
790 | |||
791 | iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); | ||
792 | if (dma_mapping_error(dev->dev, iod->first_dma)) | ||
793 | return BLK_STS_RESOURCE; | ||
794 | iod->dma_len = bv->bv_len; | ||
795 | |||
796 | cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); | ||
797 | if (bv->bv_len > first_prp_len) | ||
798 | cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); | ||
799 | return 0; | ||
800 | } | ||
801 | |||
802 | static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev, | ||
803 | struct request *req, struct nvme_rw_command *cmnd, | ||
804 | struct bio_vec *bv) | ||
805 | { | ||
806 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | ||
807 | |||
808 | iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); | ||
809 | if (dma_mapping_error(dev->dev, iod->first_dma)) | ||
810 | return BLK_STS_RESOURCE; | ||
811 | iod->dma_len = bv->bv_len; | ||
812 | |||
813 | cmnd->flags = NVME_CMD_SGL_METABUF; | ||
814 | cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma); | ||
815 | cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len); | ||
816 | cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4; | ||
817 | return 0; | ||
818 | } | ||
819 | |||
832 | static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, | 820 | static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, |
833 | struct nvme_command *cmnd) | 821 | struct nvme_command *cmnd) |
834 | { | 822 | { |
835 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | 823 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
836 | struct request_queue *q = req->q; | 824 | blk_status_t ret = BLK_STS_RESOURCE; |
837 | enum dma_data_direction dma_dir = rq_data_dir(req) ? | ||
838 | DMA_TO_DEVICE : DMA_FROM_DEVICE; | ||
839 | blk_status_t ret = BLK_STS_IOERR; | ||
840 | int nr_mapped; | 825 | int nr_mapped; |
841 | 826 | ||
827 | if (blk_rq_nr_phys_segments(req) == 1) { | ||
828 | struct bio_vec bv = req_bvec(req); | ||
829 | |||
830 | if (!is_pci_p2pdma_page(bv.bv_page)) { | ||
831 | if (bv.bv_offset + bv.bv_len <= dev->ctrl.page_size * 2) | ||
832 | return nvme_setup_prp_simple(dev, req, | ||
833 | &cmnd->rw, &bv); | ||
834 | |||
835 | if (iod->nvmeq->qid && | ||
836 | dev->ctrl.sgls & ((1 << 0) | (1 << 1))) | ||
837 | return nvme_setup_sgl_simple(dev, req, | ||
838 | &cmnd->rw, &bv); | ||
839 | } | ||
840 | } | ||
841 | |||
842 | iod->dma_len = 0; | ||
843 | iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); | ||
844 | if (!iod->sg) | ||
845 | return BLK_STS_RESOURCE; | ||
842 | sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); | 846 | sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); |
843 | iod->nents = blk_rq_map_sg(q, req, iod->sg); | 847 | iod->nents = blk_rq_map_sg(req->q, req, iod->sg); |
844 | if (!iod->nents) | 848 | if (!iod->nents) |
845 | goto out; | 849 | goto out; |
846 | 850 | ||
847 | ret = BLK_STS_RESOURCE; | ||
848 | |||
849 | if (is_pci_p2pdma_page(sg_page(iod->sg))) | 851 | if (is_pci_p2pdma_page(sg_page(iod->sg))) |
850 | nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents, | 852 | nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents, |
851 | dma_dir); | 853 | rq_dma_dir(req)); |
852 | else | 854 | else |
853 | nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, | 855 | nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, |
854 | dma_dir, DMA_ATTR_NO_WARN); | 856 | rq_dma_dir(req), DMA_ATTR_NO_WARN); |
855 | if (!nr_mapped) | 857 | if (!nr_mapped) |
856 | goto out; | 858 | goto out; |
857 | 859 | ||
860 | iod->use_sgl = nvme_pci_use_sgls(dev, req); | ||
858 | if (iod->use_sgl) | 861 | if (iod->use_sgl) |
859 | ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); | 862 | ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); |
860 | else | 863 | else |
861 | ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); | 864 | ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); |
862 | |||
863 | if (ret != BLK_STS_OK) | ||
864 | goto out_unmap; | ||
865 | |||
866 | ret = BLK_STS_IOERR; | ||
867 | if (blk_integrity_rq(req)) { | ||
868 | if (blk_rq_count_integrity_sg(q, req->bio) != 1) | ||
869 | goto out_unmap; | ||
870 | |||
871 | sg_init_table(&iod->meta_sg, 1); | ||
872 | if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) | ||
873 | goto out_unmap; | ||
874 | |||
875 | if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) | ||
876 | goto out_unmap; | ||
877 | |||
878 | cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); | ||
879 | } | ||
880 | |||
881 | return BLK_STS_OK; | ||
882 | |||
883 | out_unmap: | ||
884 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); | ||
885 | out: | 865 | out: |
866 | if (ret != BLK_STS_OK) | ||
867 | nvme_unmap_data(dev, req); | ||
886 | return ret; | 868 | return ret; |
887 | } | 869 | } |
888 | 870 | ||
889 | static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) | 871 | static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req, |
872 | struct nvme_command *cmnd) | ||
890 | { | 873 | { |
891 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | 874 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
892 | enum dma_data_direction dma_dir = rq_data_dir(req) ? | ||
893 | DMA_TO_DEVICE : DMA_FROM_DEVICE; | ||
894 | |||
895 | if (iod->nents) { | ||
896 | /* P2PDMA requests do not need to be unmapped */ | ||
897 | if (!is_pci_p2pdma_page(sg_page(iod->sg))) | ||
898 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); | ||
899 | |||
900 | if (blk_integrity_rq(req)) | ||
901 | dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); | ||
902 | } | ||
903 | 875 | ||
904 | nvme_cleanup_cmd(req); | 876 | iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req), |
905 | nvme_free_iod(dev, req); | 877 | rq_dma_dir(req), 0); |
878 | if (dma_mapping_error(dev->dev, iod->meta_dma)) | ||
879 | return BLK_STS_IOERR; | ||
880 | cmnd->rw.metadata = cpu_to_le64(iod->meta_dma); | ||
881 | return 0; | ||
906 | } | 882 | } |
907 | 883 | ||
908 | /* | 884 | /* |
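The rewritten mapping path above short-circuits single-segment requests: no scatterlist is allocated, the bio_vec is mapped directly, and the data pointer is built from plain address arithmetic, with PRP2 needed only when the segment crosses a controller-page boundary. A sketch of just that arithmetic, with the DMA mapping replaced by a made-up bus address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t page_size = 4096;	/* controller page size assumed 4 KiB */

	/* A single-segment request: bus address and length (hypothetical). */
	uint64_t dma_addr = 0x10000000 + 0x600;	/* starts 0x600 into a page */
	uint32_t len = 6144;			/* 6 KiB transfer */

	uint64_t prp1 = dma_addr;
	uint64_t prp2 = 0;
	uint64_t first_prp_len = page_size - (dma_addr & (page_size - 1));

	/* If the segment spills past the first device page, PRP2 points at
	 * the start of the second page; anything needing more than two
	 * pages falls back to the full PRP/SGL path.
	 */
	if (len > first_prp_len)
		prp2 = prp1 + first_prp_len;

	printf("prp1=%#llx prp2=%#llx\n",
	       (unsigned long long)prp1, (unsigned long long)prp2);
	return 0;
}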
@@ -915,9 +891,14 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
915 | struct nvme_queue *nvmeq = hctx->driver_data; | 891 | struct nvme_queue *nvmeq = hctx->driver_data; |
916 | struct nvme_dev *dev = nvmeq->dev; | 892 | struct nvme_dev *dev = nvmeq->dev; |
917 | struct request *req = bd->rq; | 893 | struct request *req = bd->rq; |
894 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | ||
918 | struct nvme_command cmnd; | 895 | struct nvme_command cmnd; |
919 | blk_status_t ret; | 896 | blk_status_t ret; |
920 | 897 | ||
898 | iod->aborted = 0; | ||
899 | iod->npages = -1; | ||
900 | iod->nents = 0; | ||
901 | |||
921 | /* | 902 | /* |
922 | * We should not need to do this, but we're still using this to | 903 | * We should not need to do this, but we're still using this to |
923 | * ensure we can drain requests on a dying queue. | 904 | * ensure we can drain requests on a dying queue. |
@@ -929,21 +910,23 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
929 | if (ret) | 910 | if (ret) |
930 | return ret; | 911 | return ret; |
931 | 912 | ||
932 | ret = nvme_init_iod(req, dev); | ||
933 | if (ret) | ||
934 | goto out_free_cmd; | ||
935 | |||
936 | if (blk_rq_nr_phys_segments(req)) { | 913 | if (blk_rq_nr_phys_segments(req)) { |
937 | ret = nvme_map_data(dev, req, &cmnd); | 914 | ret = nvme_map_data(dev, req, &cmnd); |
938 | if (ret) | 915 | if (ret) |
939 | goto out_cleanup_iod; | 916 | goto out_free_cmd; |
917 | } | ||
918 | |||
919 | if (blk_integrity_rq(req)) { | ||
920 | ret = nvme_map_metadata(dev, req, &cmnd); | ||
921 | if (ret) | ||
922 | goto out_unmap_data; | ||
940 | } | 923 | } |
941 | 924 | ||
942 | blk_mq_start_request(req); | 925 | blk_mq_start_request(req); |
943 | nvme_submit_cmd(nvmeq, &cmnd, bd->last); | 926 | nvme_submit_cmd(nvmeq, &cmnd, bd->last); |
944 | return BLK_STS_OK; | 927 | return BLK_STS_OK; |
945 | out_cleanup_iod: | 928 | out_unmap_data: |
946 | nvme_free_iod(dev, req); | 929 | nvme_unmap_data(dev, req); |
947 | out_free_cmd: | 930 | out_free_cmd: |
948 | nvme_cleanup_cmd(req); | 931 | nvme_cleanup_cmd(req); |
949 | return ret; | 932 | return ret; |
@@ -952,8 +935,14 @@ out_free_cmd: | |||
952 | static void nvme_pci_complete_rq(struct request *req) | 935 | static void nvme_pci_complete_rq(struct request *req) |
953 | { | 936 | { |
954 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | 937 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
938 | struct nvme_dev *dev = iod->nvmeq->dev; | ||
955 | 939 | ||
956 | nvme_unmap_data(iod->nvmeq->dev, req); | 940 | nvme_cleanup_cmd(req); |
941 | if (blk_integrity_rq(req)) | ||
942 | dma_unmap_page(dev->dev, iod->meta_dma, | ||
943 | rq_integrity_vec(req)->bv_len, rq_data_dir(req)); | ||
944 | if (blk_rq_nr_phys_segments(req)) | ||
945 | nvme_unmap_data(dev, req); | ||
957 | nvme_complete_rq(req); | 946 | nvme_complete_rq(req); |
958 | } | 947 | } |
959 | 948 | ||
@@ -1088,7 +1077,7 @@ static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) | |||
1088 | * using the CQ lock. For normal interrupt driven threads we have | 1077 | * using the CQ lock. For normal interrupt driven threads we have |
1089 | * to disable the interrupt to avoid racing with it. | 1078 | * to disable the interrupt to avoid racing with it. |
1090 | */ | 1079 | */ |
1091 | if (nvmeq->cq_vector == -1) { | 1080 | if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) { |
1092 | spin_lock(&nvmeq->cq_poll_lock); | 1081 | spin_lock(&nvmeq->cq_poll_lock); |
1093 | found = nvme_process_cq(nvmeq, &start, &end, tag); | 1082 | found = nvme_process_cq(nvmeq, &start, &end, tag); |
1094 | spin_unlock(&nvmeq->cq_poll_lock); | 1083 | spin_unlock(&nvmeq->cq_poll_lock); |
@@ -1148,7 +1137,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, | |||
1148 | struct nvme_command c; | 1137 | struct nvme_command c; |
1149 | int flags = NVME_QUEUE_PHYS_CONTIG; | 1138 | int flags = NVME_QUEUE_PHYS_CONTIG; |
1150 | 1139 | ||
1151 | if (vector != -1) | 1140 | if (!test_bit(NVMEQ_POLLED, &nvmeq->flags)) |
1152 | flags |= NVME_CQ_IRQ_ENABLED; | 1141 | flags |= NVME_CQ_IRQ_ENABLED; |
1153 | 1142 | ||
1154 | /* | 1143 | /* |
@@ -1161,10 +1150,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, | |||
1161 | c.create_cq.cqid = cpu_to_le16(qid); | 1150 | c.create_cq.cqid = cpu_to_le16(qid); |
1162 | c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); | 1151 | c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); |
1163 | c.create_cq.cq_flags = cpu_to_le16(flags); | 1152 | c.create_cq.cq_flags = cpu_to_le16(flags); |
1164 | if (vector != -1) | 1153 | c.create_cq.irq_vector = cpu_to_le16(vector); |
1165 | c.create_cq.irq_vector = cpu_to_le16(vector); | ||
1166 | else | ||
1167 | c.create_cq.irq_vector = 0; | ||
1168 | 1154 | ||
1169 | return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); | 1155 | return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); |
1170 | } | 1156 | } |
@@ -1271,6 +1257,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) | |||
1271 | struct nvme_dev *dev = nvmeq->dev; | 1257 | struct nvme_dev *dev = nvmeq->dev; |
1272 | struct request *abort_req; | 1258 | struct request *abort_req; |
1273 | struct nvme_command cmd; | 1259 | struct nvme_command cmd; |
1260 | bool shutdown = false; | ||
1274 | u32 csts = readl(dev->bar + NVME_REG_CSTS); | 1261 | u32 csts = readl(dev->bar + NVME_REG_CSTS); |
1275 | 1262 | ||
1276 | /* If PCI error recovery process is happening, we cannot reset or | 1263 | /* If PCI error recovery process is happening, we cannot reset or |
@@ -1307,12 +1294,14 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) | |||
1307 | * shutdown, so we return BLK_EH_DONE. | 1294 | * shutdown, so we return BLK_EH_DONE. |
1308 | */ | 1295 | */ |
1309 | switch (dev->ctrl.state) { | 1296 | switch (dev->ctrl.state) { |
1297 | case NVME_CTRL_DELETING: | ||
1298 | shutdown = true; | ||
1310 | case NVME_CTRL_CONNECTING: | 1299 | case NVME_CTRL_CONNECTING: |
1311 | case NVME_CTRL_RESETTING: | 1300 | case NVME_CTRL_RESETTING: |
1312 | dev_warn_ratelimited(dev->ctrl.device, | 1301 | dev_warn_ratelimited(dev->ctrl.device, |
1313 | "I/O %d QID %d timeout, disable controller\n", | 1302 | "I/O %d QID %d timeout, disable controller\n", |
1314 | req->tag, nvmeq->qid); | 1303 | req->tag, nvmeq->qid); |
1315 | nvme_dev_disable(dev, false); | 1304 | nvme_dev_disable(dev, shutdown); |
1316 | nvme_req(req)->flags |= NVME_REQ_CANCELLED; | 1305 | nvme_req(req)->flags |= NVME_REQ_CANCELLED; |
1317 | return BLK_EH_DONE; | 1306 | return BLK_EH_DONE; |
1318 | default: | 1307 | default: |
@@ -1371,16 +1360,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) | |||
1371 | 1360 | ||
1372 | static void nvme_free_queue(struct nvme_queue *nvmeq) | 1361 | static void nvme_free_queue(struct nvme_queue *nvmeq) |
1373 | { | 1362 | { |
1374 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | 1363 | dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth), |
1375 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | 1364 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); |
1376 | if (!nvmeq->sq_cmds) | 1365 | if (!nvmeq->sq_cmds) |
1377 | return; | 1366 | return; |
1378 | 1367 | ||
1379 | if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { | 1368 | if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { |
1380 | pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev), | 1369 | pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev), |
1381 | nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth)); | 1370 | nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth)); |
1382 | } else { | 1371 | } else { |
1383 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | 1372 | dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth), |
1384 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); | 1373 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); |
1385 | } | 1374 | } |
1386 | } | 1375 | } |
@@ -1410,10 +1399,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) | |||
1410 | nvmeq->dev->online_queues--; | 1399 | nvmeq->dev->online_queues--; |
1411 | if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) | 1400 | if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) |
1412 | blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); | 1401 | blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); |
1413 | if (nvmeq->cq_vector == -1) | 1402 | if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) |
1414 | return 0; | 1403 | pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); |
1415 | pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); | ||
1416 | nvmeq->cq_vector = -1; | ||
1417 | return 0; | 1404 | return 0; |
1418 | } | 1405 | } |
1419 | 1406 | ||
@@ -1498,7 +1485,6 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) | |||
1498 | if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth)) | 1485 | if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth)) |
1499 | goto free_cqdma; | 1486 | goto free_cqdma; |
1500 | 1487 | ||
1501 | nvmeq->q_dmadev = dev->dev; | ||
1502 | nvmeq->dev = dev; | 1488 | nvmeq->dev = dev; |
1503 | spin_lock_init(&nvmeq->sq_lock); | 1489 | spin_lock_init(&nvmeq->sq_lock); |
1504 | spin_lock_init(&nvmeq->cq_poll_lock); | 1490 | spin_lock_init(&nvmeq->cq_poll_lock); |
@@ -1507,7 +1493,6 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) | |||
1507 | nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; | 1493 | nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; |
1508 | nvmeq->q_depth = depth; | 1494 | nvmeq->q_depth = depth; |
1509 | nvmeq->qid = qid; | 1495 | nvmeq->qid = qid; |
1510 | nvmeq->cq_vector = -1; | ||
1511 | dev->ctrl.queue_count++; | 1496 | dev->ctrl.queue_count++; |
1512 | 1497 | ||
1513 | return 0; | 1498 | return 0; |
@@ -1552,7 +1537,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) | |||
1552 | { | 1537 | { |
1553 | struct nvme_dev *dev = nvmeq->dev; | 1538 | struct nvme_dev *dev = nvmeq->dev; |
1554 | int result; | 1539 | int result; |
1555 | s16 vector; | 1540 | u16 vector = 0; |
1556 | 1541 | ||
1557 | clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); | 1542 | clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); |
1558 | 1543 | ||
@@ -1563,7 +1548,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) | |||
1563 | if (!polled) | 1548 | if (!polled) |
1564 | vector = dev->num_vecs == 1 ? 0 : qid; | 1549 | vector = dev->num_vecs == 1 ? 0 : qid; |
1565 | else | 1550 | else |
1566 | vector = -1; | 1551 | set_bit(NVMEQ_POLLED, &nvmeq->flags); |
1567 | 1552 | ||
1568 | result = adapter_alloc_cq(dev, qid, nvmeq, vector); | 1553 | result = adapter_alloc_cq(dev, qid, nvmeq, vector); |
1569 | if (result) | 1554 | if (result) |
@@ -1578,7 +1563,8 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) | |||
1578 | nvmeq->cq_vector = vector; | 1563 | nvmeq->cq_vector = vector; |
1579 | nvme_init_queue(nvmeq, qid); | 1564 | nvme_init_queue(nvmeq, qid); |
1580 | 1565 | ||
1581 | if (vector != -1) { | 1566 | if (!polled) { |
1567 | nvmeq->cq_vector = vector; | ||
1582 | result = queue_request_irq(nvmeq); | 1568 | result = queue_request_irq(nvmeq); |
1583 | if (result < 0) | 1569 | if (result < 0) |
1584 | goto release_sq; | 1570 | goto release_sq; |
@@ -1588,7 +1574,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) | |||
1588 | return result; | 1574 | return result; |
1589 | 1575 | ||
1590 | release_sq: | 1576 | release_sq: |
1591 | nvmeq->cq_vector = -1; | ||
1592 | dev->online_queues--; | 1577 | dev->online_queues--; |
1593 | adapter_delete_sq(dev, qid); | 1578 | adapter_delete_sq(dev, qid); |
1594 | release_cq: | 1579 | release_cq: |
@@ -1639,7 +1624,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) | |||
1639 | dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; | 1624 | dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; |
1640 | dev->admin_tagset.timeout = ADMIN_TIMEOUT; | 1625 | dev->admin_tagset.timeout = ADMIN_TIMEOUT; |
1641 | dev->admin_tagset.numa_node = dev_to_node(dev->dev); | 1626 | dev->admin_tagset.numa_node = dev_to_node(dev->dev); |
1642 | dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false); | 1627 | dev->admin_tagset.cmd_size = sizeof(struct nvme_iod); |
1643 | dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; | 1628 | dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; |
1644 | dev->admin_tagset.driver_data = dev; | 1629 | dev->admin_tagset.driver_data = dev; |
1645 | 1630 | ||
@@ -1730,7 +1715,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) | |||
1730 | nvme_init_queue(nvmeq, 0); | 1715 | nvme_init_queue(nvmeq, 0); |
1731 | result = queue_request_irq(nvmeq); | 1716 | result = queue_request_irq(nvmeq); |
1732 | if (result) { | 1717 | if (result) { |
1733 | nvmeq->cq_vector = -1; | 1718 | dev->online_queues--; |
1734 | return result; | 1719 | return result; |
1735 | } | 1720 | } |
1736 | 1721 | ||
@@ -2171,10 +2156,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
2171 | * number of interrupts. | 2156 | * number of interrupts. |
2172 | */ | 2157 | */ |
2173 | result = queue_request_irq(adminq); | 2158 | result = queue_request_irq(adminq); |
2174 | if (result) { | 2159 | if (result) |
2175 | adminq->cq_vector = -1; | ||
2176 | return result; | 2160 | return result; |
2177 | } | ||
2178 | set_bit(NVMEQ_ENABLED, &adminq->flags); | 2161 | set_bit(NVMEQ_ENABLED, &adminq->flags); |
2179 | 2162 | ||
2180 | result = nvme_create_io_queues(dev); | 2163 | result = nvme_create_io_queues(dev); |
@@ -2286,11 +2269,7 @@ static int nvme_dev_add(struct nvme_dev *dev) | |||
2286 | dev->tagset.numa_node = dev_to_node(dev->dev); | 2269 | dev->tagset.numa_node = dev_to_node(dev->dev); |
2287 | dev->tagset.queue_depth = | 2270 | dev->tagset.queue_depth = |
2288 | min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; | 2271 | min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; |
2289 | dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false); | 2272 | dev->tagset.cmd_size = sizeof(struct nvme_iod); |
2290 | if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) { | ||
2291 | dev->tagset.cmd_size = max(dev->tagset.cmd_size, | ||
2292 | nvme_pci_cmd_size(dev, true)); | ||
2293 | } | ||
2294 | dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; | 2273 | dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; |
2295 | dev->tagset.driver_data = dev; | 2274 | dev->tagset.driver_data = dev; |
2296 | 2275 | ||
@@ -2438,8 +2417,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) | |||
2438 | * must flush all entered requests to their failed completion to avoid | 2417 | * must flush all entered requests to their failed completion to avoid |
2439 | * deadlocking blk-mq hot-cpu notifier. | 2418 | * deadlocking blk-mq hot-cpu notifier. |
2440 | */ | 2419 | */ |
2441 | if (shutdown) | 2420 | if (shutdown) { |
2442 | nvme_start_queues(&dev->ctrl); | 2421 | nvme_start_queues(&dev->ctrl); |
2422 | if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) | ||
2423 | blk_mq_unquiesce_queue(dev->ctrl.admin_q); | ||
2424 | } | ||
2443 | mutex_unlock(&dev->shutdown_lock); | 2425 | mutex_unlock(&dev->shutdown_lock); |
2444 | } | 2426 | } |
2445 | 2427 | ||
@@ -2979,6 +2961,9 @@ static struct pci_driver nvme_driver = { | |||
2979 | 2961 | ||
2980 | static int __init nvme_init(void) | 2962 | static int __init nvme_init(void) |
2981 | { | 2963 | { |
2964 | BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); | ||
2965 | BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); | ||
2966 | BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); | ||
2982 | BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); | 2967 | BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); |
2983 | return pci_register_driver(&nvme_driver); | 2968 | return pci_register_driver(&nvme_driver); |
2984 | } | 2969 | } |
@@ -2987,7 +2972,6 @@ static void __exit nvme_exit(void) | |||
2987 | { | 2972 | { |
2988 | pci_unregister_driver(&nvme_driver); | 2973 | pci_unregister_driver(&nvme_driver); |
2989 | flush_workqueue(nvme_wq); | 2974 | flush_workqueue(nvme_wq); |
2990 | _nvme_check_size(); | ||
2991 | } | 2975 | } |
2992 | 2976 | ||
2993 | MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); | 2977 | MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); |
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 11a5ecae78c8..e1824c2e0a1c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c | |||
@@ -914,8 +914,9 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, | |||
914 | { | 914 | { |
915 | blk_mq_quiesce_queue(ctrl->ctrl.admin_q); | 915 | blk_mq_quiesce_queue(ctrl->ctrl.admin_q); |
916 | nvme_rdma_stop_queue(&ctrl->queues[0]); | 916 | nvme_rdma_stop_queue(&ctrl->queues[0]); |
917 | blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, | 917 | if (ctrl->ctrl.admin_tagset) |
918 | &ctrl->ctrl); | 918 | blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset, |
919 | nvme_cancel_request, &ctrl->ctrl); | ||
919 | blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); | 920 | blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); |
920 | nvme_rdma_destroy_admin_queue(ctrl, remove); | 921 | nvme_rdma_destroy_admin_queue(ctrl, remove); |
921 | } | 922 | } |
@@ -926,8 +927,9 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, | |||
926 | if (ctrl->ctrl.queue_count > 1) { | 927 | if (ctrl->ctrl.queue_count > 1) { |
927 | nvme_stop_queues(&ctrl->ctrl); | 928 | nvme_stop_queues(&ctrl->ctrl); |
928 | nvme_rdma_stop_io_queues(ctrl); | 929 | nvme_rdma_stop_io_queues(ctrl); |
929 | blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, | 930 | if (ctrl->ctrl.tagset) |
930 | &ctrl->ctrl); | 931 | blk_mq_tagset_busy_iter(ctrl->ctrl.tagset, |
932 | nvme_cancel_request, &ctrl->ctrl); | ||
931 | if (remove) | 933 | if (remove) |
932 | nvme_start_queues(&ctrl->ctrl); | 934 | nvme_start_queues(&ctrl->ctrl); |
933 | nvme_rdma_destroy_io_queues(ctrl, remove); | 935 | nvme_rdma_destroy_io_queues(ctrl, remove); |
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 68c49dd67210..2b107a1d152b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c | |||
@@ -473,7 +473,6 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue, | |||
473 | } | 473 | } |
474 | 474 | ||
475 | return 0; | 475 | return 0; |
476 | |||
477 | } | 476 | } |
478 | 477 | ||
479 | static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, | 478 | static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, |
@@ -634,7 +633,6 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status) | |||
634 | nvme_end_request(rq, cpu_to_le16(status << 1), res); | 633 | nvme_end_request(rq, cpu_to_le16(status << 1), res); |
635 | } | 634 | } |
636 | 635 | ||
637 | |||
638 | static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, | 636 | static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, |
639 | unsigned int *offset, size_t *len) | 637 | unsigned int *offset, size_t *len) |
640 | { | 638 | { |
@@ -1425,7 +1423,8 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) | |||
1425 | if (!ret) { | 1423 | if (!ret) { |
1426 | set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags); | 1424 | set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags); |
1427 | } else { | 1425 | } else { |
1428 | __nvme_tcp_stop_queue(&ctrl->queues[idx]); | 1426 | if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags)) |
1427 | __nvme_tcp_stop_queue(&ctrl->queues[idx]); | ||
1429 | dev_err(nctrl->device, | 1428 | dev_err(nctrl->device, |
1430 | "failed to connect queue: %d ret=%d\n", idx, ret); | 1429 | "failed to connect queue: %d ret=%d\n", idx, ret); |
1431 | } | 1430 | } |
@@ -1535,7 +1534,7 @@ out_free_queue: | |||
1535 | return ret; | 1534 | return ret; |
1536 | } | 1535 | } |
1537 | 1536 | ||
1538 | static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) | 1537 | static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) |
1539 | { | 1538 | { |
1540 | int i, ret; | 1539 | int i, ret; |
1541 | 1540 | ||
@@ -1565,7 +1564,7 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl) | |||
1565 | return nr_io_queues; | 1564 | return nr_io_queues; |
1566 | } | 1565 | } |
1567 | 1566 | ||
1568 | static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl) | 1567 | static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) |
1569 | { | 1568 | { |
1570 | unsigned int nr_io_queues; | 1569 | unsigned int nr_io_queues; |
1571 | int ret; | 1570 | int ret; |
@@ -1582,7 +1581,7 @@ static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl) | |||
1582 | dev_info(ctrl->device, | 1581 | dev_info(ctrl->device, |
1583 | "creating %d I/O queues.\n", nr_io_queues); | 1582 | "creating %d I/O queues.\n", nr_io_queues); |
1584 | 1583 | ||
1585 | return nvme_tcp_alloc_io_queues(ctrl); | 1584 | return __nvme_tcp_alloc_io_queues(ctrl); |
1586 | } | 1585 | } |
1587 | 1586 | ||
1588 | static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) | 1587 | static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) |
@@ -1599,7 +1598,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) | |||
1599 | { | 1598 | { |
1600 | int ret; | 1599 | int ret; |
1601 | 1600 | ||
1602 | ret = nvme_alloc_io_queues(ctrl); | 1601 | ret = nvme_tcp_alloc_io_queues(ctrl); |
1603 | if (ret) | 1602 | if (ret) |
1604 | return ret; | 1603 | return ret; |
1605 | 1604 | ||
@@ -1710,7 +1709,9 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, | |||
1710 | { | 1709 | { |
1711 | blk_mq_quiesce_queue(ctrl->admin_q); | 1710 | blk_mq_quiesce_queue(ctrl->admin_q); |
1712 | nvme_tcp_stop_queue(ctrl, 0); | 1711 | nvme_tcp_stop_queue(ctrl, 0); |
1713 | blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl); | 1712 | if (ctrl->admin_tagset) |
1713 | blk_mq_tagset_busy_iter(ctrl->admin_tagset, | ||
1714 | nvme_cancel_request, ctrl); | ||
1714 | blk_mq_unquiesce_queue(ctrl->admin_q); | 1715 | blk_mq_unquiesce_queue(ctrl->admin_q); |
1715 | nvme_tcp_destroy_admin_queue(ctrl, remove); | 1716 | nvme_tcp_destroy_admin_queue(ctrl, remove); |
1716 | } | 1717 | } |
@@ -1722,7 +1723,9 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, | |||
1722 | return; | 1723 | return; |
1723 | nvme_stop_queues(ctrl); | 1724 | nvme_stop_queues(ctrl); |
1724 | nvme_tcp_stop_io_queues(ctrl); | 1725 | nvme_tcp_stop_io_queues(ctrl); |
1725 | blk_mq_tagset_busy_iter(ctrl->tagset, nvme_cancel_request, ctrl); | 1726 | if (ctrl->tagset) |
1727 | blk_mq_tagset_busy_iter(ctrl->tagset, | ||
1728 | nvme_cancel_request, ctrl); | ||
1726 | if (remove) | 1729 | if (remove) |
1727 | nvme_start_queues(ctrl); | 1730 | nvme_start_queues(ctrl); |
1728 | nvme_tcp_destroy_io_queues(ctrl, remove); | 1731 | nvme_tcp_destroy_io_queues(ctrl, remove); |
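Note on the nvme-tcp host hunks above: the same NULL-tagset guard is applied to both the admin and I/O teardown paths, a failed queue is stopped only if its NVME_TCP_Q_ALLOCATED bit shows it was actually set up, and the allocation helpers are renamed so that the transport-prefixed nvme_tcp_alloc_io_queues() is the entry point while the low-level loop becomes __nvme_tcp_alloc_io_queues(). A one-line sketch of the allocated-queue check, as in the hunk:

    if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
        __nvme_tcp_stop_queue(&ctrl->queues[idx]);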
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index d94f25cde019..3ef0a4e5eed6 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig | |||
@@ -3,6 +3,7 @@ config NVME_TARGET | |||
3 | tristate "NVMe Target support" | 3 | tristate "NVMe Target support" |
4 | depends on BLOCK | 4 | depends on BLOCK |
5 | depends on CONFIGFS_FS | 5 | depends on CONFIGFS_FS |
6 | select SGL_ALLOC | ||
6 | help | 7 | help |
7 | This enabled target side support for the NVMe protocol, that is | 8 | This enabled target side support for the NVMe protocol, that is |
8 | it allows the Linux kernel to implement NVMe subsystems and | 9 | it allows the Linux kernel to implement NVMe subsystems and |
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index adb79545cdd7..08dd5af357f7 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c | |||
@@ -898,8 +898,8 @@ static struct config_group *nvmet_subsys_make(struct config_group *group, | |||
898 | } | 898 | } |
899 | 899 | ||
900 | subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME); | 900 | subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME); |
901 | if (!subsys) | 901 | if (IS_ERR(subsys)) |
902 | return ERR_PTR(-ENOMEM); | 902 | return ERR_CAST(subsys); |
903 | 903 | ||
904 | config_group_init_type_name(&subsys->group, name, &nvmet_subsys_type); | 904 | config_group_init_type_name(&subsys->group, name, &nvmet_subsys_type); |
905 | 905 | ||
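Note on the configfs hunk above: nvmet_subsys_alloc() now reports failure through an error pointer (see the core.c hunk below), so the caller switches from a NULL check to IS_ERR()/ERR_CAST() and forwards the exact errno instead of assuming -ENOMEM. A caller-side sketch of this error-pointer convention (helpers from linux/err.h):

    subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME);
    if (IS_ERR(subsys))
        return ERR_CAST(subsys);    /* propagate -ENOMEM or -EINVAL unchanged */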
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b3e765a95af8..7734a6acff85 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/random.h> | 8 | #include <linux/random.h> |
9 | #include <linux/rculist.h> | 9 | #include <linux/rculist.h> |
10 | #include <linux/pci-p2pdma.h> | 10 | #include <linux/pci-p2pdma.h> |
11 | #include <linux/scatterlist.h> | ||
11 | 12 | ||
12 | #include "nvmet.h" | 13 | #include "nvmet.h" |
13 | 14 | ||
@@ -214,6 +215,8 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) | |||
214 | { | 215 | { |
215 | struct nvmet_ctrl *ctrl; | 216 | struct nvmet_ctrl *ctrl; |
216 | 217 | ||
218 | lockdep_assert_held(&subsys->lock); | ||
219 | |||
217 | list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { | 220 | list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { |
218 | nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); | 221 | nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); |
219 | if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) | 222 | if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) |
@@ -494,13 +497,14 @@ int nvmet_ns_enable(struct nvmet_ns *ns) | |||
494 | int ret; | 497 | int ret; |
495 | 498 | ||
496 | mutex_lock(&subsys->lock); | 499 | mutex_lock(&subsys->lock); |
497 | ret = -EMFILE; | ||
498 | if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES) | ||
499 | goto out_unlock; | ||
500 | ret = 0; | 500 | ret = 0; |
501 | if (ns->enabled) | 501 | if (ns->enabled) |
502 | goto out_unlock; | 502 | goto out_unlock; |
503 | 503 | ||
504 | ret = -EMFILE; | ||
505 | if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES) | ||
506 | goto out_unlock; | ||
507 | |||
504 | ret = nvmet_bdev_ns_enable(ns); | 508 | ret = nvmet_bdev_ns_enable(ns); |
505 | if (ret == -ENOTBLK) | 509 | if (ret == -ENOTBLK) |
506 | ret = nvmet_file_ns_enable(ns); | 510 | ret = nvmet_file_ns_enable(ns); |
@@ -644,7 +648,7 @@ static void nvmet_update_sq_head(struct nvmet_req *req) | |||
644 | } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != | 648 | } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != |
645 | old_sqhd); | 649 | old_sqhd); |
646 | } | 650 | } |
647 | req->rsp->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); | 651 | req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); |
648 | } | 652 | } |
649 | 653 | ||
650 | static void nvmet_set_error(struct nvmet_req *req, u16 status) | 654 | static void nvmet_set_error(struct nvmet_req *req, u16 status) |
@@ -653,7 +657,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) | |||
653 | struct nvme_error_slot *new_error_slot; | 657 | struct nvme_error_slot *new_error_slot; |
654 | unsigned long flags; | 658 | unsigned long flags; |
655 | 659 | ||
656 | req->rsp->status = cpu_to_le16(status << 1); | 660 | req->cqe->status = cpu_to_le16(status << 1); |
657 | 661 | ||
658 | if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC) | 662 | if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC) |
659 | return; | 663 | return; |
@@ -673,15 +677,15 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) | |||
673 | spin_unlock_irqrestore(&ctrl->error_lock, flags); | 677 | spin_unlock_irqrestore(&ctrl->error_lock, flags); |
674 | 678 | ||
675 | /* set the more bit for this request */ | 679 | /* set the more bit for this request */ |
676 | req->rsp->status |= cpu_to_le16(1 << 14); | 680 | req->cqe->status |= cpu_to_le16(1 << 14); |
677 | } | 681 | } |
678 | 682 | ||
679 | static void __nvmet_req_complete(struct nvmet_req *req, u16 status) | 683 | static void __nvmet_req_complete(struct nvmet_req *req, u16 status) |
680 | { | 684 | { |
681 | if (!req->sq->sqhd_disabled) | 685 | if (!req->sq->sqhd_disabled) |
682 | nvmet_update_sq_head(req); | 686 | nvmet_update_sq_head(req); |
683 | req->rsp->sq_id = cpu_to_le16(req->sq->qid); | 687 | req->cqe->sq_id = cpu_to_le16(req->sq->qid); |
684 | req->rsp->command_id = req->cmd->common.command_id; | 688 | req->cqe->command_id = req->cmd->common.command_id; |
685 | 689 | ||
686 | if (unlikely(status)) | 690 | if (unlikely(status)) |
687 | nvmet_set_error(req, status); | 691 | nvmet_set_error(req, status); |
@@ -838,8 +842,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, | |||
838 | req->sg = NULL; | 842 | req->sg = NULL; |
839 | req->sg_cnt = 0; | 843 | req->sg_cnt = 0; |
840 | req->transfer_len = 0; | 844 | req->transfer_len = 0; |
841 | req->rsp->status = 0; | 845 | req->cqe->status = 0; |
842 | req->rsp->sq_head = 0; | 846 | req->cqe->sq_head = 0; |
843 | req->ns = NULL; | 847 | req->ns = NULL; |
844 | req->error_loc = NVMET_NO_ERROR_LOC; | 848 | req->error_loc = NVMET_NO_ERROR_LOC; |
845 | req->error_slba = 0; | 849 | req->error_slba = 0; |
@@ -1066,7 +1070,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, | |||
1066 | if (!subsys) { | 1070 | if (!subsys) { |
1067 | pr_warn("connect request for invalid subsystem %s!\n", | 1071 | pr_warn("connect request for invalid subsystem %s!\n", |
1068 | subsysnqn); | 1072 | subsysnqn); |
1069 | req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); | 1073 | req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); |
1070 | return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; | 1074 | return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; |
1071 | } | 1075 | } |
1072 | 1076 | ||
@@ -1087,7 +1091,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, | |||
1087 | 1091 | ||
1088 | pr_warn("could not find controller %d for subsys %s / host %s\n", | 1092 | pr_warn("could not find controller %d for subsys %s / host %s\n", |
1089 | cntlid, subsysnqn, hostnqn); | 1093 | cntlid, subsysnqn, hostnqn); |
1090 | req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); | 1094 | req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); |
1091 | status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; | 1095 | status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; |
1092 | 1096 | ||
1093 | out: | 1097 | out: |
@@ -1185,7 +1189,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, | |||
1185 | if (!subsys) { | 1189 | if (!subsys) { |
1186 | pr_warn("connect request for invalid subsystem %s!\n", | 1190 | pr_warn("connect request for invalid subsystem %s!\n", |
1187 | subsysnqn); | 1191 | subsysnqn); |
1188 | req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); | 1192 | req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); |
1189 | goto out; | 1193 | goto out; |
1190 | } | 1194 | } |
1191 | 1195 | ||
@@ -1194,7 +1198,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, | |||
1194 | if (!nvmet_host_allowed(subsys, hostnqn)) { | 1198 | if (!nvmet_host_allowed(subsys, hostnqn)) { |
1195 | pr_info("connect by host %s for subsystem %s not allowed\n", | 1199 | pr_info("connect by host %s for subsystem %s not allowed\n", |
1196 | hostnqn, subsysnqn); | 1200 | hostnqn, subsysnqn); |
1197 | req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); | 1201 | req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); |
1198 | up_read(&nvmet_config_sem); | 1202 | up_read(&nvmet_config_sem); |
1199 | status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR; | 1203 | status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR; |
1200 | goto out_put_subsystem; | 1204 | goto out_put_subsystem; |
@@ -1364,7 +1368,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, | |||
1364 | 1368 | ||
1365 | subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); | 1369 | subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); |
1366 | if (!subsys) | 1370 | if (!subsys) |
1367 | return NULL; | 1371 | return ERR_PTR(-ENOMEM); |
1368 | 1372 | ||
1369 | subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */ | 1373 | subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */ |
1370 | /* generate a random serial number as our controllers are ephemeral: */ | 1374 | /* generate a random serial number as our controllers are ephemeral: */ |
@@ -1380,14 +1384,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, | |||
1380 | default: | 1384 | default: |
1381 | pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); | 1385 | pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); |
1382 | kfree(subsys); | 1386 | kfree(subsys); |
1383 | return NULL; | 1387 | return ERR_PTR(-EINVAL); |
1384 | } | 1388 | } |
1385 | subsys->type = type; | 1389 | subsys->type = type; |
1386 | subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, | 1390 | subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, |
1387 | GFP_KERNEL); | 1391 | GFP_KERNEL); |
1388 | if (!subsys->subsysnqn) { | 1392 | if (!subsys->subsysnqn) { |
1389 | kfree(subsys); | 1393 | kfree(subsys); |
1390 | return NULL; | 1394 | return ERR_PTR(-ENOMEM); |
1391 | } | 1395 | } |
1392 | 1396 | ||
1393 | kref_init(&subsys->ref); | 1397 | kref_init(&subsys->ref); |
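Note on the nvmet core hunks above: this is the callee side of the error-pointer change, with nvmet_subsys_alloc() returning ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EINVAL) for an unknown subsystem type, so callers can tell the two apart. The same section renames the struct nvmet_req completion pointer from rsp to cqe, which better reflects that it points at a struct nvme_completion (a CQE), and adds a lockdep_assert_held() on subsys->lock where the controller list is walked. Callee-side sketch of the error-pointer return:

    subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
    if (!subsys)
        return ERR_PTR(-ENOMEM);    /* previously: return NULL */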
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 33ed95e72d6b..5baf269f3f8a 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c | |||
@@ -30,14 +30,17 @@ void nvmet_port_disc_changed(struct nvmet_port *port, | |||
30 | { | 30 | { |
31 | struct nvmet_ctrl *ctrl; | 31 | struct nvmet_ctrl *ctrl; |
32 | 32 | ||
33 | lockdep_assert_held(&nvmet_config_sem); | ||
33 | nvmet_genctr++; | 34 | nvmet_genctr++; |
34 | 35 | ||
36 | mutex_lock(&nvmet_disc_subsys->lock); | ||
35 | list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { | 37 | list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { |
36 | if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn)) | 38 | if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn)) |
37 | continue; | 39 | continue; |
38 | 40 | ||
39 | __nvmet_disc_changed(port, ctrl); | 41 | __nvmet_disc_changed(port, ctrl); |
40 | } | 42 | } |
43 | mutex_unlock(&nvmet_disc_subsys->lock); | ||
41 | } | 44 | } |
42 | 45 | ||
43 | static void __nvmet_subsys_disc_changed(struct nvmet_port *port, | 46 | static void __nvmet_subsys_disc_changed(struct nvmet_port *port, |
@@ -46,12 +49,14 @@ static void __nvmet_subsys_disc_changed(struct nvmet_port *port, | |||
46 | { | 49 | { |
47 | struct nvmet_ctrl *ctrl; | 50 | struct nvmet_ctrl *ctrl; |
48 | 51 | ||
52 | mutex_lock(&nvmet_disc_subsys->lock); | ||
49 | list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { | 53 | list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { |
50 | if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn)) | 54 | if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn)) |
51 | continue; | 55 | continue; |
52 | 56 | ||
53 | __nvmet_disc_changed(port, ctrl); | 57 | __nvmet_disc_changed(port, ctrl); |
54 | } | 58 | } |
59 | mutex_unlock(&nvmet_disc_subsys->lock); | ||
55 | } | 60 | } |
56 | 61 | ||
57 | void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, | 62 | void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, |
@@ -372,8 +377,8 @@ int __init nvmet_init_discovery(void) | |||
372 | { | 377 | { |
373 | nvmet_disc_subsys = | 378 | nvmet_disc_subsys = |
374 | nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC); | 379 | nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC); |
375 | if (!nvmet_disc_subsys) | 380 | if (IS_ERR(nvmet_disc_subsys)) |
376 | return -ENOMEM; | 381 | return PTR_ERR(nvmet_disc_subsys); |
377 | return 0; | 382 | return 0; |
378 | } | 383 | } |
379 | 384 | ||
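Note on the discovery hunks above: the change-log helpers now spell out their locking, asserting that nvmet_config_sem is held and taking nvmet_disc_subsys->lock around the walk of the discovery controller list so the list cannot change under the iteration. A reduced sketch of the pattern (the host/subsystem filtering from the real code is omitted):

    lockdep_assert_held(&nvmet_config_sem);

    mutex_lock(&nvmet_disc_subsys->lock);
    list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry)
        __nvmet_disc_changed(port, ctrl);
    mutex_unlock(&nvmet_disc_subsys->lock);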
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 3a76ebc3d155..3b9f79aba98f 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c | |||
@@ -72,7 +72,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) | |||
72 | offsetof(struct nvmf_property_get_command, attrib); | 72 | offsetof(struct nvmf_property_get_command, attrib); |
73 | } | 73 | } |
74 | 74 | ||
75 | req->rsp->result.u64 = cpu_to_le64(val); | 75 | req->cqe->result.u64 = cpu_to_le64(val); |
76 | nvmet_req_complete(req, status); | 76 | nvmet_req_complete(req, status); |
77 | } | 77 | } |
78 | 78 | ||
@@ -124,7 +124,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) | |||
124 | 124 | ||
125 | if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) { | 125 | if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) { |
126 | req->sq->sqhd_disabled = true; | 126 | req->sq->sqhd_disabled = true; |
127 | req->rsp->sq_head = cpu_to_le16(0xffff); | 127 | req->cqe->sq_head = cpu_to_le16(0xffff); |
128 | } | 128 | } |
129 | 129 | ||
130 | if (ctrl->ops->install_queue) { | 130 | if (ctrl->ops->install_queue) { |
@@ -158,7 +158,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) | |||
158 | goto out; | 158 | goto out; |
159 | 159 | ||
160 | /* zero out initial completion result, assign values as needed */ | 160 | /* zero out initial completion result, assign values as needed */ |
161 | req->rsp->result.u32 = 0; | 161 | req->cqe->result.u32 = 0; |
162 | 162 | ||
163 | if (c->recfmt != 0) { | 163 | if (c->recfmt != 0) { |
164 | pr_warn("invalid connect version (%d).\n", | 164 | pr_warn("invalid connect version (%d).\n", |
@@ -172,7 +172,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) | |||
172 | pr_warn("connect attempt for invalid controller ID %#x\n", | 172 | pr_warn("connect attempt for invalid controller ID %#x\n", |
173 | d->cntlid); | 173 | d->cntlid); |
174 | status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; | 174 | status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; |
175 | req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); | 175 | req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); |
176 | goto out; | 176 | goto out; |
177 | } | 177 | } |
178 | 178 | ||
@@ -195,7 +195,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) | |||
195 | 195 | ||
196 | pr_info("creating controller %d for subsystem %s for NQN %s.\n", | 196 | pr_info("creating controller %d for subsystem %s for NQN %s.\n", |
197 | ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); | 197 | ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); |
198 | req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); | 198 | req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); |
199 | 199 | ||
200 | out: | 200 | out: |
201 | kfree(d); | 201 | kfree(d); |
@@ -222,7 +222,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) | |||
222 | goto out; | 222 | goto out; |
223 | 223 | ||
224 | /* zero out initial completion result, assign values as needed */ | 224 | /* zero out initial completion result, assign values as needed */ |
225 | req->rsp->result.u32 = 0; | 225 | req->cqe->result.u32 = 0; |
226 | 226 | ||
227 | if (c->recfmt != 0) { | 227 | if (c->recfmt != 0) { |
228 | pr_warn("invalid connect version (%d).\n", | 228 | pr_warn("invalid connect version (%d).\n", |
@@ -240,14 +240,14 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) | |||
240 | if (unlikely(qid > ctrl->subsys->max_qid)) { | 240 | if (unlikely(qid > ctrl->subsys->max_qid)) { |
241 | pr_warn("invalid queue id (%d)\n", qid); | 241 | pr_warn("invalid queue id (%d)\n", qid); |
242 | status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; | 242 | status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; |
243 | req->rsp->result.u32 = IPO_IATTR_CONNECT_SQE(qid); | 243 | req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(qid); |
244 | goto out_ctrl_put; | 244 | goto out_ctrl_put; |
245 | } | 245 | } |
246 | 246 | ||
247 | status = nvmet_install_queue(ctrl, req); | 247 | status = nvmet_install_queue(ctrl, req); |
248 | if (status) { | 248 | if (status) { |
249 | /* pass back cntlid that had the issue of installing queue */ | 249 | /* pass back cntlid that had the issue of installing queue */ |
250 | req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); | 250 | req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); |
251 | goto out_ctrl_put; | 251 | goto out_ctrl_put; |
252 | } | 252 | } |
253 | 253 | ||
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 98b7b1f4ee96..508661af0f50 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c | |||
@@ -128,12 +128,12 @@ struct nvmet_fc_tgt_queue { | |||
128 | struct nvmet_cq nvme_cq; | 128 | struct nvmet_cq nvme_cq; |
129 | struct nvmet_sq nvme_sq; | 129 | struct nvmet_sq nvme_sq; |
130 | struct nvmet_fc_tgt_assoc *assoc; | 130 | struct nvmet_fc_tgt_assoc *assoc; |
131 | struct nvmet_fc_fcp_iod *fod; /* array of fcp_iods */ | ||
132 | struct list_head fod_list; | 131 | struct list_head fod_list; |
133 | struct list_head pending_cmd_list; | 132 | struct list_head pending_cmd_list; |
134 | struct list_head avail_defer_list; | 133 | struct list_head avail_defer_list; |
135 | struct workqueue_struct *work_q; | 134 | struct workqueue_struct *work_q; |
136 | struct kref ref; | 135 | struct kref ref; |
136 | struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */ | ||
137 | } __aligned(sizeof(unsigned long long)); | 137 | } __aligned(sizeof(unsigned long long)); |
138 | 138 | ||
139 | struct nvmet_fc_tgt_assoc { | 139 | struct nvmet_fc_tgt_assoc { |
@@ -588,9 +588,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, | |||
588 | if (qid > NVMET_NR_QUEUES) | 588 | if (qid > NVMET_NR_QUEUES) |
589 | return NULL; | 589 | return NULL; |
590 | 590 | ||
591 | queue = kzalloc((sizeof(*queue) + | 591 | queue = kzalloc(struct_size(queue, fod, sqsize), GFP_KERNEL); |
592 | (sizeof(struct nvmet_fc_fcp_iod) * sqsize)), | ||
593 | GFP_KERNEL); | ||
594 | if (!queue) | 592 | if (!queue) |
595 | return NULL; | 593 | return NULL; |
596 | 594 | ||
@@ -603,7 +601,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, | |||
603 | if (!queue->work_q) | 601 | if (!queue->work_q) |
604 | goto out_a_put; | 602 | goto out_a_put; |
605 | 603 | ||
606 | queue->fod = (struct nvmet_fc_fcp_iod *)&queue[1]; | ||
607 | queue->qid = qid; | 604 | queue->qid = qid; |
608 | queue->sqsize = sqsize; | 605 | queue->sqsize = sqsize; |
609 | queue->assoc = assoc; | 606 | queue->assoc = assoc; |
@@ -2187,7 +2184,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, | |||
2187 | } | 2184 | } |
2188 | 2185 | ||
2189 | fod->req.cmd = &fod->cmdiubuf.sqe; | 2186 | fod->req.cmd = &fod->cmdiubuf.sqe; |
2190 | fod->req.rsp = &fod->rspiubuf.cqe; | 2187 | fod->req.cqe = &fod->rspiubuf.cqe; |
2191 | fod->req.port = tgtport->pe->port; | 2188 | fod->req.port = tgtport->pe->port; |
2192 | 2189 | ||
2193 | /* clear any response payload */ | 2190 | /* clear any response payload */ |
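Note on the nvmet-fc hunks above: struct nvmet_fc_tgt_queue now embeds the per-command fod array as a C99 flexible array member instead of a separate pointer aimed just past the structure, and the allocation uses the overflow-checked struct_size() helper instead of hand-rolled size arithmetic. A self-contained sketch of the same pattern using hypothetical demo_* names (not the driver's types):

    struct demo_iod {
        u32 state;                  /* per-command bookkeeping */
    };

    struct demo_queue {
        u16 qid;
        u16 sqsize;
        struct demo_iod iod[];      /* flexible array member, must be last */
    };

    /* one allocation covers the header plus sqsize trailing elements */
    q = kzalloc(struct_size(q, iod, sqsize), GFP_KERNEL);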
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index a065dbfc43b1..3efc52f9c309 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c | |||
@@ -196,7 +196,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req, | |||
196 | GFP_KERNEL, 0, bio); | 196 | GFP_KERNEL, 0, bio); |
197 | if (ret && ret != -EOPNOTSUPP) { | 197 | if (ret && ret != -EOPNOTSUPP) { |
198 | req->error_slba = le64_to_cpu(range->slba); | 198 | req->error_slba = le64_to_cpu(range->slba); |
199 | return blk_to_nvme_status(req, errno_to_blk_status(ret)); | 199 | return errno_to_nvme_status(req, ret); |
200 | } | 200 | } |
201 | return NVME_SC_SUCCESS; | 201 | return NVME_SC_SUCCESS; |
202 | } | 202 | } |
@@ -252,7 +252,6 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) | |||
252 | { | 252 | { |
253 | struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; | 253 | struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; |
254 | struct bio *bio = NULL; | 254 | struct bio *bio = NULL; |
255 | u16 status = NVME_SC_SUCCESS; | ||
256 | sector_t sector; | 255 | sector_t sector; |
257 | sector_t nr_sector; | 256 | sector_t nr_sector; |
258 | int ret; | 257 | int ret; |
@@ -264,13 +263,12 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) | |||
264 | 263 | ||
265 | ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, | 264 | ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, |
266 | GFP_KERNEL, &bio, 0); | 265 | GFP_KERNEL, &bio, 0); |
267 | status = blk_to_nvme_status(req, errno_to_blk_status(ret)); | ||
268 | if (bio) { | 266 | if (bio) { |
269 | bio->bi_private = req; | 267 | bio->bi_private = req; |
270 | bio->bi_end_io = nvmet_bio_done; | 268 | bio->bi_end_io = nvmet_bio_done; |
271 | submit_bio(bio); | 269 | submit_bio(bio); |
272 | } else { | 270 | } else { |
273 | nvmet_req_complete(req, status); | 271 | nvmet_req_complete(req, errno_to_nvme_status(req, ret)); |
274 | } | 272 | } |
275 | } | 273 | } |
276 | 274 | ||
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index bc6ebb51b0bf..05453f5d1448 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c | |||
@@ -49,7 +49,12 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) | |||
49 | goto err; | 49 | goto err; |
50 | 50 | ||
51 | ns->size = stat.size; | 51 | ns->size = stat.size; |
52 | ns->blksize_shift = file_inode(ns->file)->i_blkbits; | 52 | /* |
53 | * i_blkbits can be greater than the universally accepted upper bound, | ||
54 | * so make sure we export a sane namespace lba_shift. | ||
55 | */ | ||
56 | ns->blksize_shift = min_t(u8, | ||
57 | file_inode(ns->file)->i_blkbits, 12); | ||
53 | 58 | ||
54 | ns->bvec_cache = kmem_cache_create("nvmet-bvec", | 59 | ns->bvec_cache = kmem_cache_create("nvmet-bvec", |
55 | NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), | 60 | NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), |
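Note on the io-cmd-file hunk above: a file-backed namespace no longer exports i_blkbits directly as its LBA shift, because a filesystem can report a block size larger than the commonly supported upper bound; the shift is clamped to 12 (4 KiB):

    ns->blksize_shift = min_t(u8, file_inode(ns->file)->i_blkbits, 12);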
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index b9f623ab01f3..9e211ad6bdd3 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c | |||
@@ -18,7 +18,7 @@ | |||
18 | struct nvme_loop_iod { | 18 | struct nvme_loop_iod { |
19 | struct nvme_request nvme_req; | 19 | struct nvme_request nvme_req; |
20 | struct nvme_command cmd; | 20 | struct nvme_command cmd; |
21 | struct nvme_completion rsp; | 21 | struct nvme_completion cqe; |
22 | struct nvmet_req req; | 22 | struct nvmet_req req; |
23 | struct nvme_loop_queue *queue; | 23 | struct nvme_loop_queue *queue; |
24 | struct work_struct work; | 24 | struct work_struct work; |
@@ -94,7 +94,7 @@ static void nvme_loop_queue_response(struct nvmet_req *req) | |||
94 | { | 94 | { |
95 | struct nvme_loop_queue *queue = | 95 | struct nvme_loop_queue *queue = |
96 | container_of(req->sq, struct nvme_loop_queue, nvme_sq); | 96 | container_of(req->sq, struct nvme_loop_queue, nvme_sq); |
97 | struct nvme_completion *cqe = req->rsp; | 97 | struct nvme_completion *cqe = req->cqe; |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * AEN requests are special as they don't time out and can | 100 | * AEN requests are special as they don't time out and can |
@@ -129,20 +129,6 @@ static void nvme_loop_execute_work(struct work_struct *work) | |||
129 | nvmet_req_execute(&iod->req); | 129 | nvmet_req_execute(&iod->req); |
130 | } | 130 | } |
131 | 131 | ||
132 | static enum blk_eh_timer_return | ||
133 | nvme_loop_timeout(struct request *rq, bool reserved) | ||
134 | { | ||
135 | struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq); | ||
136 | |||
137 | /* queue error recovery */ | ||
138 | nvme_reset_ctrl(&iod->queue->ctrl->ctrl); | ||
139 | |||
140 | /* fail with DNR on admin cmd timeout */ | ||
141 | nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; | ||
142 | |||
143 | return BLK_EH_DONE; | ||
144 | } | ||
145 | |||
146 | static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, | 132 | static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, |
147 | const struct blk_mq_queue_data *bd) | 133 | const struct blk_mq_queue_data *bd) |
148 | { | 134 | { |
@@ -207,7 +193,7 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, | |||
207 | struct nvme_loop_iod *iod, unsigned int queue_idx) | 193 | struct nvme_loop_iod *iod, unsigned int queue_idx) |
208 | { | 194 | { |
209 | iod->req.cmd = &iod->cmd; | 195 | iod->req.cmd = &iod->cmd; |
210 | iod->req.rsp = &iod->rsp; | 196 | iod->req.cqe = &iod->cqe; |
211 | iod->queue = &ctrl->queues[queue_idx]; | 197 | iod->queue = &ctrl->queues[queue_idx]; |
212 | INIT_WORK(&iod->work, nvme_loop_execute_work); | 198 | INIT_WORK(&iod->work, nvme_loop_execute_work); |
213 | return 0; | 199 | return 0; |
@@ -253,7 +239,6 @@ static const struct blk_mq_ops nvme_loop_mq_ops = { | |||
253 | .complete = nvme_loop_complete_rq, | 239 | .complete = nvme_loop_complete_rq, |
254 | .init_request = nvme_loop_init_request, | 240 | .init_request = nvme_loop_init_request, |
255 | .init_hctx = nvme_loop_init_hctx, | 241 | .init_hctx = nvme_loop_init_hctx, |
256 | .timeout = nvme_loop_timeout, | ||
257 | }; | 242 | }; |
258 | 243 | ||
259 | static const struct blk_mq_ops nvme_loop_admin_mq_ops = { | 244 | static const struct blk_mq_ops nvme_loop_admin_mq_ops = { |
@@ -261,7 +246,6 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = { | |||
261 | .complete = nvme_loop_complete_rq, | 246 | .complete = nvme_loop_complete_rq, |
262 | .init_request = nvme_loop_init_request, | 247 | .init_request = nvme_loop_init_request, |
263 | .init_hctx = nvme_loop_init_admin_hctx, | 248 | .init_hctx = nvme_loop_init_admin_hctx, |
264 | .timeout = nvme_loop_timeout, | ||
265 | }; | 249 | }; |
266 | 250 | ||
267 | static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) | 251 | static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) |
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 1653d19b187f..c25d88fc9dec 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h | |||
@@ -284,7 +284,7 @@ struct nvmet_fabrics_ops { | |||
284 | 284 | ||
285 | struct nvmet_req { | 285 | struct nvmet_req { |
286 | struct nvme_command *cmd; | 286 | struct nvme_command *cmd; |
287 | struct nvme_completion *rsp; | 287 | struct nvme_completion *cqe; |
288 | struct nvmet_sq *sq; | 288 | struct nvmet_sq *sq; |
289 | struct nvmet_cq *cq; | 289 | struct nvmet_cq *cq; |
290 | struct nvmet_ns *ns; | 290 | struct nvmet_ns *ns; |
@@ -322,7 +322,7 @@ extern struct workqueue_struct *buffered_io_wq; | |||
322 | 322 | ||
323 | static inline void nvmet_set_result(struct nvmet_req *req, u32 result) | 323 | static inline void nvmet_set_result(struct nvmet_req *req, u32 result) |
324 | { | 324 | { |
325 | req->rsp->result.u32 = cpu_to_le32(result); | 325 | req->cqe->result.u32 = cpu_to_le32(result); |
326 | } | 326 | } |
327 | 327 | ||
328 | /* | 328 | /* |
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ef893addf341..36d906a7f70d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c | |||
@@ -160,7 +160,7 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp) | |||
160 | { | 160 | { |
161 | return !nvme_is_write(rsp->req.cmd) && | 161 | return !nvme_is_write(rsp->req.cmd) && |
162 | rsp->req.transfer_len && | 162 | rsp->req.transfer_len && |
163 | !rsp->req.rsp->status && | 163 | !rsp->req.cqe->status && |
164 | !(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA); | 164 | !(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA); |
165 | } | 165 | } |
166 | 166 | ||
@@ -364,16 +364,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, | |||
364 | struct nvmet_rdma_rsp *r) | 364 | struct nvmet_rdma_rsp *r) |
365 | { | 365 | { |
366 | /* NVMe CQE / RDMA SEND */ | 366 | /* NVMe CQE / RDMA SEND */ |
367 | r->req.rsp = kmalloc(sizeof(*r->req.rsp), GFP_KERNEL); | 367 | r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL); |
368 | if (!r->req.rsp) | 368 | if (!r->req.cqe) |
369 | goto out; | 369 | goto out; |
370 | 370 | ||
371 | r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.rsp, | 371 | r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.cqe, |
372 | sizeof(*r->req.rsp), DMA_TO_DEVICE); | 372 | sizeof(*r->req.cqe), DMA_TO_DEVICE); |
373 | if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) | 373 | if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) |
374 | goto out_free_rsp; | 374 | goto out_free_rsp; |
375 | 375 | ||
376 | r->send_sge.length = sizeof(*r->req.rsp); | 376 | r->req.p2p_client = &ndev->device->dev; |
377 | r->send_sge.length = sizeof(*r->req.cqe); | ||
377 | r->send_sge.lkey = ndev->pd->local_dma_lkey; | 378 | r->send_sge.lkey = ndev->pd->local_dma_lkey; |
378 | 379 | ||
379 | r->send_cqe.done = nvmet_rdma_send_done; | 380 | r->send_cqe.done = nvmet_rdma_send_done; |
@@ -388,7 +389,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, | |||
388 | return 0; | 389 | return 0; |
389 | 390 | ||
390 | out_free_rsp: | 391 | out_free_rsp: |
391 | kfree(r->req.rsp); | 392 | kfree(r->req.cqe); |
392 | out: | 393 | out: |
393 | return -ENOMEM; | 394 | return -ENOMEM; |
394 | } | 395 | } |
@@ -397,8 +398,8 @@ static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, | |||
397 | struct nvmet_rdma_rsp *r) | 398 | struct nvmet_rdma_rsp *r) |
398 | { | 399 | { |
399 | ib_dma_unmap_single(ndev->device, r->send_sge.addr, | 400 | ib_dma_unmap_single(ndev->device, r->send_sge.addr, |
400 | sizeof(*r->req.rsp), DMA_TO_DEVICE); | 401 | sizeof(*r->req.cqe), DMA_TO_DEVICE); |
401 | kfree(r->req.rsp); | 402 | kfree(r->req.cqe); |
402 | } | 403 | } |
403 | 404 | ||
404 | static int | 405 | static int |
@@ -763,8 +764,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, | |||
763 | cmd->send_sge.addr, cmd->send_sge.length, | 764 | cmd->send_sge.addr, cmd->send_sge.length, |
764 | DMA_TO_DEVICE); | 765 | DMA_TO_DEVICE); |
765 | 766 | ||
766 | cmd->req.p2p_client = &queue->dev->device->dev; | ||
767 | |||
768 | if (!nvmet_req_init(&cmd->req, &queue->nvme_cq, | 767 | if (!nvmet_req_init(&cmd->req, &queue->nvme_cq, |
769 | &queue->nvme_sq, &nvmet_rdma_ops)) | 768 | &queue->nvme_sq, &nvmet_rdma_ops)) |
770 | return; | 769 | return; |
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index ad0df786fe93..69b83fa0c76c 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c | |||
@@ -161,14 +161,14 @@ static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd) | |||
161 | 161 | ||
162 | static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd) | 162 | static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd) |
163 | { | 163 | { |
164 | return nvmet_tcp_has_data_in(cmd) && !cmd->req.rsp->status; | 164 | return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status; |
165 | } | 165 | } |
166 | 166 | ||
167 | static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd) | 167 | static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd) |
168 | { | 168 | { |
169 | return !nvme_is_write(cmd->req.cmd) && | 169 | return !nvme_is_write(cmd->req.cmd) && |
170 | cmd->req.transfer_len > 0 && | 170 | cmd->req.transfer_len > 0 && |
171 | !cmd->req.rsp->status; | 171 | !cmd->req.cqe->status; |
172 | } | 172 | } |
173 | 173 | ||
174 | static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd) | 174 | static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd) |
@@ -371,13 +371,14 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) | |||
371 | cmd->state = NVMET_TCP_SEND_DATA_PDU; | 371 | cmd->state = NVMET_TCP_SEND_DATA_PDU; |
372 | 372 | ||
373 | pdu->hdr.type = nvme_tcp_c2h_data; | 373 | pdu->hdr.type = nvme_tcp_c2h_data; |
374 | pdu->hdr.flags = NVME_TCP_F_DATA_LAST; | 374 | pdu->hdr.flags = NVME_TCP_F_DATA_LAST | (queue->nvme_sq.sqhd_disabled ? |
375 | NVME_TCP_F_DATA_SUCCESS : 0); | ||
375 | pdu->hdr.hlen = sizeof(*pdu); | 376 | pdu->hdr.hlen = sizeof(*pdu); |
376 | pdu->hdr.pdo = pdu->hdr.hlen + hdgst; | 377 | pdu->hdr.pdo = pdu->hdr.hlen + hdgst; |
377 | pdu->hdr.plen = | 378 | pdu->hdr.plen = |
378 | cpu_to_le32(pdu->hdr.hlen + hdgst + | 379 | cpu_to_le32(pdu->hdr.hlen + hdgst + |
379 | cmd->req.transfer_len + ddgst); | 380 | cmd->req.transfer_len + ddgst); |
380 | pdu->command_id = cmd->req.rsp->command_id; | 381 | pdu->command_id = cmd->req.cqe->command_id; |
381 | pdu->data_length = cpu_to_le32(cmd->req.transfer_len); | 382 | pdu->data_length = cpu_to_le32(cmd->req.transfer_len); |
382 | pdu->data_offset = cpu_to_le32(cmd->wbytes_done); | 383 | pdu->data_offset = cpu_to_le32(cmd->wbytes_done); |
383 | 384 | ||
@@ -542,8 +543,19 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) | |||
542 | cmd->state = NVMET_TCP_SEND_DDGST; | 543 | cmd->state = NVMET_TCP_SEND_DDGST; |
543 | cmd->offset = 0; | 544 | cmd->offset = 0; |
544 | } else { | 545 | } else { |
545 | nvmet_setup_response_pdu(cmd); | 546 | if (queue->nvme_sq.sqhd_disabled) { |
547 | cmd->queue->snd_cmd = NULL; | ||
548 | nvmet_tcp_put_cmd(cmd); | ||
549 | } else { | ||
550 | nvmet_setup_response_pdu(cmd); | ||
551 | } | ||
552 | } | ||
553 | |||
554 | if (queue->nvme_sq.sqhd_disabled) { | ||
555 | kfree(cmd->iov); | ||
556 | sgl_free(cmd->req.sg); | ||
546 | } | 557 | } |
558 | |||
547 | return 1; | 559 | return 1; |
548 | 560 | ||
549 | } | 561 | } |
@@ -619,7 +631,13 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) | |||
619 | return ret; | 631 | return ret; |
620 | 632 | ||
621 | cmd->offset += ret; | 633 | cmd->offset += ret; |
622 | nvmet_setup_response_pdu(cmd); | 634 | |
635 | if (queue->nvme_sq.sqhd_disabled) { | ||
636 | cmd->queue->snd_cmd = NULL; | ||
637 | nvmet_tcp_put_cmd(cmd); | ||
638 | } else { | ||
639 | nvmet_setup_response_pdu(cmd); | ||
640 | } | ||
623 | return 1; | 641 | return 1; |
624 | } | 642 | } |
625 | 643 | ||
@@ -756,12 +774,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) | |||
756 | return -EPROTO; | 774 | return -EPROTO; |
757 | } | 775 | } |
758 | 776 | ||
759 | if (icreq->maxr2t != 0) { | ||
760 | pr_err("queue %d: unsupported maxr2t %d\n", queue->idx, | ||
761 | le32_to_cpu(icreq->maxr2t) + 1); | ||
762 | return -EPROTO; | ||
763 | } | ||
764 | |||
765 | queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); | 777 | queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); |
766 | queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); | 778 | queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); |
767 | if (queue->hdr_digest || queue->data_digest) { | 779 | if (queue->hdr_digest || queue->data_digest) { |
@@ -1206,7 +1218,7 @@ static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, | |||
1206 | sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); | 1218 | sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); |
1207 | if (!c->rsp_pdu) | 1219 | if (!c->rsp_pdu) |
1208 | goto out_free_cmd; | 1220 | goto out_free_cmd; |
1209 | c->req.rsp = &c->rsp_pdu->cqe; | 1221 | c->req.cqe = &c->rsp_pdu->cqe; |
1210 | 1222 | ||
1211 | c->data_pdu = page_frag_alloc(&queue->pf_cache, | 1223 | c->data_pdu = page_frag_alloc(&queue->pf_cache, |
1212 | sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); | 1224 | sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); |
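Note on the nvmet-tcp hunks above: when the host has disabled SQ head updates (sqhd_disabled), the target advertises success on the final C2H data PDU via NVME_TCP_F_DATA_SUCCESS and skips the separate response capsule, releasing the command (and freeing its iov and scatterlist) as soon as the data or data digest has gone out; the maxr2t rejection in the ICReq handler is also dropped. Sketch of the flag selection from the hunk:

    pdu->hdr.flags = NVME_TCP_F_DATA_LAST |
            (queue->nvme_sq.sqhd_disabled ? NVME_TCP_F_DATA_SUCCESS : 0);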
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 2b2bc4b49d78..ebc80354714c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c | |||
@@ -2256,22 +2256,6 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, | |||
2256 | 2256 | ||
2257 | #define READ_CAPACITY_RETRIES_ON_RESET 10 | 2257 | #define READ_CAPACITY_RETRIES_ON_RESET 10 |
2258 | 2258 | ||
2259 | /* | ||
2260 | * Ensure that we don't overflow sector_t when CONFIG_LBDAF is not set | ||
2261 | * and the reported logical block size is bigger than 512 bytes. Note | ||
2262 | * that last_sector is a u64 and therefore logical_to_sectors() is not | ||
2263 | * applicable. | ||
2264 | */ | ||
2265 | static bool sd_addressable_capacity(u64 lba, unsigned int sector_size) | ||
2266 | { | ||
2267 | u64 last_sector = (lba + 1ULL) << (ilog2(sector_size) - 9); | ||
2268 | |||
2269 | if (sizeof(sector_t) == 4 && last_sector > U32_MAX) | ||
2270 | return false; | ||
2271 | |||
2272 | return true; | ||
2273 | } | ||
2274 | |||
2275 | static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, | 2259 | static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, |
2276 | unsigned char *buffer) | 2260 | unsigned char *buffer) |
2277 | { | 2261 | { |
@@ -2337,14 +2321,6 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, | |||
2337 | return -ENODEV; | 2321 | return -ENODEV; |
2338 | } | 2322 | } |
2339 | 2323 | ||
2340 | if (!sd_addressable_capacity(lba, sector_size)) { | ||
2341 | sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " | ||
2342 | "kernel compiled with support for large block " | ||
2343 | "devices.\n"); | ||
2344 | sdkp->capacity = 0; | ||
2345 | return -EOVERFLOW; | ||
2346 | } | ||
2347 | |||
2348 | /* Logical blocks per physical block exponent */ | 2324 | /* Logical blocks per physical block exponent */ |
2349 | sdkp->physical_block_size = (1 << (buffer[13] & 0xf)) * sector_size; | 2325 | sdkp->physical_block_size = (1 << (buffer[13] & 0xf)) * sector_size; |
2350 | 2326 | ||
@@ -2426,14 +2402,6 @@ static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, | |||
2426 | return sector_size; | 2402 | return sector_size; |
2427 | } | 2403 | } |
2428 | 2404 | ||
2429 | if (!sd_addressable_capacity(lba, sector_size)) { | ||
2430 | sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " | ||
2431 | "kernel compiled with support for large block " | ||
2432 | "devices.\n"); | ||
2433 | sdkp->capacity = 0; | ||
2434 | return -EOVERFLOW; | ||
2435 | } | ||
2436 | |||
2437 | sdkp->capacity = lba + 1; | 2405 | sdkp->capacity = lba + 1; |
2438 | sdkp->physical_block_size = sector_size; | 2406 | sdkp->physical_block_size = sector_size; |
2439 | return sector_size; | 2407 | return sector_size; |
@@ -3325,6 +3293,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie) | |||
3325 | if (sdp->removable) { | 3293 | if (sdp->removable) { |
3326 | gd->flags |= GENHD_FL_REMOVABLE; | 3294 | gd->flags |= GENHD_FL_REMOVABLE; |
3327 | gd->events |= DISK_EVENT_MEDIA_CHANGE; | 3295 | gd->events |= DISK_EVENT_MEDIA_CHANGE; |
3296 | gd->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT; | ||
3328 | } | 3297 | } |
3329 | 3298 | ||
3330 | blk_pm_runtime_init(sdp->request_queue, dev); | 3299 | blk_pm_runtime_init(sdp->request_queue, dev); |
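Note on the sd hunks above: the removed sd_addressable_capacity() check only guarded against capacities that overflow a 32-bit sector_t, a case its own comment ties to kernels built without CONFIG_LBDAF; with that option removed elsewhere in this series, sector_t is always 64-bit and both READ CAPACITY paths drop the check. Removable disks also now state explicitly how their media-change events are delivered:

    gd->events |= DISK_EVENT_MEDIA_CHANGE;
    gd->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT;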
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 039c27c2d7b3..c3f443d5aea8 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c | |||
@@ -716,6 +716,7 @@ static int sr_probe(struct device *dev) | |||
716 | disk->fops = &sr_bdops; | 716 | disk->fops = &sr_bdops; |
717 | disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; | 717 | disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; |
718 | disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST; | 718 | disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST; |
719 | disk->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT; | ||
719 | 720 | ||
720 | blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); | 721 | blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); |
721 | 722 | ||
diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c index c64ec76643d4..746685f90564 100644 --- a/drivers/staging/erofs/data.c +++ b/drivers/staging/erofs/data.c | |||
@@ -18,7 +18,6 @@ | |||
18 | static inline void read_endio(struct bio *bio) | 18 | static inline void read_endio(struct bio *bio) |
19 | { | 19 | { |
20 | struct super_block *const sb = bio->bi_private; | 20 | struct super_block *const sb = bio->bi_private; |
21 | int i; | ||
22 | struct bio_vec *bvec; | 21 | struct bio_vec *bvec; |
23 | blk_status_t err = bio->bi_status; | 22 | blk_status_t err = bio->bi_status; |
24 | struct bvec_iter_all iter_all; | 23 | struct bvec_iter_all iter_all; |
@@ -28,7 +27,7 @@ static inline void read_endio(struct bio *bio) | |||
28 | err = BLK_STS_IOERR; | 27 | err = BLK_STS_IOERR; |
29 | } | 28 | } |
30 | 29 | ||
31 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 30 | bio_for_each_segment_all(bvec, bio, iter_all) { |
32 | struct page *page = bvec->bv_page; | 31 | struct page *page = bvec->bv_page; |
33 | 32 | ||
34 | /* page is already locked */ | 33 | /* page is already locked */ |
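Note on the erofs hunk above and the filesystem hunks that follow: they all carry the same mechanical change, as bio_for_each_segment_all() no longer takes an integer index argument, so the now-unused int i locals disappear and the one caller that still wants a position (btrfs_retry_endio) keeps its own counter. A minimal sketch of the new calling convention, where handle_page() is a hypothetical per-page helper:

    struct bio_vec *bvec;
    struct bvec_iter_all iter_all;
    int i = 0;

    bio_for_each_segment_all(bvec, bio, iter_all) {
        handle_page(bvec->bv_page, i);  /* keep a counter only if one is needed */
        i++;
    }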
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index a2e03c932102..9ecaa872bae8 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c | |||
@@ -846,11 +846,10 @@ static inline void z_erofs_vle_read_endio(struct bio *bio) | |||
846 | { | 846 | { |
847 | struct erofs_sb_info *sbi = NULL; | 847 | struct erofs_sb_info *sbi = NULL; |
848 | blk_status_t err = bio->bi_status; | 848 | blk_status_t err = bio->bi_status; |
849 | unsigned int i; | ||
850 | struct bio_vec *bvec; | 849 | struct bio_vec *bvec; |
851 | struct bvec_iter_all iter_all; | 850 | struct bvec_iter_all iter_all; |
852 | 851 | ||
853 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 852 | bio_for_each_segment_all(bvec, bio, iter_all) { |
854 | struct page *page = bvec->bv_page; | 853 | struct page *page = bvec->bv_page; |
855 | bool cachemngd = false; | 854 | bool cachemngd = false; |
856 | 855 | ||
diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index f3fbb700f569..05a286d24f14 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c | |||
@@ -4,12 +4,13 @@ | |||
4 | #include <xen/xen.h> | 4 | #include <xen/xen.h> |
5 | #include <xen/page.h> | 5 | #include <xen/page.h> |
6 | 6 | ||
7 | /* check if @page can be merged with 'vec1' */ | ||
7 | bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, | 8 | bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, |
8 | const struct bio_vec *vec2) | 9 | const struct page *page) |
9 | { | 10 | { |
10 | #if XEN_PAGE_SIZE == PAGE_SIZE | 11 | #if XEN_PAGE_SIZE == PAGE_SIZE |
11 | unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page)); | 12 | unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page)); |
12 | unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page)); | 13 | unsigned long bfn2 = pfn_to_bfn(page_to_pfn(page)); |
13 | 14 | ||
14 | return bfn1 + PFN_DOWN(vec1->bv_offset + vec1->bv_len) == bfn2; | 15 | return bfn1 + PFN_DOWN(vec1->bv_offset + vec1->bv_len) == bfn2; |
15 | #else | 16 | #else |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 9ee3117ee0bf..500aaa3e5990 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -210,7 +210,6 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, | |||
210 | struct bio bio; | 210 | struct bio bio; |
211 | ssize_t ret; | 211 | ssize_t ret; |
212 | blk_qc_t qc; | 212 | blk_qc_t qc; |
213 | int i; | ||
214 | struct bvec_iter_all iter_all; | 213 | struct bvec_iter_all iter_all; |
215 | 214 | ||
216 | if ((pos | iov_iter_alignment(iter)) & | 215 | if ((pos | iov_iter_alignment(iter)) & |
@@ -261,7 +260,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, | |||
261 | } | 260 | } |
262 | __set_current_state(TASK_RUNNING); | 261 | __set_current_state(TASK_RUNNING); |
263 | 262 | ||
264 | bio_for_each_segment_all(bvec, &bio, i, iter_all) { | 263 | bio_for_each_segment_all(bvec, &bio, iter_all) { |
265 | if (should_dirty && !PageCompound(bvec->bv_page)) | 264 | if (should_dirty && !PageCompound(bvec->bv_page)) |
266 | set_page_dirty_lock(bvec->bv_page); | 265 | set_page_dirty_lock(bvec->bv_page); |
267 | if (!bio_flagged(&bio, BIO_NO_PAGE_REF)) | 266 | if (!bio_flagged(&bio, BIO_NO_PAGE_REF)) |
@@ -340,9 +339,8 @@ static void blkdev_bio_end_io(struct bio *bio) | |||
340 | if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { | 339 | if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { |
341 | struct bvec_iter_all iter_all; | 340 | struct bvec_iter_all iter_all; |
342 | struct bio_vec *bvec; | 341 | struct bio_vec *bvec; |
343 | int i; | ||
344 | 342 | ||
345 | bio_for_each_segment_all(bvec, bio, i, iter_all) | 343 | bio_for_each_segment_all(bvec, bio, iter_all) |
346 | put_page(bvec->bv_page); | 344 | put_page(bvec->bv_page); |
347 | } | 345 | } |
348 | bio_put(bio); | 346 | bio_put(bio); |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 1463e14af2fb..daf7908d1e35 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -160,7 +160,6 @@ csum_failed: | |||
160 | if (cb->errors) { | 160 | if (cb->errors) { |
161 | bio_io_error(cb->orig_bio); | 161 | bio_io_error(cb->orig_bio); |
162 | } else { | 162 | } else { |
163 | int i; | ||
164 | struct bio_vec *bvec; | 163 | struct bio_vec *bvec; |
165 | struct bvec_iter_all iter_all; | 164 | struct bvec_iter_all iter_all; |
166 | 165 | ||
@@ -169,7 +168,7 @@ csum_failed: | |||
169 | * checked so the end_io handlers know about it | 168 | * checked so the end_io handlers know about it |
170 | */ | 169 | */ |
171 | ASSERT(!bio_flagged(bio, BIO_CLONED)); | 170 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
172 | bio_for_each_segment_all(bvec, cb->orig_bio, i, iter_all) | 171 | bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) |
173 | SetPageChecked(bvec->bv_page); | 172 | SetPageChecked(bvec->bv_page); |
174 | 173 | ||
175 | bio_endio(cb->orig_bio); | 174 | bio_endio(cb->orig_bio); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 663efce22d98..deb74a8c191a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -849,11 +849,11 @@ static blk_status_t btree_csum_one_bio(struct bio *bio) | |||
849 | { | 849 | { |
850 | struct bio_vec *bvec; | 850 | struct bio_vec *bvec; |
851 | struct btrfs_root *root; | 851 | struct btrfs_root *root; |
852 | int i, ret = 0; | 852 | int ret = 0; |
853 | struct bvec_iter_all iter_all; | 853 | struct bvec_iter_all iter_all; |
854 | 854 | ||
855 | ASSERT(!bio_flagged(bio, BIO_CLONED)); | 855 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
856 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 856 | bio_for_each_segment_all(bvec, bio, iter_all) { |
857 | root = BTRFS_I(bvec->bv_page->mapping->host)->root; | 857 | root = BTRFS_I(bvec->bv_page->mapping->host)->root; |
858 | ret = csum_dirty_buffer(root->fs_info, bvec->bv_page); | 858 | ret = csum_dirty_buffer(root->fs_info, bvec->bv_page); |
859 | if (ret) | 859 | if (ret) |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 13fca7bfc1f2..db337e53aab3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2582,11 +2582,10 @@ static void end_bio_extent_writepage(struct bio *bio) | |||
2582 | struct bio_vec *bvec; | 2582 | struct bio_vec *bvec; |
2583 | u64 start; | 2583 | u64 start; |
2584 | u64 end; | 2584 | u64 end; |
2585 | int i; | ||
2586 | struct bvec_iter_all iter_all; | 2585 | struct bvec_iter_all iter_all; |
2587 | 2586 | ||
2588 | ASSERT(!bio_flagged(bio, BIO_CLONED)); | 2587 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
2589 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 2588 | bio_for_each_segment_all(bvec, bio, iter_all) { |
2590 | struct page *page = bvec->bv_page; | 2589 | struct page *page = bvec->bv_page; |
2591 | struct inode *inode = page->mapping->host; | 2590 | struct inode *inode = page->mapping->host; |
2592 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 2591 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
@@ -2654,11 +2653,10 @@ static void end_bio_extent_readpage(struct bio *bio) | |||
2654 | u64 extent_len = 0; | 2653 | u64 extent_len = 0; |
2655 | int mirror; | 2654 | int mirror; |
2656 | int ret; | 2655 | int ret; |
2657 | int i; | ||
2658 | struct bvec_iter_all iter_all; | 2656 | struct bvec_iter_all iter_all; |
2659 | 2657 | ||
2660 | ASSERT(!bio_flagged(bio, BIO_CLONED)); | 2658 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
2661 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 2659 | bio_for_each_segment_all(bvec, bio, iter_all) { |
2662 | struct page *page = bvec->bv_page; | 2660 | struct page *page = bvec->bv_page; |
2663 | struct inode *inode = page->mapping->host; | 2661 | struct inode *inode = page->mapping->host; |
2664 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 2662 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
@@ -3755,11 +3753,11 @@ static void end_bio_extent_buffer_writepage(struct bio *bio) | |||
3755 | { | 3753 | { |
3756 | struct bio_vec *bvec; | 3754 | struct bio_vec *bvec; |
3757 | struct extent_buffer *eb; | 3755 | struct extent_buffer *eb; |
3758 | int i, done; | 3756 | int done; |
3759 | struct bvec_iter_all iter_all; | 3757 | struct bvec_iter_all iter_all; |
3760 | 3758 | ||
3761 | ASSERT(!bio_flagged(bio, BIO_CLONED)); | 3759 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
3762 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 3760 | bio_for_each_segment_all(bvec, bio, iter_all) { |
3763 | struct page *page = bvec->bv_page; | 3761 | struct page *page = bvec->bv_page; |
3764 | 3762 | ||
3765 | eb = (struct extent_buffer *)page->private; | 3763 | eb = (struct extent_buffer *)page->private; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 56929daea0f7..9aba9660efe5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -7872,7 +7872,6 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) | |||
7872 | struct inode *inode = done->inode; | 7872 | struct inode *inode = done->inode; |
7873 | struct bio_vec *bvec; | 7873 | struct bio_vec *bvec; |
7874 | struct extent_io_tree *io_tree, *failure_tree; | 7874 | struct extent_io_tree *io_tree, *failure_tree; |
7875 | int i; | ||
7876 | struct bvec_iter_all iter_all; | 7875 | struct bvec_iter_all iter_all; |
7877 | 7876 | ||
7878 | if (bio->bi_status) | 7877 | if (bio->bi_status) |
@@ -7885,7 +7884,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) | |||
7885 | 7884 | ||
7886 | done->uptodate = 1; | 7885 | done->uptodate = 1; |
7887 | ASSERT(!bio_flagged(bio, BIO_CLONED)); | 7886 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
7888 | bio_for_each_segment_all(bvec, bio, i, iter_all) | 7887 | bio_for_each_segment_all(bvec, bio, iter_all) |
7889 | clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, | 7888 | clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, |
7890 | io_tree, done->start, bvec->bv_page, | 7889 | io_tree, done->start, bvec->bv_page, |
7891 | btrfs_ino(BTRFS_I(inode)), 0); | 7890 | btrfs_ino(BTRFS_I(inode)), 0); |
@@ -7963,7 +7962,7 @@ static void btrfs_retry_endio(struct bio *bio) | |||
7963 | struct bio_vec *bvec; | 7962 | struct bio_vec *bvec; |
7964 | int uptodate; | 7963 | int uptodate; |
7965 | int ret; | 7964 | int ret; |
7966 | int i; | 7965 | int i = 0; |
7967 | struct bvec_iter_all iter_all; | 7966 | struct bvec_iter_all iter_all; |
7968 | 7967 | ||
7969 | if (bio->bi_status) | 7968 | if (bio->bi_status) |
@@ -7978,7 +7977,7 @@ static void btrfs_retry_endio(struct bio *bio) | |||
7978 | failure_tree = &BTRFS_I(inode)->io_failure_tree; | 7977 | failure_tree = &BTRFS_I(inode)->io_failure_tree; |
7979 | 7978 | ||
7980 | ASSERT(!bio_flagged(bio, BIO_CLONED)); | 7979 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
7981 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 7980 | bio_for_each_segment_all(bvec, bio, iter_all) { |
7982 | ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, | 7981 | ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, |
7983 | bvec->bv_offset, done->start, | 7982 | bvec->bv_offset, done->start, |
7984 | bvec->bv_len); | 7983 | bvec->bv_len); |
@@ -7990,6 +7989,7 @@ static void btrfs_retry_endio(struct bio *bio) | |||
7990 | bvec->bv_offset); | 7989 | bvec->bv_offset); |
7991 | else | 7990 | else |
7992 | uptodate = 0; | 7991 | uptodate = 0; |
7992 | i++; | ||
7993 | } | 7993 | } |
7994 | 7994 | ||
7995 | done->uptodate = uptodate; | 7995 | done->uptodate = uptodate; |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 67a6f7d47402..f3d0576dd327 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -1442,12 +1442,11 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio, | |||
1442 | static void set_bio_pages_uptodate(struct bio *bio) | 1442 | static void set_bio_pages_uptodate(struct bio *bio) |
1443 | { | 1443 | { |
1444 | struct bio_vec *bvec; | 1444 | struct bio_vec *bvec; |
1445 | int i; | ||
1446 | struct bvec_iter_all iter_all; | 1445 | struct bvec_iter_all iter_all; |
1447 | 1446 | ||
1448 | ASSERT(!bio_flagged(bio, BIO_CLONED)); | 1447 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
1449 | 1448 | ||
1450 | bio_for_each_segment_all(bvec, bio, i, iter_all) | 1449 | bio_for_each_segment_all(bvec, bio, iter_all) |
1451 | SetPageUptodate(bvec->bv_page); | 1450 | SetPageUptodate(bvec->bv_page); |
1452 | } | 1451 | } |
1453 | 1452 | ||
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 5759bcd018cd..8f3a8bc15d98 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c | |||
@@ -29,10 +29,9 @@ | |||
29 | static void __fscrypt_decrypt_bio(struct bio *bio, bool done) | 29 | static void __fscrypt_decrypt_bio(struct bio *bio, bool done) |
30 | { | 30 | { |
31 | struct bio_vec *bv; | 31 | struct bio_vec *bv; |
32 | int i; | ||
33 | struct bvec_iter_all iter_all; | 32 | struct bvec_iter_all iter_all; |
34 | 33 | ||
35 | bio_for_each_segment_all(bv, bio, i, iter_all) { | 34 | bio_for_each_segment_all(bv, bio, iter_all) { |
36 | struct page *page = bv->bv_page; | 35 | struct page *page = bv->bv_page; |
37 | int ret = fscrypt_decrypt_page(page->mapping->host, page, | 36 | int ret = fscrypt_decrypt_page(page->mapping->host, page, |
38 | PAGE_SIZE, 0, page->index); | 37 | PAGE_SIZE, 0, page->index); |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 9bb015bc4a83..fbe885d68035 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -538,7 +538,6 @@ static struct bio *dio_await_one(struct dio *dio) | |||
538 | static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) | 538 | static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) |
539 | { | 539 | { |
540 | struct bio_vec *bvec; | 540 | struct bio_vec *bvec; |
541 | unsigned i; | ||
542 | blk_status_t err = bio->bi_status; | 541 | blk_status_t err = bio->bi_status; |
543 | 542 | ||
544 | if (err) { | 543 | if (err) { |
@@ -553,7 +552,7 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) | |||
553 | } else { | 552 | } else { |
554 | struct bvec_iter_all iter_all; | 553 | struct bvec_iter_all iter_all; |
555 | 554 | ||
556 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 555 | bio_for_each_segment_all(bvec, bio, iter_all) { |
557 | struct page *page = bvec->bv_page; | 556 | struct page *page = bvec->bv_page; |
558 | 557 | ||
559 | if (dio->op == REQ_OP_READ && !PageCompound(page) && | 558 | if (dio->op == REQ_OP_READ && !PageCompound(page) && |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 3e9298e6a705..4690618a92e9 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -61,11 +61,10 @@ static void buffer_io_error(struct buffer_head *bh) | |||
61 | 61 | ||
62 | static void ext4_finish_bio(struct bio *bio) | 62 | static void ext4_finish_bio(struct bio *bio) |
63 | { | 63 | { |
64 | int i; | ||
65 | struct bio_vec *bvec; | 64 | struct bio_vec *bvec; |
66 | struct bvec_iter_all iter_all; | 65 | struct bvec_iter_all iter_all; |
67 | 66 | ||
68 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 67 | bio_for_each_segment_all(bvec, bio, iter_all) { |
69 | struct page *page = bvec->bv_page; | 68 | struct page *page = bvec->bv_page; |
70 | #ifdef CONFIG_FS_ENCRYPTION | 69 | #ifdef CONFIG_FS_ENCRYPTION |
71 | struct page *data_page = NULL; | 70 | struct page *data_page = NULL; |
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 3adadf461825..3629a74b7f94 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c | |||
@@ -71,7 +71,6 @@ static inline bool ext4_bio_encrypted(struct bio *bio) | |||
71 | static void mpage_end_io(struct bio *bio) | 71 | static void mpage_end_io(struct bio *bio) |
72 | { | 72 | { |
73 | struct bio_vec *bv; | 73 | struct bio_vec *bv; |
74 | int i; | ||
75 | struct bvec_iter_all iter_all; | 74 | struct bvec_iter_all iter_all; |
76 | 75 | ||
77 | if (ext4_bio_encrypted(bio)) { | 76 | if (ext4_bio_encrypted(bio)) { |
@@ -82,7 +81,7 @@ static void mpage_end_io(struct bio *bio) | |||
82 | return; | 81 | return; |
83 | } | 82 | } |
84 | } | 83 | } |
85 | bio_for_each_segment_all(bv, bio, i, iter_all) { | 84 | bio_for_each_segment_all(bv, bio, iter_all) { |
86 | struct page *page = bv->bv_page; | 85 | struct page *page = bv->bv_page; |
87 | 86 | ||
88 | if (!bio->bi_status) { | 87 | if (!bio->bi_status) { |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e7ae26e36c9c..38faf661e237 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -1760,8 +1760,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1760 | ext4_msg(sb, KERN_ERR, | 1760 | ext4_msg(sb, KERN_ERR, |
1761 | "filesystem too large to resize to %llu blocks safely", | 1761 | "filesystem too large to resize to %llu blocks safely", |
1762 | n_blocks_count); | 1762 | n_blocks_count); |
1763 | if (sizeof(sector_t) < 8) | ||
1764 | ext4_warning(sb, "CONFIG_LBDAF not enabled"); | ||
1765 | return -EINVAL; | 1763 | return -EINVAL; |
1766 | } | 1764 | } |
1767 | 1765 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 981f702848e7..0e63069b9d5b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -2705,13 +2705,9 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) | |||
2705 | loff_t res; | 2705 | loff_t res; |
2706 | loff_t upper_limit = MAX_LFS_FILESIZE; | 2706 | loff_t upper_limit = MAX_LFS_FILESIZE; |
2707 | 2707 | ||
2708 | /* small i_blocks in vfs inode? */ | 2708 | BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64)); |
2709 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { | 2709 | |
2710 | /* | 2710 | if (!has_huge_files) { |
2711 | * CONFIG_LBDAF is not enabled implies the inode | ||
2712 | * i_block represent total blocks in 512 bytes | ||
2713 | * 32 == size of vfs inode i_blocks * 8 | ||
2714 | */ | ||
2715 | upper_limit = (1LL << 32) - 1; | 2711 | upper_limit = (1LL << 32) - 1; |
2716 | 2712 | ||
2717 | /* total blocks in file system block size */ | 2713 | /* total blocks in file system block size */ |
@@ -2752,11 +2748,11 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) | |||
2752 | * number of 512-byte sectors of the file. | 2748 | * number of 512-byte sectors of the file. |
2753 | */ | 2749 | */ |
2754 | 2750 | ||
2755 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { | 2751 | if (!has_huge_files) { |
2756 | /* | 2752 | /* |
2757 | * !has_huge_files or CONFIG_LBDAF not enabled implies that | 2753 | * !has_huge_files implies that the inode i_block field |
2758 | * the inode i_block field represents total file blocks in | 2754 | * represents total file blocks in 2^32 512-byte sectors == |
2759 | * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 | 2755 | * size of vfs inode i_blocks * 8 |
2760 | */ | 2756 | */ |
2761 | upper_limit = (1LL << 32) - 1; | 2757 | upper_limit = (1LL << 32) - 1; |
2762 | 2758 | ||
@@ -2896,18 +2892,6 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) | |||
2896 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | 2892 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); |
2897 | return 0; | 2893 | return 0; |
2898 | } | 2894 | } |
2899 | /* | ||
2900 | * Large file size enabled file system can only be mounted | ||
2901 | * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF | ||
2902 | */ | ||
2903 | if (ext4_has_feature_huge_file(sb)) { | ||
2904 | if (sizeof(blkcnt_t) < sizeof(u64)) { | ||
2905 | ext4_msg(sb, KERN_ERR, "Filesystem with huge files " | ||
2906 | "cannot be mounted RDWR without " | ||
2907 | "CONFIG_LBDAF"); | ||
2908 | return 0; | ||
2909 | } | ||
2910 | } | ||
2911 | if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) { | 2895 | if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) { |
2912 | ext4_msg(sb, KERN_ERR, | 2896 | ext4_msg(sb, KERN_ERR, |
2913 | "Can't support bigalloc feature without " | 2897 | "Can't support bigalloc feature without " |
@@ -4056,8 +4040,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
4056 | if (err) { | 4040 | if (err) { |
4057 | ext4_msg(sb, KERN_ERR, "filesystem" | 4041 | ext4_msg(sb, KERN_ERR, "filesystem" |
4058 | " too large to mount safely on this system"); | 4042 | " too large to mount safely on this system"); |
4059 | if (sizeof(sector_t) < 8) | ||
4060 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | ||
4061 | goto failed_mount; | 4043 | goto failed_mount; |
4062 | } | 4044 | } |
4063 | 4045 | ||
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9727944139f2..64040e998439 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -86,10 +86,9 @@ static void __read_end_io(struct bio *bio) | |||
86 | { | 86 | { |
87 | struct page *page; | 87 | struct page *page; |
88 | struct bio_vec *bv; | 88 | struct bio_vec *bv; |
89 | int i; | ||
90 | struct bvec_iter_all iter_all; | 89 | struct bvec_iter_all iter_all; |
91 | 90 | ||
92 | bio_for_each_segment_all(bv, bio, i, iter_all) { | 91 | bio_for_each_segment_all(bv, bio, iter_all) { |
93 | page = bv->bv_page; | 92 | page = bv->bv_page; |
94 | 93 | ||
95 | /* PG_error was set if any post_read step failed */ | 94 | /* PG_error was set if any post_read step failed */ |
@@ -164,7 +163,6 @@ static void f2fs_write_end_io(struct bio *bio) | |||
164 | { | 163 | { |
165 | struct f2fs_sb_info *sbi = bio->bi_private; | 164 | struct f2fs_sb_info *sbi = bio->bi_private; |
166 | struct bio_vec *bvec; | 165 | struct bio_vec *bvec; |
167 | int i; | ||
168 | struct bvec_iter_all iter_all; | 166 | struct bvec_iter_all iter_all; |
169 | 167 | ||
170 | if (time_to_inject(sbi, FAULT_WRITE_IO)) { | 168 | if (time_to_inject(sbi, FAULT_WRITE_IO)) { |
@@ -172,7 +170,7 @@ static void f2fs_write_end_io(struct bio *bio) | |||
172 | bio->bi_status = BLK_STS_IOERR; | 170 | bio->bi_status = BLK_STS_IOERR; |
173 | } | 171 | } |
174 | 172 | ||
175 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 173 | bio_for_each_segment_all(bvec, bio, iter_all) { |
176 | struct page *page = bvec->bv_page; | 174 | struct page *page = bvec->bv_page; |
177 | enum count_type type = WB_DATA_TYPE(page); | 175 | enum count_type type = WB_DATA_TYPE(page); |
178 | 176 | ||
@@ -349,7 +347,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, | |||
349 | { | 347 | { |
350 | struct bio_vec *bvec; | 348 | struct bio_vec *bvec; |
351 | struct page *target; | 349 | struct page *target; |
352 | int i; | ||
353 | struct bvec_iter_all iter_all; | 350 | struct bvec_iter_all iter_all; |
354 | 351 | ||
355 | if (!io->bio) | 352 | if (!io->bio) |
@@ -358,7 +355,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, | |||
358 | if (!inode && !page && !ino) | 355 | if (!inode && !page && !ino) |
359 | return true; | 356 | return true; |
360 | 357 | ||
361 | bio_for_each_segment_all(bvec, io->bio, i, iter_all) { | 358 | bio_for_each_segment_all(bvec, io->bio, iter_all) { |
362 | 359 | ||
363 | if (bvec->bv_page->mapping) | 360 | if (bvec->bv_page->mapping) |
364 | target = bvec->bv_page; | 361 | target = bvec->bv_page; |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 3ed2b088dcfd..6a1e499543f5 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -1,6 +1,5 @@ | |||
1 | config GFS2_FS | 1 | config GFS2_FS |
2 | tristate "GFS2 file system support" | 2 | tristate "GFS2 file system support" |
3 | depends on (64BIT || LBDAF) | ||
4 | select FS_POSIX_ACL | 3 | select FS_POSIX_ACL |
5 | select CRC32 | 4 | select CRC32 |
6 | select LIBCRC32C | 5 | select LIBCRC32C |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 8722c60b11fe..6f09b5e3dd6e 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -207,7 +207,6 @@ static void gfs2_end_log_write(struct bio *bio) | |||
207 | struct gfs2_sbd *sdp = bio->bi_private; | 207 | struct gfs2_sbd *sdp = bio->bi_private; |
208 | struct bio_vec *bvec; | 208 | struct bio_vec *bvec; |
209 | struct page *page; | 209 | struct page *page; |
210 | int i; | ||
211 | struct bvec_iter_all iter_all; | 210 | struct bvec_iter_all iter_all; |
212 | 211 | ||
213 | if (bio->bi_status) { | 212 | if (bio->bi_status) { |
@@ -216,7 +215,7 @@ static void gfs2_end_log_write(struct bio *bio) | |||
216 | wake_up(&sdp->sd_logd_waitq); | 215 | wake_up(&sdp->sd_logd_waitq); |
217 | } | 216 | } |
218 | 217 | ||
219 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 218 | bio_for_each_segment_all(bvec, bio, iter_all) { |
220 | page = bvec->bv_page; | 219 | page = bvec->bv_page; |
221 | if (page_has_buffers(page)) | 220 | if (page_has_buffers(page)) |
222 | gfs2_end_log_write_bh(sdp, bvec, bio->bi_status); | 221 | gfs2_end_log_write_bh(sdp, bvec, bio->bi_status); |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 3201342404a7..ff86e1d4f8ff 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -189,10 +189,9 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) | |||
189 | static void gfs2_meta_read_endio(struct bio *bio) | 189 | static void gfs2_meta_read_endio(struct bio *bio) |
190 | { | 190 | { |
191 | struct bio_vec *bvec; | 191 | struct bio_vec *bvec; |
192 | int i; | ||
193 | struct bvec_iter_all iter_all; | 192 | struct bvec_iter_all iter_all; |
194 | 193 | ||
195 | bio_for_each_segment_all(bvec, bio, i, iter_all) { | 194 | bio_for_each_segment_all(bvec, bio, iter_all) { |
196 | struct page *page = bvec->bv_page; | 195 | struct page *page = bvec->bv_page; |
197 | struct buffer_head *bh = page_buffers(page); | 196 | struct buffer_head *bh = page_buffers(page); |
198 | unsigned int len = bvec->bv_len; | 197 | unsigned int len = bvec->bv_len; |
diff --git a/fs/iomap.c b/fs/iomap.c index 9ef049d61e8a..23ef63fd1669 100644 --- a/fs/iomap.c +++ b/fs/iomap.c | |||
@@ -245,10 +245,9 @@ iomap_read_end_io(struct bio *bio) | |||
245 | { | 245 | { |
246 | int error = blk_status_to_errno(bio->bi_status); | 246 | int error = blk_status_to_errno(bio->bi_status); |
247 | struct bio_vec *bvec; | 247 | struct bio_vec *bvec; |
248 | int i; | ||
249 | struct bvec_iter_all iter_all; | 248 | struct bvec_iter_all iter_all; |
250 | 249 | ||
251 | bio_for_each_segment_all(bvec, bio, i, iter_all) | 250 | bio_for_each_segment_all(bvec, bio, iter_all) |
252 | iomap_read_page_end_io(bvec, error); | 251 | iomap_read_page_end_io(bvec, error); |
253 | bio_put(bio); | 252 | bio_put(bio); |
254 | } | 253 | } |
@@ -1599,9 +1598,8 @@ static void iomap_dio_bio_end_io(struct bio *bio) | |||
1599 | if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { | 1598 | if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { |
1600 | struct bvec_iter_all iter_all; | 1599 | struct bvec_iter_all iter_all; |
1601 | struct bio_vec *bvec; | 1600 | struct bio_vec *bvec; |
1602 | int i; | ||
1603 | 1601 | ||
1604 | bio_for_each_segment_all(bvec, bio, i, iter_all) | 1602 | bio_for_each_segment_all(bvec, bio, iter_all) |
1605 | put_page(bvec->bv_page); | 1603 | put_page(bvec->bv_page); |
1606 | } | 1604 | } |
1607 | bio_put(bio); | 1605 | bio_put(bio); |
diff --git a/fs/mpage.c b/fs/mpage.c index 3f19da75178b..436a85260394 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -47,10 +47,9 @@ | |||
47 | static void mpage_end_io(struct bio *bio) | 47 | static void mpage_end_io(struct bio *bio) |
48 | { | 48 | { |
49 | struct bio_vec *bv; | 49 | struct bio_vec *bv; |
50 | int i; | ||
51 | struct bvec_iter_all iter_all; | 50 | struct bvec_iter_all iter_all; |
52 | 51 | ||
53 | bio_for_each_segment_all(bv, bio, i, iter_all) { | 52 | bio_for_each_segment_all(bv, bio, iter_all) { |
54 | struct page *page = bv->bv_page; | 53 | struct page *page = bv->bv_page; |
55 | page_endio(page, bio_op(bio), | 54 | page_endio(page, bio_op(bio), |
56 | blk_status_to_errno(bio->bi_status)); | 55 | blk_status_to_errno(bio->bi_status)); |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 5f93cfacb3d1..69d02cf8cf37 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -121,7 +121,6 @@ config PNFS_FILE_LAYOUT | |||
121 | config PNFS_BLOCK | 121 | config PNFS_BLOCK |
122 | tristate | 122 | tristate |
123 | depends on NFS_V4_1 && BLK_DEV_DM | 123 | depends on NFS_V4_1 && BLK_DEV_DM |
124 | depends on 64BIT || LBDAF | ||
125 | default NFS_V4 | 124 | default NFS_V4 |
126 | 125 | ||
127 | config PNFS_FLEXFILE_LAYOUT | 126 | config PNFS_FLEXFILE_LAYOUT |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7982a93e630f..8821bc7b9c72 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -594,7 +594,6 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits, | |||
594 | */ | 594 | */ |
595 | 595 | ||
596 | #if BITS_PER_LONG == 32 | 596 | #if BITS_PER_LONG == 32 |
597 | # if defined(CONFIG_LBDAF) | ||
598 | BUILD_BUG_ON(sizeof(sector_t) != 8); | 597 | BUILD_BUG_ON(sizeof(sector_t) != 8); |
599 | /* | 598 | /* |
600 | * We might be limited by page cache size. | 599 | * We might be limited by page cache size. |
@@ -608,15 +607,6 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits, | |||
608 | */ | 607 | */ |
609 | bitshift = 31; | 608 | bitshift = 31; |
610 | } | 609 | } |
611 | # else | ||
612 | /* | ||
613 | * We are limited by the size of sector_t. Use block size, as | ||
614 | * that's what we expose to the VFS. | ||
615 | */ | ||
616 | bytes = 1 << bbits; | ||
617 | trim = 1; | ||
618 | bitshift = 31; | ||
619 | # endif | ||
620 | #endif | 610 | #endif |
621 | 611 | ||
622 | /* | 612 | /* |
diff --git a/fs/stack.c b/fs/stack.c index a54e33ed10f1..664ed35558bd 100644 --- a/fs/stack.c +++ b/fs/stack.c | |||
@@ -21,11 +21,10 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) | |||
21 | i_size = i_size_read(src); | 21 | i_size = i_size_read(src); |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to | 24 | * But on 32-bit, we ought to make an effort to keep the two halves of |
25 | * keep the two halves of i_blocks in sync despite SMP or PREEMPT - | 25 | * i_blocks in sync despite SMP or PREEMPT - though stat's |
26 | * though stat's generic_fillattr() doesn't bother, and we won't be | 26 | * generic_fillattr() doesn't bother, and we won't be applying quotas |
27 | * applying quotas (where i_blocks does become important) at the | 27 | * (where i_blocks does become important) at the upper level. |
28 | * upper level. | ||
29 | * | 28 | * |
30 | * We don't actually know what locking is used at the lower level; | 29 | * We don't actually know what locking is used at the lower level; |
31 | * but if it's a filesystem that supports quotas, it will be using | 30 | * but if it's a filesystem that supports quotas, it will be using |
@@ -44,9 +43,9 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) | |||
44 | * include/linux/fs.h). We don't necessarily hold i_mutex when this | 43 | * include/linux/fs.h). We don't necessarily hold i_mutex when this |
45 | * is called, so take i_lock for that case. | 44 | * is called, so take i_lock for that case. |
46 | * | 45 | * |
47 | * And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the | 46 | * And if on 32-bit, continue our effort to keep the two halves of |
48 | * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock | 47 | * i_blocks in sync despite SMP or PREEMPT: use i_lock for that case |
49 | * for that case too, and do both at once by combining the tests. | 48 | * too, and do both at once by combining the tests. |
50 | * | 49 | * |
51 | * There is none of this locking overhead in the 64-bit case. | 50 | * There is none of this locking overhead in the 64-bit case. |
52 | */ | 51 | */ |
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 457ac9f97377..99af5e5bda9f 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig | |||
@@ -1,7 +1,6 @@ | |||
1 | config XFS_FS | 1 | config XFS_FS |
2 | tristate "XFS filesystem support" | 2 | tristate "XFS filesystem support" |
3 | depends on BLOCK | 3 | depends on BLOCK |
4 | depends on (64BIT || LBDAF) | ||
5 | select EXPORTFS | 4 | select EXPORTFS |
6 | select LIBCRC32C | 5 | select LIBCRC32C |
7 | select FS_IOMAP | 6 | select FS_IOMAP |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 09ac1bb4c2b7..a6f0f4761a37 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -98,7 +98,6 @@ xfs_destroy_ioend( | |||
98 | 98 | ||
99 | for (bio = &ioend->io_inline_bio; bio; bio = next) { | 99 | for (bio = &ioend->io_inline_bio; bio; bio = next) { |
100 | struct bio_vec *bvec; | 100 | struct bio_vec *bvec; |
101 | int i; | ||
102 | struct bvec_iter_all iter_all; | 101 | struct bvec_iter_all iter_all; |
103 | 102 | ||
104 | /* | 103 | /* |
@@ -111,7 +110,7 @@ xfs_destroy_ioend( | |||
111 | next = bio->bi_private; | 110 | next = bio->bi_private; |
112 | 111 | ||
113 | /* walk each page on bio, ending page IO on them */ | 112 | /* walk each page on bio, ending page IO on them */ |
114 | bio_for_each_segment_all(bvec, bio, i, iter_all) | 113 | bio_for_each_segment_all(bvec, bio, iter_all) |
115 | xfs_finish_page_writeback(inode, bvec, error); | 114 | xfs_finish_page_writeback(inode, bvec, error); |
116 | bio_put(bio); | 115 | bio_put(bio); |
117 | } | 116 | } |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b56c6e585ece..a14d11d78bd8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -534,26 +534,18 @@ xfs_max_file_offset( | |||
534 | 534 | ||
535 | /* Figure out maximum filesize, on Linux this can depend on | 535 | /* Figure out maximum filesize, on Linux this can depend on |
536 | * the filesystem blocksize (on 32 bit platforms). | 536 | * the filesystem blocksize (on 32 bit platforms). |
537 | * __block_write_begin does this in an [unsigned] long... | 537 | * __block_write_begin does this in an [unsigned] long long... |
538 | * page->index << (PAGE_SHIFT - bbits) | 538 | * page->index << (PAGE_SHIFT - bbits) |
539 | * So, for page sized blocks (4K on 32 bit platforms), | 539 | * So, for page sized blocks (4K on 32 bit platforms), |
540 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is | 540 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is |
541 | * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1) | 541 | * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1) |
542 | * but for smaller blocksizes it is less (bbits = log2 bsize). | 542 | * but for smaller blocksizes it is less (bbits = log2 bsize). |
543 | * Note1: get_block_t takes a long (implicit cast from above) | ||
544 | * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch | ||
545 | * can optionally convert the [unsigned] long from above into | ||
546 | * an [unsigned] long long. | ||
547 | */ | 543 | */ |
548 | 544 | ||
549 | #if BITS_PER_LONG == 32 | 545 | #if BITS_PER_LONG == 32 |
550 | # if defined(CONFIG_LBDAF) | ||
551 | ASSERT(sizeof(sector_t) == 8); | 546 | ASSERT(sizeof(sector_t) == 8); |
552 | pagefactor = PAGE_SIZE; | 547 | pagefactor = PAGE_SIZE; |
553 | bitshift = BITS_PER_LONG; | 548 | bitshift = BITS_PER_LONG; |
554 | # else | ||
555 | pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift); | ||
556 | # endif | ||
557 | #endif | 549 | #endif |
558 | 550 | ||
559 | return (((uint64_t)pagefactor) << bitshift) - 1; | 551 | return (((uint64_t)pagefactor) << bitshift) - 1; |
diff --git a/include/linux/bio.h b/include/linux/bio.h index e584673c1881..ea73df36529a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -1,19 +1,6 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * Copyright (C) 2001 Jens Axboe <axboe@suse.de> | 3 | * Copyright (C) 2001 Jens Axboe <axboe@suse.de> |
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public Licens | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- | ||
17 | */ | 4 | */ |
18 | #ifndef __LINUX_BIO_H | 5 | #ifndef __LINUX_BIO_H |
19 | #define __LINUX_BIO_H | 6 | #define __LINUX_BIO_H |
@@ -134,9 +121,8 @@ static inline bool bio_next_segment(const struct bio *bio, | |||
134 | * drivers should _never_ use the all version - the bio may have been split | 121 | * drivers should _never_ use the all version - the bio may have been split |
135 | * before it got to the driver and the driver won't own all of it | 122 | * before it got to the driver and the driver won't own all of it |
136 | */ | 123 | */ |
137 | #define bio_for_each_segment_all(bvl, bio, i, iter) \ | 124 | #define bio_for_each_segment_all(bvl, bio, iter) \ |
138 | for (i = 0, bvl = bvec_init_iter_all(&iter); \ | 125 | for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); ) |
139 | bio_next_segment((bio), &iter); i++) | ||
140 | 126 | ||
141 | static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, | 127 | static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, |
142 | unsigned bytes) | 128 | unsigned bytes) |
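With the index parameter gone, all iteration state lives in struct bvec_iter_all: bvec_init_iter_all() primes it and bio_next_segment() produces one single-page segment per pass. Callers that only need the page drop their index variable entirely, as in this hedged sketch (put_pages_endio() is an invented name, modelled on the endio conversions earlier in the diff):

	static void put_pages_endio(struct bio *bio)
	{
		struct bio_vec *bvec;
		struct bvec_iter_all iter_all;

		/* iteration state is held in iter_all; no index needed */
		bio_for_each_segment_all(bvec, bio, iter_all)
			put_page(bvec->bv_page);
		bio_put(bio);
	}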
diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h index 7b6ecf9ac4c3..5cc5f0f36218 100644 --- a/include/linux/blk-mq-rdma.h +++ b/include/linux/blk-mq-rdma.h | |||
@@ -1,3 +1,4 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | #ifndef _LINUX_BLK_MQ_RDMA_H | 2 | #ifndef _LINUX_BLK_MQ_RDMA_H |
2 | #define _LINUX_BLK_MQ_RDMA_H | 3 | #define _LINUX_BLK_MQ_RDMA_H |
3 | 4 | ||
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index db29928de467..15d1aa53d96c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h | |||
@@ -70,6 +70,8 @@ struct blk_mq_hw_ctx { | |||
70 | struct dentry *sched_debugfs_dir; | 70 | struct dentry *sched_debugfs_dir; |
71 | #endif | 71 | #endif |
72 | 72 | ||
73 | struct list_head hctx_list; | ||
74 | |||
73 | /* Must be the last member - see also blk_mq_hw_ctx_size(). */ | 75 | /* Must be the last member - see also blk_mq_hw_ctx_size(). */ |
74 | struct srcu_struct srcu[0]; | 76 | struct srcu_struct srcu[0]; |
75 | }; | 77 | }; |
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 791fee35df88..be418275763c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -215,21 +215,24 @@ struct bio { | |||
215 | /* | 215 | /* |
216 | * bio flags | 216 | * bio flags |
217 | */ | 217 | */ |
218 | #define BIO_NO_PAGE_REF 0 /* don't put release vec pages */ | 218 | enum { |
219 | #define BIO_SEG_VALID 1 /* bi_phys_segments valid */ | 219 | BIO_NO_PAGE_REF, /* don't put release vec pages */ |
220 | #define BIO_CLONED 2 /* doesn't own data */ | 220 | BIO_SEG_VALID, /* bi_phys_segments valid */ |
221 | #define BIO_BOUNCED 3 /* bio is a bounce bio */ | 221 | BIO_CLONED, /* doesn't own data */ |
222 | #define BIO_USER_MAPPED 4 /* contains user pages */ | 222 | BIO_BOUNCED, /* bio is a bounce bio */ |
223 | #define BIO_NULL_MAPPED 5 /* contains invalid user pages */ | 223 | BIO_USER_MAPPED, /* contains user pages */ |
224 | #define BIO_QUIET 6 /* Make BIO Quiet */ | 224 | BIO_NULL_MAPPED, /* contains invalid user pages */ |
225 | #define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */ | 225 | BIO_QUIET, /* Make BIO Quiet */ |
226 | #define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ | 226 | BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ |
227 | #define BIO_THROTTLED 9 /* This bio has already been subjected to | 227 | BIO_REFFED, /* bio has elevated ->bi_cnt */ |
228 | BIO_THROTTLED, /* This bio has already been subjected to | ||
228 | * throttling rules. Don't do it again. */ | 229 | * throttling rules. Don't do it again. */ |
229 | #define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion | 230 | BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion |
230 | * of this bio. */ | 231 | * of this bio. */ |
231 | #define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */ | 232 | BIO_QUEUE_ENTERED, /* can use blk_queue_enter_live() */ |
232 | #define BIO_TRACKED 12 /* set if bio goes through the rq_qos path */ | 233 | BIO_TRACKED, /* set if bio goes through the rq_qos path */ |
234 | BIO_FLAG_LAST | ||
235 | }; | ||
233 | 236 | ||
234 | /* See BVEC_POOL_OFFSET below before adding new flags */ | 237 | /* See BVEC_POOL_OFFSET below before adding new flags */ |
235 | 238 | ||
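Turning the bio flag #defines into an enum keeps the same values and ordering, with BIO_FLAG_LAST acting as a sentinel after the real flags, so users of the bio_flagged()/bio_set_flag() accessors need no change. Typical use stays as before, for example:

	if (!bio_flagged(bio, BIO_CHAIN))	/* test a flag */
		bio_set_flag(bio, BIO_QUIET);	/* suppress error reporting for this bio */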
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 317ab30d2904..1aafeb923e7b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -535,6 +535,13 @@ struct request_queue { | |||
535 | 535 | ||
536 | struct mutex sysfs_lock; | 536 | struct mutex sysfs_lock; |
537 | 537 | ||
538 | /* | ||
539 | * for reusing dead hctx instance in case of updating | ||
540 | * nr_hw_queues | ||
541 | */ | ||
542 | struct list_head unused_hctx_list; | ||
543 | spinlock_t unused_hctx_lock; | ||
544 | |||
538 | atomic_t mq_freeze_depth; | 545 | atomic_t mq_freeze_depth; |
539 | 546 | ||
540 | #if defined(CONFIG_BLK_DEV_BSG) | 547 | #if defined(CONFIG_BLK_DEV_BSG) |
@@ -640,6 +647,13 @@ static inline bool blk_account_rq(struct request *rq) | |||
640 | 647 | ||
641 | #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) | 648 | #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) |
642 | 649 | ||
650 | #define rq_dma_dir(rq) \ | ||
651 | (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) | ||
652 | |||
653 | #define dma_map_bvec(dev, bv, dir, attrs) \ | ||
654 | dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ | ||
655 | (dir), (attrs)) | ||
656 | |||
643 | static inline bool queue_is_mq(struct request_queue *q) | 657 | static inline bool queue_is_mq(struct request_queue *q) |
644 | { | 658 | { |
645 | return q->mq_ops; | 659 | return q->mq_ops; |
@@ -931,6 +945,17 @@ static inline unsigned int blk_rq_payload_bytes(struct request *rq) | |||
931 | return blk_rq_bytes(rq); | 945 | return blk_rq_bytes(rq); |
932 | } | 946 | } |
933 | 947 | ||
948 | /* | ||
949 | * Return the first full biovec in the request. The caller needs to check that | ||
950 | * there are any bvecs before calling this helper. | ||
951 | */ | ||
952 | static inline struct bio_vec req_bvec(struct request *rq) | ||
953 | { | ||
954 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) | ||
955 | return rq->special_vec; | ||
956 | return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); | ||
957 | } | ||
958 | |||
934 | static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, | 959 | static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, |
935 | int op) | 960 | int op) |
936 | { | 961 | { |
@@ -1051,7 +1076,6 @@ extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, | |||
1051 | extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, | 1076 | extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, |
1052 | sector_t offset); | 1077 | sector_t offset); |
1053 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); | 1078 | extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); |
1054 | extern void blk_queue_dma_pad(struct request_queue *, unsigned int); | ||
1055 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); | 1079 | extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); |
1056 | extern int blk_queue_dma_drain(struct request_queue *q, | 1080 | extern int blk_queue_dma_drain(struct request_queue *q, |
1057 | dma_drain_needed_fn *dma_drain_needed, | 1081 | dma_drain_needed_fn *dma_drain_needed, |
@@ -1547,6 +1571,17 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, | |||
1547 | return bio_integrity_intervals(bi, sectors) * bi->tuple_size; | 1571 | return bio_integrity_intervals(bi, sectors) * bi->tuple_size; |
1548 | } | 1572 | } |
1549 | 1573 | ||
1574 | /* | ||
1575 | * Return the first bvec that contains integrity data. Only drivers that are | ||
1576 | * limited to a single integrity segment should use this helper. | ||
1577 | */ | ||
1578 | static inline struct bio_vec *rq_integrity_vec(struct request *rq) | ||
1579 | { | ||
1580 | if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) | ||
1581 | return NULL; | ||
1582 | return rq->bio->bi_integrity->bip_vec; | ||
1583 | } | ||
1584 | |||
1550 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | 1585 | #else /* CONFIG_BLK_DEV_INTEGRITY */ |
1551 | 1586 | ||
1552 | struct bio; | 1587 | struct bio; |
@@ -1621,6 +1656,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, | |||
1621 | return 0; | 1656 | return 0; |
1622 | } | 1657 | } |
1623 | 1658 | ||
1659 | static inline struct bio_vec *rq_integrity_vec(struct request *rq) | ||
1660 | { | ||
1661 | return NULL; | ||
1662 | } | ||
1663 | |||
1624 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ | 1664 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ |
1625 | 1665 | ||
1626 | struct block_device_operations { | 1666 | struct block_device_operations { |
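The new req_bvec(), rq_dma_dir() and dma_map_bvec() helpers target drivers that bound their requests to a single data segment and want to map it without walking a scatterlist. A hedged sketch of such a driver-side helper, assuming <linux/blkdev.h> and <linux/dma-mapping.h> are included and that the caller has already checked the request carries data (map_single_segment() is an illustrative name, not part of the series):

	static dma_addr_t map_single_segment(struct device *dev, struct request *rq)
	{
		struct bio_vec bv = req_bvec(rq);	/* caller verified the request has data */

		/* rq_dma_dir(): DMA_TO_DEVICE for writes, DMA_FROM_DEVICE for reads */
		return dma_map_bvec(dev, &bv, rq_dma_dir(rq), 0);
	}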
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 7f14517a559b..960988d42f77 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h | |||
@@ -1,24 +1,10 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
1 | /* | 2 | /* |
2 | * BSG helper library | 3 | * BSG helper library |
3 | * | 4 | * |
4 | * Copyright (C) 2008 James Smart, Emulex Corporation | 5 | * Copyright (C) 2008 James Smart, Emulex Corporation |
5 | * Copyright (C) 2011 Red Hat, Inc. All rights reserved. | 6 | * Copyright (C) 2011 Red Hat, Inc. All rights reserved. |
6 | * Copyright (C) 2011 Mike Christie | 7 | * Copyright (C) 2011 Mike Christie |
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | * | ||
22 | */ | 8 | */ |
23 | #ifndef _BLK_BSG_ | 9 | #ifndef _BLK_BSG_ |
24 | #define _BLK_BSG_ | 10 | #define _BLK_BSG_ |
diff --git a/include/linux/bvec.h b/include/linux/bvec.h index ff13cbc1887d..a032f01e928c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h | |||
@@ -1,21 +1,8 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * bvec iterator | 3 | * bvec iterator |
3 | * | 4 | * |
4 | * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com> | 5 | * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com> |
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public Licens | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- | ||
19 | */ | 6 | */ |
20 | #ifndef __LINUX_BVEC_ITER_H | 7 | #ifndef __LINUX_BVEC_ITER_H |
21 | #define __LINUX_BVEC_ITER_H | 8 | #define __LINUX_BVEC_ITER_H |
@@ -51,11 +38,6 @@ struct bvec_iter_all { | |||
51 | unsigned done; | 38 | unsigned done; |
52 | }; | 39 | }; |
53 | 40 | ||
54 | static inline struct page *bvec_nth_page(struct page *page, int idx) | ||
55 | { | ||
56 | return idx == 0 ? page : nth_page(page, idx); | ||
57 | } | ||
58 | |||
59 | /* | 41 | /* |
60 | * various member access, note that bio_data should of course not be used | 42 | * various member access, note that bio_data should of course not be used |
61 | * on highmem page vectors | 43 | * on highmem page vectors |
@@ -92,8 +74,8 @@ static inline struct page *bvec_nth_page(struct page *page, int idx) | |||
92 | PAGE_SIZE - bvec_iter_offset((bvec), (iter))) | 74 | PAGE_SIZE - bvec_iter_offset((bvec), (iter))) |
93 | 75 | ||
94 | #define bvec_iter_page(bvec, iter) \ | 76 | #define bvec_iter_page(bvec, iter) \ |
95 | bvec_nth_page(mp_bvec_iter_page((bvec), (iter)), \ | 77 | (mp_bvec_iter_page((bvec), (iter)) + \ |
96 | mp_bvec_iter_page_idx((bvec), (iter))) | 78 | mp_bvec_iter_page_idx((bvec), (iter))) |
97 | 79 | ||
98 | #define bvec_iter_bvec(bvec, iter) \ | 80 | #define bvec_iter_bvec(bvec, iter) \ |
99 | ((struct bio_vec) { \ | 81 | ((struct bio_vec) { \ |
@@ -157,11 +139,10 @@ static inline void bvec_advance(const struct bio_vec *bvec, | |||
157 | struct bio_vec *bv = &iter_all->bv; | 139 | struct bio_vec *bv = &iter_all->bv; |
158 | 140 | ||
159 | if (iter_all->done) { | 141 | if (iter_all->done) { |
160 | bv->bv_page = nth_page(bv->bv_page, 1); | 142 | bv->bv_page++; |
161 | bv->bv_offset = 0; | 143 | bv->bv_offset = 0; |
162 | } else { | 144 | } else { |
163 | bv->bv_page = bvec_nth_page(bvec->bv_page, bvec->bv_offset / | 145 | bv->bv_page = bvec->bv_page + (bvec->bv_offset >> PAGE_SHIFT); |
164 | PAGE_SIZE); | ||
165 | bv->bv_offset = bvec->bv_offset & ~PAGE_MASK; | 146 | bv->bv_offset = bvec->bv_offset & ~PAGE_MASK; |
166 | } | 147 | } |
167 | bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, | 148 | bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, |
@@ -184,7 +165,7 @@ static inline void mp_bvec_last_segment(const struct bio_vec *bvec, | |||
184 | unsigned total = bvec->bv_offset + bvec->bv_len; | 165 | unsigned total = bvec->bv_offset + bvec->bv_len; |
185 | unsigned last_page = (total - 1) / PAGE_SIZE; | 166 | unsigned last_page = (total - 1) / PAGE_SIZE; |
186 | 167 | ||
187 | seg->bv_page = bvec_nth_page(bvec->bv_page, last_page); | 168 | seg->bv_page = bvec->bv_page + last_page; |
188 | 169 | ||
189 | /* the whole segment is inside the last page */ | 170 | /* the whole segment is inside the last page */ |
190 | if (bvec->bv_offset >= last_page * PAGE_SIZE) { | 171 | if (bvec->bv_offset >= last_page * PAGE_SIZE) { |
@@ -196,9 +177,4 @@ static inline void mp_bvec_last_segment(const struct bio_vec *bvec, | |||
196 | } | 177 | } |
197 | } | 178 | } |
198 | 179 | ||
199 | #define mp_bvec_for_each_page(pg, bv, i) \ | ||
200 | for (i = (bv)->bv_offset / PAGE_SIZE; \ | ||
201 | (i <= (((bv)->bv_offset + (bv)->bv_len - 1) / PAGE_SIZE)) && \ | ||
202 | (pg = bvec_nth_page((bv)->bv_page, i)); i += 1) | ||
203 | |||
204 | #endif /* __LINUX_BVEC_ITER_H */ | 180 | #endif /* __LINUX_BVEC_ITER_H */ |
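Dropping bvec_nth_page() relies on the pages of a multi-page bvec being contiguous, so plain pointer arithmetic on bv_page is sufficient. As a worked example with 4 KiB pages: a bvec whose bv_offset is 5000 has its first single-page segment at bv_page + 1 (5000 >> PAGE_SHIFT) with an in-page offset of 904 (5000 & ~PAGE_MASK), which is exactly what the new bvec_advance() computes.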
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 06c0fd594097..8b5330dd5ac0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -150,6 +150,13 @@ enum { | |||
150 | DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ | 150 | DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ |
151 | }; | 151 | }; |
152 | 152 | ||
153 | enum { | ||
154 | /* Poll even if events_poll_msecs is unset */ | ||
155 | DISK_EVENT_FLAG_POLL = 1 << 0, | ||
156 | /* Forward events to udev */ | ||
157 | DISK_EVENT_FLAG_UEVENT = 1 << 1, | ||
158 | }; | ||
159 | |||
153 | struct disk_part_tbl { | 160 | struct disk_part_tbl { |
154 | struct rcu_head rcu_head; | 161 | struct rcu_head rcu_head; |
155 | int len; | 162 | int len; |
@@ -184,8 +191,8 @@ struct gendisk { | |||
184 | char disk_name[DISK_NAME_LEN]; /* name of major driver */ | 191 | char disk_name[DISK_NAME_LEN]; /* name of major driver */ |
185 | char *(*devnode)(struct gendisk *gd, umode_t *mode); | 192 | char *(*devnode)(struct gendisk *gd, umode_t *mode); |
186 | 193 | ||
187 | unsigned int events; /* supported events */ | 194 | unsigned short events; /* supported events */ |
188 | unsigned int async_events; /* async events, subset of all */ | 195 | unsigned short event_flags; /* flags related to event processing */ |
189 | 196 | ||
190 | /* Array of pointers to partitions indexed by partno. | 197 | /* Array of pointers to partitions indexed by partno. |
191 | * Protected with matching bdev lock but stat and other | 198 | * Protected with matching bdev lock but stat and other |
@@ -610,6 +617,7 @@ struct unixware_disklabel { | |||
610 | 617 | ||
611 | extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); | 618 | extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); |
612 | extern void blk_free_devt(dev_t devt); | 619 | extern void blk_free_devt(dev_t devt); |
620 | extern void blk_invalidate_devt(dev_t devt); | ||
613 | extern dev_t blk_lookup_devt(const char *name, int partno); | 621 | extern dev_t blk_lookup_devt(const char *name, int partno); |
614 | extern char *disk_name (struct gendisk *hd, int partno, char *buf); | 622 | extern char *disk_name (struct gendisk *hd, int partno, char *buf); |
615 | 623 | ||
@@ -714,7 +722,7 @@ static inline void hd_free_part(struct hd_struct *part) | |||
714 | */ | 722 | */ |
715 | static inline sector_t part_nr_sects_read(struct hd_struct *part) | 723 | static inline sector_t part_nr_sects_read(struct hd_struct *part) |
716 | { | 724 | { |
717 | #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) | 725 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
718 | sector_t nr_sects; | 726 | sector_t nr_sects; |
719 | unsigned seq; | 727 | unsigned seq; |
720 | do { | 728 | do { |
@@ -722,7 +730,7 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) | |||
722 | nr_sects = part->nr_sects; | 730 | nr_sects = part->nr_sects; |
723 | } while (read_seqcount_retry(&part->nr_sects_seq, seq)); | 731 | } while (read_seqcount_retry(&part->nr_sects_seq, seq)); |
724 | return nr_sects; | 732 | return nr_sects; |
725 | #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) | 733 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
726 | sector_t nr_sects; | 734 | sector_t nr_sects; |
727 | 735 | ||
728 | preempt_disable(); | 736 | preempt_disable(); |
@@ -741,11 +749,11 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) | |||
741 | */ | 749 | */ |
742 | static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) | 750 | static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) |
743 | { | 751 | { |
744 | #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) | 752 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
745 | write_seqcount_begin(&part->nr_sects_seq); | 753 | write_seqcount_begin(&part->nr_sects_seq); |
746 | part->nr_sects = size; | 754 | part->nr_sects = size; |
747 | write_seqcount_end(&part->nr_sects_seq); | 755 | write_seqcount_end(&part->nr_sects_seq); |
748 | #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) | 756 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
749 | preempt_disable(); | 757 | preempt_disable(); |
750 | part->nr_sects = size; | 758 | part->nr_sects = size; |
751 | preempt_enable(); | 759 | preempt_enable(); |
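The new event_flags field separates which events a disk reports from how the block layer handles them: periodic polling and udev notification now have to be requested explicitly rather than being implied by a non-zero events mask. A hedged sketch of how a removable-media driver might opt back in to the previous behaviour before add_disk() (DISK_EVENT_MEDIA_CHANGE is the pre-existing media-change event bit):

	disk->events = DISK_EVENT_MEDIA_CHANGE;
	disk->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT;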
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2d14e21c16c0..a3b59d143afb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/byteorder.h> | 17 | #include <asm/byteorder.h> |
18 | #include <asm/div64.h> | 18 | #include <asm/div64.h> |
19 | #include <uapi/linux/kernel.h> | 19 | #include <uapi/linux/kernel.h> |
20 | #include <asm/div64.h> | ||
20 | 21 | ||
21 | #define STACK_MAGIC 0xdeadbeef | 22 | #define STACK_MAGIC 0xdeadbeef |
22 | 23 | ||
@@ -175,18 +176,7 @@ | |||
175 | #define _RET_IP_ (unsigned long)__builtin_return_address(0) | 176 | #define _RET_IP_ (unsigned long)__builtin_return_address(0) |
176 | #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) | 177 | #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) |
177 | 178 | ||
178 | #ifdef CONFIG_LBDAF | 179 | #define sector_div(a, b) do_div(a, b) |
179 | # define sector_div(a, b) do_div(a, b) | ||
180 | #else | ||
181 | # define sector_div(n, b)( \ | ||
182 | { \ | ||
183 | int _res; \ | ||
184 | _res = (n) % (b); \ | ||
185 | (n) /= (b); \ | ||
186 | _res; \ | ||
187 | } \ | ||
188 | ) | ||
189 | #endif | ||
190 | 180 | ||
191 | /** | 181 | /** |
192 | * upper_32_bits - return bits 32-63 of a number | 182 | * upper_32_bits - return bits 32-63 of a number |
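With the 32-bit fallback removed, sector_div() always expands to do_div(): it divides its 64-bit first argument in place and evaluates to the 32-bit remainder. A small sketch of the semantics (values are illustrative):

	sector_t lba = 1000005;
	u32 rem = sector_div(lba, 8);	/* lba is now 125000, rem is 5 */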
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 3aa97b98dc89..3ec8e50efa16 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h | |||
@@ -77,7 +77,7 @@ struct nvme_rdma_cm_rep { | |||
77 | * struct nvme_rdma_cm_rej - rdma connect reject | 77 | * struct nvme_rdma_cm_rej - rdma connect reject |
78 | * | 78 | * |
79 | * @recfmt: format of the RDMA Private Data | 79 | * @recfmt: format of the RDMA Private Data |
80 | * @fsts: error status for the associated connect request | 80 | * @sts: error status for the associated connect request |
81 | */ | 81 | */ |
82 | struct nvme_rdma_cm_rej { | 82 | struct nvme_rdma_cm_rej { |
83 | __le16 recfmt; | 83 | __le16 recfmt; |
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 04b124fca51e..3e76b6d7d97f 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h | |||
@@ -1,18 +1,10 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1 | /* | 2 | /* |
2 | * Copyright © 2016 Intel Corporation | 3 | * Copyright © 2016 Intel Corporation |
3 | * | 4 | * |
4 | * Authors: | 5 | * Authors: |
5 | * Rafael Antognolli <rafael.antognolli@intel.com> | 6 | * Rafael Antognolli <rafael.antognolli@intel.com> |
6 | * Scott Bauer <scott.bauer@intel.com> | 7 | * Scott Bauer <scott.bauer@intel.com> |
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | 8 | */ |
17 | 9 | ||
18 | #ifndef LINUX_OPAL_H | 10 | #ifndef LINUX_OPAL_H |
diff --git a/include/linux/types.h b/include/linux/types.h index cc0dbbe551d5..231114ae38f4 100644 --- a/include/linux/types.h +++ b/include/linux/types.h | |||
@@ -127,13 +127,8 @@ typedef s64 int64_t; | |||
127 | * | 127 | * |
128 | * blkcnt_t is the type of the inode's block count. | 128 | * blkcnt_t is the type of the inode's block count. |
129 | */ | 129 | */ |
130 | #ifdef CONFIG_LBDAF | ||
131 | typedef u64 sector_t; | 130 | typedef u64 sector_t; |
132 | typedef u64 blkcnt_t; | 131 | typedef u64 blkcnt_t; |
133 | #else | ||
134 | typedef unsigned long sector_t; | ||
135 | typedef unsigned long blkcnt_t; | ||
136 | #endif | ||
137 | 132 | ||
138 | /* | 133 | /* |
139 | * The type of an index into the pagecache. | 134 | * The type of an index into the pagecache. |
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 627624d35030..33e53b80cd1f 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h | |||
@@ -5,15 +5,6 @@ | |||
5 | * Authors: | 5 | * Authors: |
6 | * Rafael Antognolli <rafael.antognolli@intel.com> | 6 | * Rafael Antognolli <rafael.antognolli@intel.com> |
7 | * Scott Bauer <scott.bauer@intel.com> | 7 | * Scott Bauer <scott.bauer@intel.com> |
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms and conditions of the GNU General Public License, | ||
11 | * version 2, as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | */ | 8 | */ |
18 | 9 | ||
19 | #ifndef _UAPI_SED_OPAL_H | 10 | #ifndef _UAPI_SED_OPAL_H |
@@ -58,7 +49,7 @@ struct opal_key { | |||
58 | struct opal_lr_act { | 49 | struct opal_lr_act { |
59 | struct opal_key key; | 50 | struct opal_key key; |
60 | __u32 sum; | 51 | __u32 sum; |
61 | __u8 num_lrs; | 52 | __u8 num_lrs; |
62 | __u8 lr[OPAL_MAX_LRS]; | 53 | __u8 lr[OPAL_MAX_LRS]; |
63 | __u8 align[2]; /* Align to 8 byte boundary */ | 54 | __u8 align[2]; /* Align to 8 byte boundary */ |
64 | }; | 55 | }; |
diff --git a/include/xen/xen.h b/include/xen/xen.h index 19d032373de5..19a72f591e2b 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h | |||
@@ -43,8 +43,10 @@ extern struct hvm_start_info pvh_start_info; | |||
43 | #endif /* CONFIG_XEN_DOM0 */ | 43 | #endif /* CONFIG_XEN_DOM0 */ |
44 | 44 | ||
45 | struct bio_vec; | 45 | struct bio_vec; |
46 | struct page; | ||
47 | |||
46 | bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, | 48 | bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, |
47 | const struct bio_vec *vec2); | 49 | const struct page *page); |
48 | 50 | ||
49 | #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON) | 51 | #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON) |
50 | extern u64 xen_saved_max_mem_size; | 52 | extern u64 xen_saved_max_mem_size; |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4c54a89f06ee..971c6c70891e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -1930,7 +1930,6 @@ config TEST_STATIC_KEYS | |||
1930 | config TEST_KMOD | 1930 | config TEST_KMOD |
1931 | tristate "kmod stress tester" | 1931 | tristate "kmod stress tester" |
1932 | depends on m | 1932 | depends on m |
1933 | depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS | ||
1934 | depends on NETDEVICES && NET_CORE && INET # for TUN | 1933 | depends on NETDEVICES && NET_CORE && INET # for TUN |
1935 | depends on BLOCK | 1934 | depends on BLOCK |
1936 | select TEST_LKM | 1935 | select TEST_LKM |
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h index d27285f8ee82..8bc960e5e713 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h | |||
@@ -59,11 +59,7 @@ typedef __u32 uint32_t; | |||
59 | * | 59 | * |
60 | * blkcnt_t is the type of the inode's block count. | 60 | * blkcnt_t is the type of the inode's block count. |
61 | */ | 61 | */ |
62 | #ifdef CONFIG_LBDAF | ||
63 | typedef u64 sector_t; | 62 | typedef u64 sector_t; |
64 | #else | ||
65 | typedef unsigned long sector_t; | ||
66 | #endif | ||
67 | 63 | ||
68 | /* | 64 | /* |
69 | * The type of an index into the pagecache. | 65 | * The type of an index into the pagecache. |