diff options
author | Christoph Hellwig <hch@lst.de> | 2015-10-15 08:10:48 -0400 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2015-10-21 16:46:56 -0400 |
commit | bbd3e064362e5057cc4799ba2e4d68c7593e490b (patch) | |
tree | 94c1efd47cf05eaf940e3381bb4eb9df421ae001 | |
parent | d8e4bb8103df02a2c509868732dc93fb66110a12 (diff) |
block: add an API for Persistent Reservations
This commit adds a driver API and ioctls for controlling Persistent
Reservations generically at the block layer. Persistent
Reservations are supported by SCSI and NVMe and allow controlling who gets
access to a device in a shared storage setup.
Note that we add a pr_ops structure to struct block_device_operations
instead of adding the members directly to avoid bloating all instances
of devices that will never support Persistent Reservations.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r-- | Documentation/block/pr.txt | 119 | ||||
-rw-r--r-- | block/ioctl.c | 103 | ||||
-rw-r--r-- | include/linux/blkdev.h | 2 | ||||
-rw-r--r-- | include/linux/pr.h | 18 | ||||
-rw-r--r-- | include/uapi/linux/pr.h | 48 |
5 files changed, 290 insertions, 0 deletions
diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.txt new file mode 100644 index 000000000000..d3eb1ca65051 --- /dev/null +++ b/Documentation/block/pr.txt | |||
@@ -0,0 +1,119 @@ | |||
1 | |||
2 | Block layer support for Persistent Reservations | ||
3 | =============================================== | ||
4 | |||
5 | The Linux kernel supports a user space interface for simplified | ||
6 | Persistent Reservations which map to block devices that support | ||
7 | these (like SCSI). Persistent Reservations allow restricting | ||
8 | access to block devices to specific initiators in a shared storage | ||
9 | setup. | ||
10 | |||
11 | This document gives a general overview of the supported ioctl commands. | |
12 | For a more detailed reference please refer to the SCSI Primary | |
13 | Commands standard, specifically the section on Reservations and the | ||
14 | "PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands. | ||
15 | |||
16 | All implementations are expected to ensure the reservations survive | ||
17 | a power loss and cover all connections in a multi path environment. | ||
18 | These behaviors are optional in SPC but will be automatically applied | ||
19 | by Linux. | ||
20 | |||
21 | |||
22 | The following types of reservations are supported: | ||
23 | -------------------------------------------------- | ||
24 | |||
25 | - PR_WRITE_EXCLUSIVE | ||
26 | |||
27 | Only the initiator that owns the reservation can write to the | ||
28 | device. Any initiator can read from the device. | ||
29 | |||
30 | - PR_EXCLUSIVE_ACCESS | ||
31 | |||
32 | Only the initiator that owns the reservation can access the | ||
33 | device. | ||
34 | |||
35 | - PR_WRITE_EXCLUSIVE_REG_ONLY | ||
36 | |||
37 | Only initiators with a registered key can write to the device. | |
38 | Any initiator can read from the device. | ||
39 | |||
40 | - PR_EXCLUSIVE_ACCESS_REG_ONLY | ||
41 | |||
42 | Only initiators with a registered key can access the device. | ||
43 | |||
44 | - PR_WRITE_EXCLUSIVE_ALL_REGS | ||
45 | |||
46 | Only initiators with a registered key can write to the device. | |
47 | Any initiator can read from the device. | ||
48 | All initiators with a registered key are considered reservation | ||
49 | holders. | ||
50 | Please reference the SPC spec on the meaning of a reservation | ||
51 | holder if you want to use this type. | ||
52 | |||
53 | - PR_EXCLUSIVE_ACCESS_ALL_REGS | ||
54 | |||
55 | Only initiators with a registered key can access the device. | ||
56 | All initiators with a registered key are considered reservation | ||
57 | holders. | ||
58 | Please reference the SPC spec on the meaning of a reservation | ||
59 | holder if you want to use this type. | ||
60 | |||
61 | |||
62 | The following ioctls are supported: | |
63 | ----------------------------------- | |
64 | |||
65 | 1. IOC_PR_REGISTER | ||
66 | |||
67 | This ioctl command registers a new reservation if the new_key argument | ||
68 | is non-zero. If no existing reservation exists old_key must be zero, | |
69 | if an existing reservation should be replaced old_key must contain | ||
70 | the old reservation key. | ||
71 | |||
72 | If the new_key argument is 0 it unregisters the existing reservation passed | ||
73 | in old_key. | ||
74 | |||
75 | |||
76 | 2. IOC_PR_RESERVE | ||
77 | |||
78 | This ioctl command reserves the device and thus restricts access for other | ||
79 | devices based on the type argument. The key argument must be the existing | ||
80 | reservation key for the device as acquired by the IOC_PR_REGISTER, | ||
81 | IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands. | ||
82 | |||
83 | |||
84 | 3. IOC_PR_RELEASE | ||
85 | |||
86 | This ioctl command releases the reservation specified by key and flags | ||
87 | and thus removes any access restriction implied by it. | ||
88 | |||
89 | |||
90 | 4. IOC_PR_PREEMPT | ||
91 | |||
92 | This ioctl command releases the existing reservation referred to by | ||
93 | old_key and replaces it with a new reservation of type for the | |
94 | reservation key new_key. | ||
95 | |||
96 | |||
97 | 5. IOC_PR_PREEMPT_ABORT | ||
98 | |||
99 | This ioctl command works like IOC_PR_PREEMPT except that it also aborts | ||
100 | any outstanding command sent over a connection identified by old_key. | ||
101 | |||
102 | 6. IOC_PR_CLEAR | ||
103 | |||
104 | This ioctl command unregisters both key and any other reservation key | ||
105 | registered with the device and drops any existing reservation. | ||
106 | |||
107 | |||
108 | Flags | ||
109 | ----- | ||
110 | |||
111 | All the ioctls have a flags field. Currently only one flag is supported: | |
112 | |||
113 | - PR_FL_IGNORE_KEY | ||
114 | |||
115 | Ignore the existing reservation key. This is commonly supported for | ||
116 | IOC_PR_REGISTER, and some implementations may support the flag for | |
117 | IOC_PR_RESERVE. | ||
118 | |||
119 | For all unknown flags the kernel will return -EOPNOTSUPP. | ||
diff --git a/block/ioctl.c b/block/ioctl.c index df62b47d2379..0918aed2d847 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/backing-dev.h> | 7 | #include <linux/backing-dev.h> |
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/blktrace_api.h> | 9 | #include <linux/blktrace_api.h> |
10 | #include <linux/pr.h> | ||
10 | #include <asm/uaccess.h> | 11 | #include <asm/uaccess.h> |
11 | 12 | ||
12 | static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) | 13 | static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) |
@@ -295,6 +296,96 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, | |||
295 | */ | 296 | */ |
296 | EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); | 297 | EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); |
297 | 298 | ||
299 | static int blkdev_pr_register(struct block_device *bdev, | ||
300 | struct pr_registration __user *arg) | ||
301 | { | ||
302 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; | ||
303 | struct pr_registration reg; | ||
304 | |||
305 | if (!capable(CAP_SYS_ADMIN)) | ||
306 | return -EPERM; | ||
307 | if (!ops || !ops->pr_register) | ||
308 | return -EOPNOTSUPP; | ||
309 | if (copy_from_user(&reg, arg, sizeof(reg))) | |
310 | return -EFAULT; | ||
311 | |||
312 | if (reg.flags & ~PR_FL_IGNORE_KEY) | ||
313 | return -EOPNOTSUPP; | ||
314 | return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); | ||
315 | } | ||
316 | |||
317 | static int blkdev_pr_reserve(struct block_device *bdev, | ||
318 | struct pr_reservation __user *arg) | ||
319 | { | ||
320 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; | ||
321 | struct pr_reservation rsv; | ||
322 | |||
323 | if (!capable(CAP_SYS_ADMIN)) | ||
324 | return -EPERM; | ||
325 | if (!ops || !ops->pr_reserve) | ||
326 | return -EOPNOTSUPP; | ||
327 | if (copy_from_user(&rsv, arg, sizeof(rsv))) | ||
328 | return -EFAULT; | ||
329 | |||
330 | if (rsv.flags & ~PR_FL_IGNORE_KEY) | ||
331 | return -EOPNOTSUPP; | ||
332 | return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); | ||
333 | } | ||
334 | |||
335 | static int blkdev_pr_release(struct block_device *bdev, | ||
336 | struct pr_reservation __user *arg) | ||
337 | { | ||
338 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; | ||
339 | struct pr_reservation rsv; | ||
340 | |||
341 | if (!capable(CAP_SYS_ADMIN)) | ||
342 | return -EPERM; | ||
343 | if (!ops || !ops->pr_release) | ||
344 | return -EOPNOTSUPP; | ||
345 | if (copy_from_user(&rsv, arg, sizeof(rsv))) | ||
346 | return -EFAULT; | ||
347 | |||
348 | if (rsv.flags) | ||
349 | return -EOPNOTSUPP; | ||
350 | return ops->pr_release(bdev, rsv.key, rsv.type); | ||
351 | } | ||
352 | |||
353 | static int blkdev_pr_preempt(struct block_device *bdev, | ||
354 | struct pr_preempt __user *arg, bool abort) | ||
355 | { | ||
356 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; | ||
357 | struct pr_preempt p; | ||
358 | |||
359 | if (!capable(CAP_SYS_ADMIN)) | ||
360 | return -EPERM; | ||
361 | if (!ops || !ops->pr_preempt) | ||
362 | return -EOPNOTSUPP; | ||
363 | if (copy_from_user(&p, arg, sizeof(p))) | ||
364 | return -EFAULT; | ||
365 | |||
366 | if (p.flags) | ||
367 | return -EOPNOTSUPP; | ||
368 | return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); | ||
369 | } | ||
370 | |||
371 | static int blkdev_pr_clear(struct block_device *bdev, | ||
372 | struct pr_clear __user *arg) | ||
373 | { | ||
374 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; | ||
375 | struct pr_clear c; | ||
376 | |||
377 | if (!capable(CAP_SYS_ADMIN)) | ||
378 | return -EPERM; | ||
379 | if (!ops || !ops->pr_clear) | ||
380 | return -EOPNOTSUPP; | ||
381 | if (copy_from_user(&c, arg, sizeof(c))) | ||
382 | return -EFAULT; | ||
383 | |||
384 | if (c.flags) | ||
385 | return -EOPNOTSUPP; | ||
386 | return ops->pr_clear(bdev, c.key); | ||
387 | } | ||
388 | |||
298 | /* | 389 | /* |
299 | * Is it an unrecognized ioctl? The correct returns are either | 390 | * Is it an unrecognized ioctl? The correct returns are either |
300 | * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a | 391 | * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a |
@@ -477,6 +568,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, | |||
477 | case BLKTRACESETUP: | 568 | case BLKTRACESETUP: |
478 | case BLKTRACETEARDOWN: | 569 | case BLKTRACETEARDOWN: |
479 | return blk_trace_ioctl(bdev, cmd, argp); | 570 | return blk_trace_ioctl(bdev, cmd, argp); |
571 | case IOC_PR_REGISTER: | ||
572 | return blkdev_pr_register(bdev, argp); | ||
573 | case IOC_PR_RESERVE: | ||
574 | return blkdev_pr_reserve(bdev, argp); | ||
575 | case IOC_PR_RELEASE: | ||
576 | return blkdev_pr_release(bdev, argp); | ||
577 | case IOC_PR_PREEMPT: | ||
578 | return blkdev_pr_preempt(bdev, argp, false); | ||
579 | case IOC_PR_PREEMPT_ABORT: | ||
580 | return blkdev_pr_preempt(bdev, argp, true); | ||
581 | case IOC_PR_CLEAR: | ||
582 | return blkdev_pr_clear(bdev, argp); | ||
480 | default: | 583 | default: |
481 | return __blkdev_driver_ioctl(bdev, mode, cmd, arg); | 584 | return __blkdev_driver_ioctl(bdev, mode, cmd, arg); |
482 | } | 585 | } |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 19c2e947d4d1..fe25da05e823 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -35,6 +35,7 @@ struct sg_io_hdr; | |||
35 | struct bsg_job; | 35 | struct bsg_job; |
36 | struct blkcg_gq; | 36 | struct blkcg_gq; |
37 | struct blk_flush_queue; | 37 | struct blk_flush_queue; |
38 | struct pr_ops; | ||
38 | 39 | ||
39 | #define BLKDEV_MIN_RQ 4 | 40 | #define BLKDEV_MIN_RQ 4 |
40 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 41 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
@@ -1633,6 +1634,7 @@ struct block_device_operations { | |||
1633 | /* this callback is with swap_lock and sometimes page table lock held */ | 1634 | /* this callback is with swap_lock and sometimes page table lock held */ |
1634 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | 1635 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); |
1635 | struct module *owner; | 1636 | struct module *owner; |
1637 | const struct pr_ops *pr_ops; | ||
1636 | }; | 1638 | }; |
1637 | 1639 | ||
1638 | extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, | 1640 | extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, |
diff --git a/include/linux/pr.h b/include/linux/pr.h new file mode 100644 index 000000000000..65c01c10b335 --- /dev/null +++ b/include/linux/pr.h | |||
@@ -0,0 +1,18 @@ | |||
1 | #ifndef LINUX_PR_H | ||
2 | #define LINUX_PR_H | ||
3 | |||
4 | #include <uapi/linux/pr.h> | ||
5 | |||
6 | struct pr_ops { | ||
7 | int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, | ||
8 | u32 flags); | ||
9 | int (*pr_reserve)(struct block_device *bdev, u64 key, | ||
10 | enum pr_type type, u32 flags); | ||
11 | int (*pr_release)(struct block_device *bdev, u64 key, | ||
12 | enum pr_type type); | ||
13 | int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, | ||
14 | enum pr_type type, bool abort); | ||
15 | int (*pr_clear)(struct block_device *bdev, u64 key); | ||
16 | }; | ||
17 | |||
18 | #endif /* LINUX_PR_H */ | ||
diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h new file mode 100644 index 000000000000..57d7c0f916b6 --- /dev/null +++ b/include/uapi/linux/pr.h | |||
@@ -0,0 +1,48 @@ | |||
1 | #ifndef _UAPI_PR_H | ||
2 | #define _UAPI_PR_H | ||
3 | |||
4 | enum pr_type { | ||
5 | PR_WRITE_EXCLUSIVE = 1, | ||
6 | PR_EXCLUSIVE_ACCESS = 2, | ||
7 | PR_WRITE_EXCLUSIVE_REG_ONLY = 3, | ||
8 | PR_EXCLUSIVE_ACCESS_REG_ONLY = 4, | ||
9 | PR_WRITE_EXCLUSIVE_ALL_REGS = 5, | ||
10 | PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, | ||
11 | }; | ||
12 | |||
13 | struct pr_reservation { | ||
14 | __u64 key; | ||
15 | __u32 type; | ||
16 | __u32 flags; | ||
17 | }; | ||
18 | |||
19 | struct pr_registration { | ||
20 | __u64 old_key; | ||
21 | __u64 new_key; | ||
22 | __u32 flags; | ||
23 | __u32 __pad; | ||
24 | }; | ||
25 | |||
26 | struct pr_preempt { | ||
27 | __u64 old_key; | ||
28 | __u64 new_key; | ||
29 | __u32 type; | ||
30 | __u32 flags; | ||
31 | }; | ||
32 | |||
33 | struct pr_clear { | ||
34 | __u64 key; | ||
35 | __u32 flags; | ||
36 | __u32 __pad; | ||
37 | }; | ||
38 | |||
39 | #define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ | ||
40 | |||
41 | #define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) | ||
42 | #define IOC_PR_RESERVE _IOW('p', 201, struct pr_reservation) | ||
43 | #define IOC_PR_RELEASE _IOW('p', 202, struct pr_reservation) | ||
44 | #define IOC_PR_PREEMPT _IOW('p', 203, struct pr_preempt) | ||
45 | #define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt) | ||
46 | #define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear) | ||
47 | |||
48 | #endif /* _UAPI_PR_H */ | ||