aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2015-10-15 08:10:48 -0400
committerJens Axboe <axboe@fb.com>2015-10-21 16:46:56 -0400
commitbbd3e064362e5057cc4799ba2e4d68c7593e490b (patch)
tree94c1efd47cf05eaf940e3381bb4eb9df421ae001
parentd8e4bb8103df02a2c509868732dc93fb66110a12 (diff)
block: add an API for Persistent Reservations
This commit adds a driver API and ioctls for controlling Persistent Reservations generically at the block layer. Persistent Reservations are supported by SCSI and NVMe and allow controlling who gets access to a device in a shared storage setup. Note that we add a pr_ops structure to struct block_device_operations instead of adding the members directly to avoid bloating all instances of devices that will never support Persistent Reservations. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--Documentation/block/pr.txt119
-rw-r--r--block/ioctl.c103
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/pr.h18
-rw-r--r--include/uapi/linux/pr.h48
5 files changed, 290 insertions, 0 deletions
diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.txt
new file mode 100644
index 000000000000..d3eb1ca65051
--- /dev/null
+++ b/Documentation/block/pr.txt
@@ -0,0 +1,119 @@
1
2Block layer support for Persistent Reservations
3===============================================
4
5The Linux kernel supports a user space interface for simplified
6Persistent Reservations which map to block devices that support
7these (like SCSI). Persistent Reservations allow restricting
8access to block devices to specific initiators in a shared storage
9setup.
10
11This document gives a general overview of the supported ioctl commands.
12For a more detailed reference please refer to the SCSI Primary
13Commands standard, specifically the section on Reservations and the
14"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
15
16All implementations are expected to ensure the reservations survive
17a power loss and cover all connections in a multi path environment.
18These behaviors are optional in SPC but will be automatically applied
19by Linux.
20
21
22The following types of reservations are supported:
23--------------------------------------------------
24
25 - PR_WRITE_EXCLUSIVE
26
27 Only the initiator that owns the reservation can write to the
28 device. Any initiator can read from the device.
29
30 - PR_EXCLUSIVE_ACCESS
31
32 Only the initiator that owns the reservation can access the
33 device.
34
35 - PR_WRITE_EXCLUSIVE_REG_ONLY
36
37 Only initiators with a registered key can write to the device.
38 Any initiator can read from the device.
39
40 - PR_EXCLUSIVE_ACCESS_REG_ONLY
41
42 Only initiators with a registered key can access the device.
43
44 - PR_WRITE_EXCLUSIVE_ALL_REGS
45
46 Only initiators with a registered key can write to the device.
47 Any initiator can read from the device.
48 All initiators with a registered key are considered reservation
49 holders.
50 Please reference the SPC spec on the meaning of a reservation
51 holder if you want to use this type.
52
53 - PR_EXCLUSIVE_ACCESS_ALL_REGS
54
55 Only initiators with a registered key can access the device.
56 All initiators with a registered key are considered reservation
57 holders.
58 Please reference the SPC spec on the meaning of a reservation
59 holder if you want to use this type.
60
61
62The following ioctls are supported:
63-----------------------------------
64
651. IOC_PR_REGISTER
66
67This ioctl command registers a new reservation if the new_key argument
68is non-zero. If no existing reservation exists old_key must be zero,
69if an existing reservation should be replaced old_key must contain
70the old reservation key.
71
72If the new_key argument is 0 it unregisters the existing reservation passed
73in old_key.
74
75
762. IOC_PR_RESERVE
77
78This ioctl command reserves the device and thus restricts access for other
79devices based on the type argument. The key argument must be the existing
80reservation key for the device as acquired by the IOC_PR_REGISTER (with or
81without PR_FL_IGNORE_KEY), IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands.
82
83
843. IOC_PR_RELEASE
85
86This ioctl command releases the reservation specified by key and type
87and thus removes any access restriction implied by it.
88
89
904. IOC_PR_PREEMPT
91
92This ioctl command releases the existing reservation referred to by
93old_key and replaces it with a new reservation of type for the
94reservation key new_key.
95
96
975. IOC_PR_PREEMPT_ABORT
98
99This ioctl command works like IOC_PR_PREEMPT except that it also aborts
100any outstanding command sent over a connection identified by old_key.
101
1026. IOC_PR_CLEAR
103
104This ioctl command unregisters both key and any other reservation key
105registered with the device and drops any existing reservation.
106
107
108Flags
109-----
110
111All the ioctls have a flag field. Currently only one flag is supported:
112
113 - PR_FL_IGNORE_KEY
114
115 Ignore the existing reservation key. This is commonly supported for
116 IOC_PR_REGISTER, and some implementations may support the flag for
117 IOC_PR_RESERVE.
118
119For all unknown flags the kernel will return -EOPNOTSUPP.
diff --git a/block/ioctl.c b/block/ioctl.c
index df62b47d2379..0918aed2d847 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -7,6 +7,7 @@
7#include <linux/backing-dev.h> 7#include <linux/backing-dev.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/blktrace_api.h> 9#include <linux/blktrace_api.h>
10#include <linux/pr.h>
10#include <asm/uaccess.h> 11#include <asm/uaccess.h>
11 12
12static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) 13static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
@@ -295,6 +296,96 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
295 */ 296 */
296EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); 297EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
297 298
/*
 * Handler for the IOC_PR_REGISTER ioctl.
 *
 * Copies a struct pr_registration from user space and forwards its old_key,
 * new_key and flags to the driver's ->pr_register method.
 *
 * Returns -EPERM if the caller lacks CAP_SYS_ADMIN, -EOPNOTSUPP if the
 * driver has no pr_ops or no ->pr_register method or if any flag other than
 * PR_FL_IGNORE_KEY is set, -EFAULT if the argument cannot be copied in, and
 * otherwise the return value of ->pr_register.
 */
299static int blkdev_pr_register(struct block_device *bdev,
300 struct pr_registration __user *arg)
301{
302 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
303 struct pr_registration reg;
304
305 if (!capable(CAP_SYS_ADMIN))
306 return -EPERM;
307 if (!ops || !ops->pr_register)
308 return -EOPNOTSUPP;
309 if (copy_from_user(&reg, arg, sizeof(reg)))
310 return -EFAULT;
311
 /* PR_FL_IGNORE_KEY is the only flag accepted; reg.__pad is not examined. */
312 if (reg.flags & ~PR_FL_IGNORE_KEY)
313 return -EOPNOTSUPP;
314 return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
315}
316
/*
 * Handler for the IOC_PR_RESERVE ioctl.
 *
 * Copies a struct pr_reservation from user space and forwards its key, type
 * and flags to the driver's ->pr_reserve method.
 *
 * Returns -EPERM if the caller lacks CAP_SYS_ADMIN, -EOPNOTSUPP if the
 * driver has no pr_ops or no ->pr_reserve method or if any flag other than
 * PR_FL_IGNORE_KEY is set, -EFAULT if the argument cannot be copied in, and
 * otherwise the return value of ->pr_reserve.
 */
317static int blkdev_pr_reserve(struct block_device *bdev,
318 struct pr_reservation __user *arg)
319{
320 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
321 struct pr_reservation rsv;
322
323 if (!capable(CAP_SYS_ADMIN))
324 return -EPERM;
325 if (!ops || !ops->pr_reserve)
326 return -EOPNOTSUPP;
327 if (copy_from_user(&rsv, arg, sizeof(rsv)))
328 return -EFAULT;
329
 /* rsv.type is passed through as-is; it is not range-checked here. */
330 if (rsv.flags & ~PR_FL_IGNORE_KEY)
331 return -EOPNOTSUPP;
332 return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
333}
334
/*
 * Handler for the IOC_PR_RELEASE ioctl.
 *
 * Copies a struct pr_reservation from user space and forwards its key and
 * type to the driver's ->pr_release method.  Unlike the reserve path, no
 * flags are accepted here.
 *
 * Returns -EPERM if the caller lacks CAP_SYS_ADMIN, -EOPNOTSUPP if the
 * driver has no pr_ops or no ->pr_release method or if rsv.flags is
 * non-zero, -EFAULT if the argument cannot be copied in, and otherwise the
 * return value of ->pr_release.
 */
335static int blkdev_pr_release(struct block_device *bdev,
336 struct pr_reservation __user *arg)
337{
338 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
339 struct pr_reservation rsv;
340
341 if (!capable(CAP_SYS_ADMIN))
342 return -EPERM;
343 if (!ops || !ops->pr_release)
344 return -EOPNOTSUPP;
345 if (copy_from_user(&rsv, arg, sizeof(rsv)))
346 return -EFAULT;
347
348 if (rsv.flags)
349 return -EOPNOTSUPP;
350 return ops->pr_release(bdev, rsv.key, rsv.type);
351}
352
/*
 * Shared handler for the IOC_PR_PREEMPT and IOC_PR_PREEMPT_ABORT ioctls.
 *
 * @abort is false when dispatched for IOC_PR_PREEMPT and true for
 * IOC_PR_PREEMPT_ABORT; it is handed unchanged to the driver together with
 * old_key, new_key and type from the user-supplied struct pr_preempt.
 *
 * Returns -EPERM if the caller lacks CAP_SYS_ADMIN, -EOPNOTSUPP if the
 * driver has no pr_ops or no ->pr_preempt method or if p.flags is non-zero,
 * -EFAULT if the argument cannot be copied in, and otherwise the return
 * value of ->pr_preempt.
 */
353static int blkdev_pr_preempt(struct block_device *bdev,
354 struct pr_preempt __user *arg, bool abort)
355{
356 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
357 struct pr_preempt p;
358
359 if (!capable(CAP_SYS_ADMIN))
360 return -EPERM;
361 if (!ops || !ops->pr_preempt)
362 return -EOPNOTSUPP;
363 if (copy_from_user(&p, arg, sizeof(p)))
364 return -EFAULT;
365
366 if (p.flags)
367 return -EOPNOTSUPP;
368 return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
369}
370
/*
 * Handler for the IOC_PR_CLEAR ioctl.
 *
 * Copies a struct pr_clear from user space and forwards its key to the
 * driver's ->pr_clear method.
 *
 * Returns -EPERM if the caller lacks CAP_SYS_ADMIN, -EOPNOTSUPP if the
 * driver has no pr_ops or no ->pr_clear method or if c.flags is non-zero,
 * -EFAULT if the argument cannot be copied in, and otherwise the return
 * value of ->pr_clear.
 */
371static int blkdev_pr_clear(struct block_device *bdev,
372 struct pr_clear __user *arg)
373{
374 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
375 struct pr_clear c;
376
377 if (!capable(CAP_SYS_ADMIN))
378 return -EPERM;
379 if (!ops || !ops->pr_clear)
380 return -EOPNOTSUPP;
381 if (copy_from_user(&c, arg, sizeof(c)))
382 return -EFAULT;
383
 /* No flags are accepted for clear; c.__pad is not examined. */
384 if (c.flags)
385 return -EOPNOTSUPP;
386 return ops->pr_clear(bdev, c.key);
387}
388
298/* 389/*
299 * Is it an unrecognized ioctl? The correct returns are either 390 * Is it an unrecognized ioctl? The correct returns are either
300 * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a 391 * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
@@ -477,6 +568,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
477 case BLKTRACESETUP: 568 case BLKTRACESETUP:
478 case BLKTRACETEARDOWN: 569 case BLKTRACETEARDOWN:
479 return blk_trace_ioctl(bdev, cmd, argp); 570 return blk_trace_ioctl(bdev, cmd, argp);
571 case IOC_PR_REGISTER:
572 return blkdev_pr_register(bdev, argp);
573 case IOC_PR_RESERVE:
574 return blkdev_pr_reserve(bdev, argp);
575 case IOC_PR_RELEASE:
576 return blkdev_pr_release(bdev, argp);
577 case IOC_PR_PREEMPT:
578 return blkdev_pr_preempt(bdev, argp, false);
579 case IOC_PR_PREEMPT_ABORT:
580 return blkdev_pr_preempt(bdev, argp, true);
581 case IOC_PR_CLEAR:
582 return blkdev_pr_clear(bdev, argp);
480 default: 583 default:
481 return __blkdev_driver_ioctl(bdev, mode, cmd, arg); 584 return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
482 } 585 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 19c2e947d4d1..fe25da05e823 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -35,6 +35,7 @@ struct sg_io_hdr;
35struct bsg_job; 35struct bsg_job;
36struct blkcg_gq; 36struct blkcg_gq;
37struct blk_flush_queue; 37struct blk_flush_queue;
38struct pr_ops;
38 39
39#define BLKDEV_MIN_RQ 4 40#define BLKDEV_MIN_RQ 4
40#define BLKDEV_MAX_RQ 128 /* Default maximum */ 41#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -1633,6 +1634,7 @@ struct block_device_operations {
1633 /* this callback is with swap_lock and sometimes page table lock held */ 1634 /* this callback is with swap_lock and sometimes page table lock held */
1634 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1635 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
1635 struct module *owner; 1636 struct module *owner;
1637 const struct pr_ops *pr_ops;
1636}; 1638};
1637 1639
1638extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, 1640extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
diff --git a/include/linux/pr.h b/include/linux/pr.h
new file mode 100644
index 000000000000..65c01c10b335
--- /dev/null
+++ b/include/linux/pr.h
@@ -0,0 +1,18 @@
1#ifndef LINUX_PR_H
2#define LINUX_PR_H
3
4#include <uapi/linux/pr.h>
5
/*
 * Persistent Reservation methods a block driver exposes through
 * block_device_operations->pr_ops.
 *
 * The block layer ioctl handlers check each method pointer before calling
 * it and return -EOPNOTSUPP when it is NULL, so a driver may implement only
 * a subset.  ->pr_preempt serves both IOC_PR_PREEMPT (abort == false) and
 * IOC_PR_PREEMPT_ABORT (abort == true).  The value each method returns is
 * passed back as the result of the ioctl handler.
 */
6struct pr_ops {
7 int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key,
8 u32 flags);
9 int (*pr_reserve)(struct block_device *bdev, u64 key,
10 enum pr_type type, u32 flags);
11 int (*pr_release)(struct block_device *bdev, u64 key,
12 enum pr_type type);
13 int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key,
14 enum pr_type type, bool abort);
15 int (*pr_clear)(struct block_device *bdev, u64 key);
16};
17
18#endif /* LINUX_PR_H */
diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h
new file mode 100644
index 000000000000..57d7c0f916b6
--- /dev/null
+++ b/include/uapi/linux/pr.h
@@ -0,0 +1,48 @@
1#ifndef _UAPI_PR_H
2#define _UAPI_PR_H
3
/*
 * Reservation types carried in the "type" field of struct pr_reservation
 * and struct pr_preempt.  The numeric values are part of the user space
 * ABI; note that 0 is not a defined type.
 */
4enum pr_type {
5 PR_WRITE_EXCLUSIVE = 1,
6 PR_EXCLUSIVE_ACCESS = 2,
7 PR_WRITE_EXCLUSIVE_REG_ONLY = 3,
8 PR_EXCLUSIVE_ACCESS_REG_ONLY = 4,
9 PR_WRITE_EXCLUSIVE_ALL_REGS = 5,
10 PR_EXCLUSIVE_ACCESS_ALL_REGS = 6,
11};
12
/*
 * Argument of IOC_PR_RESERVE and IOC_PR_RELEASE.
 *
 * key:   reservation key handed to the driver.
 * type:  reservation type, handed to the driver as an enum pr_type.
 * flags: PR_FL_* bits.
 */
13struct pr_reservation {
14 __u64 key;
15 __u32 type;
16 __u32 flags;
17};
18
/*
 * Argument of IOC_PR_REGISTER.
 *
 * old_key: key currently registered.
 * new_key: key to register.
 * flags:   PR_FL_* bits.
 * __pad:   explicit padding so the structure size is a multiple of 8 bytes.
 */
19struct pr_registration {
20 __u64 old_key;
21 __u64 new_key;
22 __u32 flags;
23 __u32 __pad;
24};
25
/*
 * Argument of IOC_PR_PREEMPT and IOC_PR_PREEMPT_ABORT.
 *
 * old_key: key of the reservation being preempted.
 * new_key: key for the replacing reservation.
 * type:    reservation type, handed to the driver as an enum pr_type.
 * flags:   PR_FL_* bits.
 */
26struct pr_preempt {
27 __u64 old_key;
28 __u64 new_key;
29 __u32 type;
30 __u32 flags;
31};
32
/*
 * Argument of IOC_PR_CLEAR.
 *
 * key:   reservation key handed to the driver.
 * flags: PR_FL_* bits.
 * __pad: explicit padding so the structure size is a multiple of 8 bytes.
 */
33struct pr_clear {
34 __u64 key;
35 __u32 flags;
36 __u32 __pad;
37};
38
39#define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */
40
/*
 * Persistent Reservation ioctl numbers: magic 'p', command numbers 200-205.
 * All are _IOW, i.e. user space passes the named structure in to the kernel.
 * RESERVE/RELEASE share struct pr_reservation and PREEMPT/PREEMPT_ABORT
 * share struct pr_preempt.
 */
41#define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration)
42#define IOC_PR_RESERVE _IOW('p', 201, struct pr_reservation)
43#define IOC_PR_RELEASE _IOW('p', 202, struct pr_reservation)
44#define IOC_PR_PREEMPT _IOW('p', 203, struct pr_preempt)
45#define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt)
46#define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear)
47
48#endif /* _UAPI_PR_H */