aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-block34
-rw-r--r--Documentation/block/data-integrity.txt327
-rw-r--r--block/Kconfig12
-rw-r--r--block/Makefile4
-rw-r--r--block/as-iosched.c18
-rw-r--r--block/blk-core.c19
-rw-r--r--block/blk-integrity.c381
-rw-r--r--block/blk-map.c6
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/blk-settings.c24
-rw-r--r--block/blk.h8
-rw-r--r--block/blktrace.c45
-rw-r--r--block/bsg.c38
-rw-r--r--block/cfq-iosched.c83
-rw-r--r--block/cmd-filter.c334
-rw-r--r--block/elevator.c8
-rw-r--r--block/genhd.c12
-rw-r--r--block/scsi_ioctl.c121
-rw-r--r--drivers/ata/libata-scsi.c3
-rw-r--r--drivers/block/DAC960.c157
-rw-r--r--drivers/block/aoe/aoecmd.c2
-rw-r--r--drivers/block/paride/pt.c20
-rw-r--r--drivers/block/pktcdvd.c46
-rw-r--r--drivers/block/xen-blkfront.c48
-rw-r--r--drivers/cdrom/cdrom.c274
-rw-r--r--drivers/md/linear.c10
-rw-r--r--drivers/md/raid0.c10
-rw-r--r--drivers/md/raid10.c15
-rw-r--r--drivers/md/raid5.c10
-rw-r--r--drivers/net/xen-netfront.c4
-rw-r--r--drivers/scsi/sg.c44
-rw-r--r--drivers/scsi/sr.c20
-rw-r--r--drivers/xen/xenbus/xenbus_client.c2
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c10
-rw-r--r--fs/Makefile1
-rw-r--r--fs/bio-integrity.c719
-rw-r--r--fs/bio.c88
-rw-r--r--fs/ramfs/file-mmu.c1
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/splice.c17
-rw-r--r--include/linux/bio.h130
-rw-r--r--include/linux/blkdev.h163
-rw-r--r--include/linux/blktrace_api.h1
-rw-r--r--include/linux/genhd.h12
-rw-r--r--include/linux/iocontext.h18
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
47 files changed, 2741 insertions, 564 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 4bd9ea539129..44f52a4f5903 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -26,3 +26,37 @@ Description:
26 I/O statistics of partition <part>. The format is the 26 I/O statistics of partition <part>. The format is the
27 same as the above-written /sys/block/<disk>/stat 27 same as the above-written /sys/block/<disk>/stat
28 format. 28 format.
29
30
31What: /sys/block/<disk>/integrity/format
32Date: June 2008
33Contact: Martin K. Petersen <martin.petersen@oracle.com>
34Description:
35 Metadata format for integrity capable block device.
36 E.g. T10-DIF-TYPE1-CRC.
37
38
39What: /sys/block/<disk>/integrity/read_verify
40Date: June 2008
41Contact: Martin K. Petersen <martin.petersen@oracle.com>
42Description:
43 Indicates whether the block layer should verify the
44 integrity of read requests serviced by devices that
45 support sending integrity metadata.
46
47
48What: /sys/block/<disk>/integrity/tag_size
49Date: June 2008
50Contact: Martin K. Petersen <martin.petersen@oracle.com>
51Description:
52 Number of bytes of integrity tag space available per
53 512 bytes of data.
54
55
56What: /sys/block/<disk>/integrity/write_generate
57Date: June 2008
58Contact: Martin K. Petersen <martin.petersen@oracle.com>
59Description:
60 Indicates whether the block layer should automatically
61 generate checksums for write requests bound for
62 devices that support receiving integrity metadata.
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
new file mode 100644
index 000000000000..e9dc8d86adc7
--- /dev/null
+++ b/Documentation/block/data-integrity.txt
@@ -0,0 +1,327 @@
1----------------------------------------------------------------------
21. INTRODUCTION
3
4Modern filesystems feature checksumming of data and metadata to
5protect against data corruption. However, the detection of the
6corruption is done at read time which could potentially be months
7after the data was written. At that point the original data that the
8application tried to write is most likely lost.
9
10The solution is to ensure that the disk is actually storing what the
11application meant it to. Recent additions to both the SCSI family
12protocols (SBC Data Integrity Field, SCC protection proposal) as well
13as SATA/T13 (External Path Protection) try to remedy this by adding
14support for appending integrity metadata to an I/O. The integrity
15metadata (or protection information in SCSI terminology) includes a
16checksum for each sector as well as an incrementing counter that
17ensures the individual sectors are written in the right order. And
18for some protection schemes also that the I/O is written to the right
19place on disk.
20
21Current storage controllers and devices implement various protective
22measures, for instance checksumming and scrubbing. But these
23technologies are working in their own isolated domains or at best
24between adjacent nodes in the I/O path. The interesting thing about
25DIF and the other integrity extensions is that the protection format
26is well defined and every node in the I/O path can verify the
27integrity of the I/O and reject it if corruption is detected. This
28allows not only corruption prevention but also isolation of the point
29of failure.
30
31----------------------------------------------------------------------
322. THE DATA INTEGRITY EXTENSIONS
33
34As written, the protocol extensions only protect the path between
35controller and storage device. However, many controllers actually
36allow the operating system to interact with the integrity metadata
37(IMD). We have been working with several FC/SAS HBA vendors to enable
38the protection information to be transferred to and from their
39controllers.
40
41The SCSI Data Integrity Field works by appending 8 bytes of protection
42information to each sector. The data + integrity metadata is stored
43in 520 byte sectors on disk. Data + IMD are interleaved when
44transferred between the controller and target. The T13 proposal is
45similar.
46
47Because it is highly inconvenient for operating systems to deal with
48520 (and 4104) byte sectors, we approached several HBA vendors and
49encouraged them to allow separation of the data and integrity metadata
50scatter-gather lists.
51
52The controller will interleave the buffers on write and split them on
53read. This means that the Linux can DMA the data buffers to and from
54host memory without changes to the page cache.
55
56Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
57is somewhat heavy to compute in software. Benchmarks found that
58calculating this checksum had a significant impact on system
59performance for a number of workloads. Some controllers allow a
60lighter-weight checksum to be used when interfacing with the operating
61system. Emulex, for instance, supports the TCP/IP checksum instead.
62The IP checksum received from the OS is converted to the 16-bit CRC
63when writing and vice versa. This allows the integrity metadata to be
64generated by Linux or the application at very low cost (comparable to
65software RAID5).
66
67The IP checksum is weaker than the CRC in terms of detecting bit
68errors. However, the strength is really in the separation of the data
69buffers and the integrity metadata. These two distinct buffers much
70match up for an I/O to complete.
71
72The separation of the data and integrity metadata buffers as well as
73the choice in checksums is referred to as the Data Integrity
74Extensions. As these extensions are outside the scope of the protocol
75bodies (T10, T13), Oracle and its partners are trying to standardize
76them within the Storage Networking Industry Association.
77
78----------------------------------------------------------------------
793. KERNEL CHANGES
80
81The data integrity framework in Linux enables protection information
82to be pinned to I/Os and sent to/received from controllers that
83support it.
84
85The advantage to the integrity extensions in SCSI and SATA is that
86they enable us to protect the entire path from application to storage
87device. However, at the same time this is also the biggest
88disadvantage. It means that the protection information must be in a
89format that can be understood by the disk.
90
91Generally Linux/POSIX applications are agnostic to the intricacies of
92the storage devices they are accessing. The virtual filesystem switch
93and the block layer make things like hardware sector size and
94transport protocols completely transparent to the application.
95
96However, this level of detail is required when preparing the
97protection information to send to a disk. Consequently, the very
98concept of an end-to-end protection scheme is a layering violation.
99It is completely unreasonable for an application to be aware whether
100it is accessing a SCSI or SATA disk.
101
102The data integrity support implemented in Linux attempts to hide this
103from the application. As far as the application (and to some extent
104the kernel) is concerned, the integrity metadata is opaque information
105that's attached to the I/O.
106
107The current implementation allows the block layer to automatically
108generate the protection information for any I/O. Eventually the
109intent is to move the integrity metadata calculation to userspace for
110user data. Metadata and other I/O that originates within the kernel
111will still use the automatic generation interface.
112
113Some storage devices allow each hardware sector to be tagged with a
11416-bit value. The owner of this tag space is the owner of the block
115device. I.e. the filesystem in most cases. The filesystem can use
116this extra space to tag sectors as they see fit. Because the tag
117space is limited, the block interface allows tagging bigger chunks by
118way of interleaving. This way, 8*16 bits of information can be
119attached to a typical 4KB filesystem block.
120
121This also means that applications such as fsck and mkfs will need
122access to manipulate the tags from user space. A passthrough
123interface for this is being worked on.
124
125
126----------------------------------------------------------------------
1274. BLOCK LAYER IMPLEMENTATION DETAILS
128
1294.1 BIO
130
131The data integrity patches add a new field to struct bio when
132CONFIG_BLK_DEV_INTEGRITY is enabled. bio->bi_integrity is a pointer
133to a struct bip which contains the bio integrity payload. Essentially
134a bip is a trimmed down struct bio which holds a bio_vec containing
135the integrity metadata and the required housekeeping information (bvec
136pool, vector count, etc.)
137
138A kernel subsystem can enable data integrity protection on a bio by
139calling bio_integrity_alloc(bio). This will allocate and attach the
140bip to the bio.
141
142Individual pages containing integrity metadata can subsequently be
143attached using bio_integrity_add_page().
144
145bio_free() will automatically free the bip.
146
147
1484.2 BLOCK DEVICE
149
150Because the format of the protection data is tied to the physical
151disk, each block device has been extended with a block integrity
152profile (struct blk_integrity). This optional profile is registered
153with the block layer using blk_integrity_register().
154
155The profile contains callback functions for generating and verifying
156the protection data, as well as getting and setting application tags.
157The profile also contains a few constants to aid in completing,
158merging and splitting the integrity metadata.
159
160Layered block devices will need to pick a profile that's appropriate
161for all subdevices. blk_integrity_compare() can help with that. DM
162and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
163will require extra work due to the application tag.
164
165
166----------------------------------------------------------------------
1675.0 BLOCK LAYER INTEGRITY API
168
1695.1 NORMAL FILESYSTEM
170
171 The normal filesystem is unaware that the underlying block device
172 is capable of sending/receiving integrity metadata. The IMD will
173 be automatically generated by the block layer at submit_bio() time
174 in case of a WRITE. A READ request will cause the I/O integrity
175 to be verified upon completion.
176
177 IMD generation and verification can be toggled using the
178
179 /sys/block/<bdev>/integrity/write_generate
180
181 and
182
183 /sys/block/<bdev>/integrity/read_verify
184
185 flags.
186
187
1885.2 INTEGRITY-AWARE FILESYSTEM
189
190 A filesystem that is integrity-aware can prepare I/Os with IMD
191 attached. It can also use the application tag space if this is
192 supported by the block device.
193
194
195 int bdev_integrity_enabled(block_device, int rw);
196
197 bdev_integrity_enabled() will return 1 if the block device
198 supports integrity metadata transfer for the data direction
199 specified in 'rw'.
200
201 bdev_integrity_enabled() honors the write_generate and
202 read_verify flags in sysfs and will respond accordingly.
203
204
205 int bio_integrity_prep(bio);
206
207 To generate IMD for WRITE and to set up buffers for READ, the
208 filesystem must call bio_integrity_prep(bio).
209
210 Prior to calling this function, the bio data direction and start
211 sector must be set, and the bio should have all data pages
212 added. It is up to the caller to ensure that the bio does not
213 change while I/O is in progress.
214
215 bio_integrity_prep() should only be called if
216 bio_integrity_enabled() returned 1.
217
218
219 int bio_integrity_tag_size(bio);
220
221 If the filesystem wants to use the application tag space it will
222 first have to find out how much storage space is available.
223 Because tag space is generally limited (usually 2 bytes per
224 sector regardless of sector size), the integrity framework
225 supports interleaving the information between the sectors in an
226 I/O.
227
228 Filesystems can call bio_integrity_tag_size(bio) to find out how
229 many bytes of storage are available for that particular bio.
230
231 Another option is bdev_get_tag_size(block_device) which will
232 return the number of available bytes per hardware sector.
233
234
235 int bio_integrity_set_tag(bio, void *tag_buf, len);
236
237 After a successful return from bio_integrity_prep(),
238 bio_integrity_set_tag() can be used to attach an opaque tag
239 buffer to a bio. Obviously this only makes sense if the I/O is
240 a WRITE.
241
242
243 int bio_integrity_get_tag(bio, void *tag_buf, len);
244
245 Similarly, at READ I/O completion time the filesystem can
246 retrieve the tag buffer using bio_integrity_get_tag().
247
248
2496.3 PASSING EXISTING INTEGRITY METADATA
250
251 Filesystems that either generate their own integrity metadata or
252 are capable of transferring IMD from user space can use the
253 following calls:
254
255
256 struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);
257
258 Allocates the bio integrity payload and hangs it off of the bio.
259 nr_pages indicate how many pages of protection data need to be
260 stored in the integrity bio_vec list (similar to bio_alloc()).
261
262 The integrity payload will be freed at bio_free() time.
263
264
265 int bio_integrity_add_page(bio, page, len, offset);
266
267 Attaches a page containing integrity metadata to an existing
268 bio. The bio must have an existing bip,
269 i.e. bio_integrity_alloc() must have been called. For a WRITE,
270 the integrity metadata in the pages must be in a format
271 understood by the target device with the notable exception that
272 the sector numbers will be remapped as the request traverses the
273 I/O stack. This implies that the pages added using this call
274 will be modified during I/O! The first reference tag in the
275 integrity metadata must have a value of bip->bip_sector.
276
277 Pages can be added using bio_integrity_add_page() as long as
278 there is room in the bip bio_vec array (nr_pages).
279
280 Upon completion of a READ operation, the attached pages will
281 contain the integrity metadata received from the storage device.
282 It is up to the receiver to process them and verify data
283 integrity upon completion.
284
285
2866.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY
287 METADATA
288
289 To enable integrity exchange on a block device the gendisk must be
290 registered as capable:
291
292 int blk_integrity_register(gendisk, blk_integrity);
293
294 The blk_integrity struct is a template and should contain the
295 following:
296
297 static struct blk_integrity my_profile = {
298 .name = "STANDARDSBODY-TYPE-VARIANT-CSUM",
299 .generate_fn = my_generate_fn,
300 .verify_fn = my_verify_fn,
301 .get_tag_fn = my_get_tag_fn,
302 .set_tag_fn = my_set_tag_fn,
303 .tuple_size = sizeof(struct my_tuple_size),
304 .tag_size = <tag bytes per hw sector>,
305 };
306
307 'name' is a text string which will be visible in sysfs. This is
308 part of the userland API so chose it carefully and never change
309 it. The format is standards body-type-variant.
310 E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
311
312 'generate_fn' generates appropriate integrity metadata (for WRITE).
313
314 'verify_fn' verifies that the data buffer matches the integrity
315 metadata.
316
317 'tuple_size' must be set to match the size of the integrity
318 metadata per sector. I.e. 8 for DIF and EPP.
319
320 'tag_size' must be set to identify how many bytes of tag space
321 are available per hardware sector. For DIF this is either 2 or
322 0 depending on the value of the Control Mode Page ATO bit.
323
324 See 6.2 for a description of get_tag_fn and set_tag_fn.
325
326----------------------------------------------------------------------
3272007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/block/Kconfig b/block/Kconfig
index 3e97f2bc446f..1ab7c15c8d7a 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -81,6 +81,18 @@ config BLK_DEV_BSG
81 81
82 If unsure, say N. 82 If unsure, say N.
83 83
84config BLK_DEV_INTEGRITY
85 bool "Block layer data integrity support"
86 ---help---
87 Some storage devices allow extra information to be
88 stored/retrieved to help protect the data. The block layer
89 data integrity option provides hooks which can be used by
90 filesystems to ensure better data integrity.
91
92 Say yes here if you have a storage device that provides the
93 T10/SCSI Data Integrity Field or the T13/ATA External Path
94 Protection. If in doubt, say N.
95
84endif # BLOCK 96endif # BLOCK
85 97
86config BLOCK_COMPAT 98config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d79594..208000b0750d 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
4 4
5obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ 5obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ 6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
7 blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o 7 blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
8 cmd-filter.o
8 9
9obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
10obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 11obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
@@ -14,3 +15,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
14 15
15obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 16obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
16obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o 17obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
18obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 743f33a01a07..9735acb5b4f5 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -151,6 +151,7 @@ enum arq_state {
151 151
152static DEFINE_PER_CPU(unsigned long, ioc_count); 152static DEFINE_PER_CPU(unsigned long, ioc_count);
153static struct completion *ioc_gone; 153static struct completion *ioc_gone;
154static DEFINE_SPINLOCK(ioc_gone_lock);
154 155
155static void as_move_to_dispatch(struct as_data *ad, struct request *rq); 156static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
156static void as_antic_stop(struct as_data *ad); 157static void as_antic_stop(struct as_data *ad);
@@ -164,8 +165,19 @@ static void free_as_io_context(struct as_io_context *aic)
164{ 165{
165 kfree(aic); 166 kfree(aic);
166 elv_ioc_count_dec(ioc_count); 167 elv_ioc_count_dec(ioc_count);
167 if (ioc_gone && !elv_ioc_count_read(ioc_count)) 168 if (ioc_gone) {
168 complete(ioc_gone); 169 /*
170 * AS scheduler is exiting, grab exit lock and check
171 * the pending io context count. If it hits zero,
172 * complete ioc_gone and set it back to NULL.
173 */
174 spin_lock(&ioc_gone_lock);
175 if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
176 complete(ioc_gone);
177 ioc_gone = NULL;
178 }
179 spin_unlock(&ioc_gone_lock);
180 }
169} 181}
170 182
171static void as_trim(struct io_context *ioc) 183static void as_trim(struct io_context *ioc)
@@ -1493,7 +1505,7 @@ static void __exit as_exit(void)
1493 /* ioc_gone's update must be visible before reading ioc_count */ 1505 /* ioc_gone's update must be visible before reading ioc_count */
1494 smp_wmb(); 1506 smp_wmb();
1495 if (elv_ioc_count_read(ioc_count)) 1507 if (elv_ioc_count_read(ioc_count))
1496 wait_for_completion(ioc_gone); 1508 wait_for_completion(&all_gone);
1497 synchronize_rcu(); 1509 synchronize_rcu();
1498} 1510}
1499 1511
diff --git a/block/blk-core.c b/block/blk-core.c
index 1905aaba49fb..dbc7f42b5d2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -143,6 +143,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
143 143
144 bio->bi_size -= nbytes; 144 bio->bi_size -= nbytes;
145 bio->bi_sector += (nbytes >> 9); 145 bio->bi_sector += (nbytes >> 9);
146
147 if (bio_integrity(bio))
148 bio_integrity_advance(bio, nbytes);
149
146 if (bio->bi_size == 0) 150 if (bio->bi_size == 0)
147 bio_endio(bio, error); 151 bio_endio(bio, error);
148 } else { 152 } else {
@@ -201,8 +205,7 @@ void blk_plug_device(struct request_queue *q)
201 if (blk_queue_stopped(q)) 205 if (blk_queue_stopped(q))
202 return; 206 return;
203 207
204 if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { 208 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
205 __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
206 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 209 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
207 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 210 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
208 } 211 }
@@ -217,10 +220,9 @@ int blk_remove_plug(struct request_queue *q)
217{ 220{
218 WARN_ON(!irqs_disabled()); 221 WARN_ON(!irqs_disabled());
219 222
220 if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 223 if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
221 return 0; 224 return 0;
222 225
223 queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
224 del_timer(&q->unplug_timer); 226 del_timer(&q->unplug_timer);
225 return 1; 227 return 1;
226} 228}
@@ -324,8 +326,7 @@ void blk_start_queue(struct request_queue *q)
324 * one level of recursion is ok and is much faster than kicking 326 * one level of recursion is ok and is much faster than kicking
325 * the unplug handling 327 * the unplug handling
326 */ 328 */
327 if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 329 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
328 queue_flag_set(QUEUE_FLAG_REENTER, q);
329 q->request_fn(q); 330 q->request_fn(q);
330 queue_flag_clear(QUEUE_FLAG_REENTER, q); 331 queue_flag_clear(QUEUE_FLAG_REENTER, q);
331 } else { 332 } else {
@@ -390,8 +391,7 @@ void __blk_run_queue(struct request_queue *q)
390 * handling reinvoke the handler shortly if we already got there. 391 * handling reinvoke the handler shortly if we already got there.
391 */ 392 */
392 if (!elv_queue_empty(q)) { 393 if (!elv_queue_empty(q)) {
393 if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 394 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
394 queue_flag_set(QUEUE_FLAG_REENTER, q);
395 q->request_fn(q); 395 q->request_fn(q);
396 queue_flag_clear(QUEUE_FLAG_REENTER, q); 396 queue_flag_clear(QUEUE_FLAG_REENTER, q);
397 } else { 397 } else {
@@ -1381,6 +1381,9 @@ end_io:
1381 */ 1381 */
1382 blk_partition_remap(bio); 1382 blk_partition_remap(bio);
1383 1383
1384 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1385 goto end_io;
1386
1384 if (old_sector != -1) 1387 if (old_sector != -1)
1385 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 1388 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
1386 old_sector); 1389 old_sector);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
new file mode 100644
index 000000000000..3f1a8478cc38
--- /dev/null
+++ b/block/blk-integrity.c
@@ -0,0 +1,381 @@
1/*
2 * blk-integrity.c - Block layer data integrity extensions
3 *
4 * Copyright (C) 2007, 2008 Oracle Corporation
5 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
19 * USA.
20 *
21 */
22
23#include <linux/blkdev.h>
24#include <linux/mempool.h>
25#include <linux/bio.h>
26#include <linux/scatterlist.h>
27
28#include "blk.h"
29
30static struct kmem_cache *integrity_cachep;
31
32/**
33 * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
34 * @rq: request with integrity metadata attached
35 *
36 * Description: Returns the number of elements required in a
37 * scatterlist corresponding to the integrity metadata in a request.
38 */
39int blk_rq_count_integrity_sg(struct request *rq)
40{
41 struct bio_vec *iv, *ivprv;
42 struct req_iterator iter;
43 unsigned int segments;
44
45 ivprv = NULL;
46 segments = 0;
47
48 rq_for_each_integrity_segment(iv, rq, iter) {
49
50 if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv))
51 segments++;
52
53 ivprv = iv;
54 }
55
56 return segments;
57}
58EXPORT_SYMBOL(blk_rq_count_integrity_sg);
59
60/**
61 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
62 * @rq: request with integrity metadata attached
63 * @sglist: target scatterlist
64 *
65 * Description: Map the integrity vectors in request into a
66 * scatterlist. The scatterlist must be big enough to hold all
67 * elements. I.e. sized using blk_rq_count_integrity_sg().
68 */
69int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
70{
71 struct bio_vec *iv, *ivprv;
72 struct req_iterator iter;
73 struct scatterlist *sg;
74 unsigned int segments;
75
76 ivprv = NULL;
77 sg = NULL;
78 segments = 0;
79
80 rq_for_each_integrity_segment(iv, rq, iter) {
81
82 if (ivprv) {
83 if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
84 goto new_segment;
85
86 sg->length += iv->bv_len;
87 } else {
88new_segment:
89 if (!sg)
90 sg = sglist;
91 else {
92 sg->page_link &= ~0x02;
93 sg = sg_next(sg);
94 }
95
96 sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
97 segments++;
98 }
99
100 ivprv = iv;
101 }
102
103 if (sg)
104 sg_mark_end(sg);
105
106 return segments;
107}
108EXPORT_SYMBOL(blk_rq_map_integrity_sg);
109
110/**
111 * blk_integrity_compare - Compare integrity profile of two block devices
112 * @b1: Device to compare
113 * @b2: Device to compare
114 *
115 * Description: Meta-devices like DM and MD need to verify that all
116 * sub-devices use the same integrity format before advertising to
117 * upper layers that they can send/receive integrity metadata. This
118 * function can be used to check whether two block devices have
119 * compatible integrity formats.
120 */
121int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
122{
123 struct blk_integrity *b1 = bd1->bd_disk->integrity;
124 struct blk_integrity *b2 = bd2->bd_disk->integrity;
125
126 BUG_ON(bd1->bd_disk == NULL);
127 BUG_ON(bd2->bd_disk == NULL);
128
129 if (!b1 || !b2)
130 return 0;
131
132 if (b1->sector_size != b2->sector_size) {
133 printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
134 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
135 b1->sector_size, b2->sector_size);
136 return -1;
137 }
138
139 if (b1->tuple_size != b2->tuple_size) {
140 printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
141 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
142 b1->tuple_size, b2->tuple_size);
143 return -1;
144 }
145
146 if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
147 printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
148 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
149 b1->tag_size, b2->tag_size);
150 return -1;
151 }
152
153 if (strcmp(b1->name, b2->name)) {
154 printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
155 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
156 b1->name, b2->name);
157 return -1;
158 }
159
160 return 0;
161}
162EXPORT_SYMBOL(blk_integrity_compare);
163
164struct integrity_sysfs_entry {
165 struct attribute attr;
166 ssize_t (*show)(struct blk_integrity *, char *);
167 ssize_t (*store)(struct blk_integrity *, const char *, size_t);
168};
169
170static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
171 char *page)
172{
173 struct blk_integrity *bi =
174 container_of(kobj, struct blk_integrity, kobj);
175 struct integrity_sysfs_entry *entry =
176 container_of(attr, struct integrity_sysfs_entry, attr);
177
178 return entry->show(bi, page);
179}
180
181static ssize_t integrity_attr_store(struct kobject *kobj,
182 struct attribute *attr, const char *page,
183 size_t count)
184{
185 struct blk_integrity *bi =
186 container_of(kobj, struct blk_integrity, kobj);
187 struct integrity_sysfs_entry *entry =
188 container_of(attr, struct integrity_sysfs_entry, attr);
189 ssize_t ret = 0;
190
191 if (entry->store)
192 ret = entry->store(bi, page, count);
193
194 return ret;
195}
196
197static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
198{
199 if (bi != NULL && bi->name != NULL)
200 return sprintf(page, "%s\n", bi->name);
201 else
202 return sprintf(page, "none\n");
203}
204
205static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
206{
207 if (bi != NULL)
208 return sprintf(page, "%u\n", bi->tag_size);
209 else
210 return sprintf(page, "0\n");
211}
212
213static ssize_t integrity_read_store(struct blk_integrity *bi,
214 const char *page, size_t count)
215{
216 char *p = (char *) page;
217 unsigned long val = simple_strtoul(p, &p, 10);
218
219 if (val)
220 bi->flags |= INTEGRITY_FLAG_READ;
221 else
222 bi->flags &= ~INTEGRITY_FLAG_READ;
223
224 return count;
225}
226
227static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
228{
229 return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0);
230}
231
232static ssize_t integrity_write_store(struct blk_integrity *bi,
233 const char *page, size_t count)
234{
235 char *p = (char *) page;
236 unsigned long val = simple_strtoul(p, &p, 10);
237
238 if (val)
239 bi->flags |= INTEGRITY_FLAG_WRITE;
240 else
241 bi->flags &= ~INTEGRITY_FLAG_WRITE;
242
243 return count;
244}
245
246static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
247{
248 return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0);
249}
250
251static struct integrity_sysfs_entry integrity_format_entry = {
252 .attr = { .name = "format", .mode = S_IRUGO },
253 .show = integrity_format_show,
254};
255
256static struct integrity_sysfs_entry integrity_tag_size_entry = {
257 .attr = { .name = "tag_size", .mode = S_IRUGO },
258 .show = integrity_tag_size_show,
259};
260
261static struct integrity_sysfs_entry integrity_read_entry = {
262 .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
263 .show = integrity_read_show,
264 .store = integrity_read_store,
265};
266
267static struct integrity_sysfs_entry integrity_write_entry = {
268 .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
269 .show = integrity_write_show,
270 .store = integrity_write_store,
271};
272
273static struct attribute *integrity_attrs[] = {
274 &integrity_format_entry.attr,
275 &integrity_tag_size_entry.attr,
276 &integrity_read_entry.attr,
277 &integrity_write_entry.attr,
278 NULL,
279};
280
281static struct sysfs_ops integrity_ops = {
282 .show = &integrity_attr_show,
283 .store = &integrity_attr_store,
284};
285
286static int __init blk_dev_integrity_init(void)
287{
288 integrity_cachep = kmem_cache_create("blkdev_integrity",
289 sizeof(struct blk_integrity),
290 0, SLAB_PANIC, NULL);
291 return 0;
292}
293subsys_initcall(blk_dev_integrity_init);
294
295static void blk_integrity_release(struct kobject *kobj)
296{
297 struct blk_integrity *bi =
298 container_of(kobj, struct blk_integrity, kobj);
299
300 kmem_cache_free(integrity_cachep, bi);
301}
302
303static struct kobj_type integrity_ktype = {
304 .default_attrs = integrity_attrs,
305 .sysfs_ops = &integrity_ops,
306 .release = blk_integrity_release,
307};
308
309/**
310 * blk_integrity_register - Register a gendisk as being integrity-capable
311 * @disk: struct gendisk pointer to make integrity-aware
312 * @template: integrity profile
313 *
314 * Description: When a device needs to advertise itself as being able
315 * to send/receive integrity metadata it must use this function to
316 * register the capability with the block layer. The template is a
317 * blk_integrity struct with values appropriate for the underlying
318 * hardware. See Documentation/block/data-integrity.txt.
319 */
320int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
321{
322 struct blk_integrity *bi;
323
324 BUG_ON(disk == NULL);
325 BUG_ON(template == NULL);
326
327 if (disk->integrity == NULL) {
328 bi = kmem_cache_alloc(integrity_cachep,
329 GFP_KERNEL | __GFP_ZERO);
330 if (!bi)
331 return -1;
332
333 if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
334 &disk->dev.kobj, "%s", "integrity")) {
335 kmem_cache_free(integrity_cachep, bi);
336 return -1;
337 }
338
339 kobject_uevent(&bi->kobj, KOBJ_ADD);
340
341 bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
342 bi->sector_size = disk->queue->hardsect_size;
343 disk->integrity = bi;
344 } else
345 bi = disk->integrity;
346
347 /* Use the provided profile as template */
348 bi->name = template->name;
349 bi->generate_fn = template->generate_fn;
350 bi->verify_fn = template->verify_fn;
351 bi->tuple_size = template->tuple_size;
352 bi->set_tag_fn = template->set_tag_fn;
353 bi->get_tag_fn = template->get_tag_fn;
354 bi->tag_size = template->tag_size;
355
356 return 0;
357}
358EXPORT_SYMBOL(blk_integrity_register);
359
360/**
361 * blk_integrity_unregister - Remove block integrity profile
362 * @disk: disk whose integrity profile to deallocate
363 *
364 * Description: This function frees all memory used by the block
365 * integrity profile. To be called at device teardown.
366 */
367void blk_integrity_unregister(struct gendisk *disk)
368{
369 struct blk_integrity *bi;
370
371 if (!disk || !disk->integrity)
372 return;
373
374 bi = disk->integrity;
375
376 kobject_uevent(&bi->kobj, KOBJ_REMOVE);
377 kobject_del(&bi->kobj);
378 kobject_put(&disk->dev.kobj);
379 kmem_cache_free(integrity_cachep, bi);
380}
381EXPORT_SYMBOL(blk_integrity_unregister);
diff --git a/block/blk-map.c b/block/blk-map.c
index 0b1af5a3537c..ddd96fb11a7d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -210,6 +210,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
210 if (!bio_flagged(bio, BIO_USER_MAPPED)) 210 if (!bio_flagged(bio, BIO_USER_MAPPED))
211 rq->cmd_flags |= REQ_COPY_USER; 211 rq->cmd_flags |= REQ_COPY_USER;
212 212
213 blk_queue_bounce(q, &bio);
213 bio_get(bio); 214 bio_get(bio);
214 blk_rq_bio_prep(q, rq, bio); 215 blk_rq_bio_prep(q, rq, bio);
215 rq->buffer = rq->data = NULL; 216 rq->buffer = rq->data = NULL;
@@ -268,6 +269,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
268 int reading = rq_data_dir(rq) == READ; 269 int reading = rq_data_dir(rq) == READ;
269 int do_copy = 0; 270 int do_copy = 0;
270 struct bio *bio; 271 struct bio *bio;
272 unsigned long stack_mask = ~(THREAD_SIZE - 1);
271 273
272 if (len > (q->max_hw_sectors << 9)) 274 if (len > (q->max_hw_sectors << 9))
273 return -EINVAL; 275 return -EINVAL;
@@ -278,6 +280,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
278 alignment = queue_dma_alignment(q) | q->dma_pad_mask; 280 alignment = queue_dma_alignment(q) | q->dma_pad_mask;
279 do_copy = ((kaddr & alignment) || (len & alignment)); 281 do_copy = ((kaddr & alignment) || (len & alignment));
280 282
283 if (!((kaddr & stack_mask) ^
284 ((unsigned long)current->stack & stack_mask)))
285 do_copy = 1;
286
281 if (do_copy) 287 if (do_copy)
282 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); 288 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
283 else 289 else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 651136aae76e..5efc9e7a68b7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -441,6 +441,9 @@ static int attempt_merge(struct request_queue *q, struct request *req,
441 || next->special) 441 || next->special)
442 return 0; 442 return 0;
443 443
444 if (blk_integrity_rq(req) != blk_integrity_rq(next))
445 return 0;
446
444 /* 447 /*
445 * If we are allowed to merge, then append bio list 448 * If we are allowed to merge, then append bio list
446 * from next to rq and release next. merge_requests_fn 449 * from next to rq and release next. merge_requests_fn
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8dd86418f35d..dfc77012843f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -302,11 +302,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
302 * @q: the request queue for the device 302 * @q: the request queue for the device
303 * @mask: pad mask 303 * @mask: pad mask
304 * 304 *
305 * Set pad mask. Direct IO requests are padded to the mask specified. 305 * Set dma pad mask.
306 * 306 *
307 * Appending pad buffer to a request modifies ->data_len such that it 307 * Appending pad buffer to a request modifies the last entry of a
308 * includes the pad buffer. The original requested data length can be 308 * scatter list such that it includes the pad buffer.
309 * obtained using blk_rq_raw_data_len().
310 **/ 309 **/
311void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) 310void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
312{ 311{
@@ -315,6 +314,23 @@ void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
315EXPORT_SYMBOL(blk_queue_dma_pad); 314EXPORT_SYMBOL(blk_queue_dma_pad);
316 315
317/** 316/**
317 * blk_queue_update_dma_pad - update pad mask
318 * @q: the request queue for the device
319 * @mask: pad mask
320 *
321 * Update dma pad mask.
322 *
323 * Appending pad buffer to a request modifies the last entry of a
324 * scatter list such that it includes the pad buffer.
325 **/
326void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
327{
328 if (mask > q->dma_pad_mask)
329 q->dma_pad_mask = mask;
330}
331EXPORT_SYMBOL(blk_queue_update_dma_pad);
332
333/**
318 * blk_queue_dma_drain - Set up a drain buffer for excess dma. 334 * blk_queue_dma_drain - Set up a drain buffer for excess dma.
319 * @q: the request queue for the device 335 * @q: the request queue for the device
320 * @dma_drain_needed: fn which returns non-zero if drain is necessary 336 * @dma_drain_needed: fn which returns non-zero if drain is necessary
diff --git a/block/blk.h b/block/blk.h
index 59776ab4742a..c79f30e1df52 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,4 +51,12 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
51 return q->nr_congestion_off; 51 return q->nr_congestion_off;
52} 52}
53 53
54#if defined(CONFIG_BLK_DEV_INTEGRITY)
55
56#define rq_for_each_integrity_segment(bvl, _rq, _iter) \
57 __rq_for_each_bio(_iter.bio, _rq) \
58 bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i)
59
60#endif /* BLK_DEV_INTEGRITY */
61
54#endif 62#endif
diff --git a/block/blktrace.c b/block/blktrace.c
index 8d3a27780260..eb9651ccb241 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -244,6 +244,7 @@ err:
244static void blk_trace_cleanup(struct blk_trace *bt) 244static void blk_trace_cleanup(struct blk_trace *bt)
245{ 245{
246 relay_close(bt->rchan); 246 relay_close(bt->rchan);
247 debugfs_remove(bt->msg_file);
247 debugfs_remove(bt->dropped_file); 248 debugfs_remove(bt->dropped_file);
248 blk_remove_tree(bt->dir); 249 blk_remove_tree(bt->dir);
249 free_percpu(bt->sequence); 250 free_percpu(bt->sequence);
@@ -291,6 +292,44 @@ static const struct file_operations blk_dropped_fops = {
291 .read = blk_dropped_read, 292 .read = blk_dropped_read,
292}; 293};
293 294
295static int blk_msg_open(struct inode *inode, struct file *filp)
296{
297 filp->private_data = inode->i_private;
298
299 return 0;
300}
301
302static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
303 size_t count, loff_t *ppos)
304{
305 char *msg;
306 struct blk_trace *bt;
307
308 if (count > BLK_TN_MAX_MSG)
309 return -EINVAL;
310
311 msg = kmalloc(count, GFP_KERNEL);
312 if (msg == NULL)
313 return -ENOMEM;
314
315 if (copy_from_user(msg, buffer, count)) {
316 kfree(msg);
317 return -EFAULT;
318 }
319
320 bt = filp->private_data;
321 __trace_note_message(bt, "%s", msg);
322 kfree(msg);
323
324 return count;
325}
326
327static const struct file_operations blk_msg_fops = {
328 .owner = THIS_MODULE,
329 .open = blk_msg_open,
330 .write = blk_msg_write,
331};
332
294/* 333/*
295 * Keep track of how many times we encountered a full subbuffer, to aid 334 * Keep track of how many times we encountered a full subbuffer, to aid
296 * the user space app in telling how many lost events there were. 335 * the user space app in telling how many lost events there were.
@@ -380,6 +419,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
380 if (!bt->dropped_file) 419 if (!bt->dropped_file)
381 goto err; 420 goto err;
382 421
422 bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
423 if (!bt->msg_file)
424 goto err;
425
383 bt->rchan = relay_open("trace", dir, buts->buf_size, 426 bt->rchan = relay_open("trace", dir, buts->buf_size,
384 buts->buf_nr, &blk_relay_callbacks, bt); 427 buts->buf_nr, &blk_relay_callbacks, bt);
385 if (!bt->rchan) 428 if (!bt->rchan)
@@ -409,6 +452,8 @@ err:
409 if (dir) 452 if (dir)
410 blk_remove_tree(dir); 453 blk_remove_tree(dir);
411 if (bt) { 454 if (bt) {
455 if (bt->msg_file)
456 debugfs_remove(bt->msg_file);
412 if (bt->dropped_file) 457 if (bt->dropped_file)
413 debugfs_remove(bt->dropped_file); 458 debugfs_remove(bt->dropped_file);
414 free_percpu(bt->sequence); 459 free_percpu(bt->sequence);
diff --git a/block/bsg.c b/block/bsg.c
index 54d617f7df3e..93e757d7174b 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -44,11 +44,12 @@ struct bsg_device {
44 char name[BUS_ID_SIZE]; 44 char name[BUS_ID_SIZE];
45 int max_queue; 45 int max_queue;
46 unsigned long flags; 46 unsigned long flags;
47 struct blk_scsi_cmd_filter *cmd_filter;
48 mode_t *f_mode;
47}; 49};
48 50
49enum { 51enum {
50 BSG_F_BLOCK = 1, 52 BSG_F_BLOCK = 1,
51 BSG_F_WRITE_PERM = 2,
52}; 53};
53 54
54#define BSG_DEFAULT_CMDS 64 55#define BSG_DEFAULT_CMDS 64
@@ -172,7 +173,7 @@ unlock:
172} 173}
173 174
174static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, 175static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
175 struct sg_io_v4 *hdr, int has_write_perm) 176 struct sg_io_v4 *hdr, struct bsg_device *bd)
176{ 177{
177 if (hdr->request_len > BLK_MAX_CDB) { 178 if (hdr->request_len > BLK_MAX_CDB) {
178 rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL); 179 rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +186,8 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
185 return -EFAULT; 186 return -EFAULT;
186 187
187 if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { 188 if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
188 if (blk_verify_command(rq->cmd, has_write_perm)) 189 if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
190 bd->f_mode))
189 return -EPERM; 191 return -EPERM;
190 } else if (!capable(CAP_SYS_RAWIO)) 192 } else if (!capable(CAP_SYS_RAWIO))
191 return -EPERM; 193 return -EPERM;
@@ -263,8 +265,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
263 rq = blk_get_request(q, rw, GFP_KERNEL); 265 rq = blk_get_request(q, rw, GFP_KERNEL);
264 if (!rq) 266 if (!rq)
265 return ERR_PTR(-ENOMEM); 267 return ERR_PTR(-ENOMEM);
266 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM, 268 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
267 &bd->flags));
268 if (ret) 269 if (ret)
269 goto out; 270 goto out;
270 271
@@ -566,12 +567,23 @@ static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
566 set_bit(BSG_F_BLOCK, &bd->flags); 567 set_bit(BSG_F_BLOCK, &bd->flags);
567} 568}
568 569
569static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file) 570static void bsg_set_cmd_filter(struct bsg_device *bd,
571 struct file *file)
570{ 572{
571 if (file->f_mode & FMODE_WRITE) 573 struct inode *inode;
572 set_bit(BSG_F_WRITE_PERM, &bd->flags); 574 struct gendisk *disk;
573 else 575
574 clear_bit(BSG_F_WRITE_PERM, &bd->flags); 576 if (!file)
577 return;
578
579 inode = file->f_dentry->d_inode;
580 if (!inode)
581 return;
582
583 disk = inode->i_bdev->bd_disk;
584
585 bd->cmd_filter = &disk->cmd_filter;
586 bd->f_mode = &file->f_mode;
575} 587}
576 588
577/* 589/*
@@ -595,6 +607,8 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
595 dprintk("%s: read %Zd bytes\n", bd->name, count); 607 dprintk("%s: read %Zd bytes\n", bd->name, count);
596 608
597 bsg_set_block(bd, file); 609 bsg_set_block(bd, file);
610 bsg_set_cmd_filter(bd, file);
611
598 bytes_read = 0; 612 bytes_read = 0;
599 ret = __bsg_read(buf, count, bd, NULL, &bytes_read); 613 ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
600 *ppos = bytes_read; 614 *ppos = bytes_read;
@@ -668,7 +682,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
668 dprintk("%s: write %Zd bytes\n", bd->name, count); 682 dprintk("%s: write %Zd bytes\n", bd->name, count);
669 683
670 bsg_set_block(bd, file); 684 bsg_set_block(bd, file);
671 bsg_set_write_perm(bd, file); 685 bsg_set_cmd_filter(bd, file);
672 686
673 bytes_written = 0; 687 bytes_written = 0;
674 ret = __bsg_write(bd, buf, count, &bytes_written); 688 ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -772,7 +786,9 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
772 } 786 }
773 787
774 bd->queue = rq; 788 bd->queue = rq;
789
775 bsg_set_block(bd, file); 790 bsg_set_block(bd, file);
791 bsg_set_cmd_filter(bd, file);
776 792
777 atomic_set(&bd->ref_count, 1); 793 atomic_set(&bd->ref_count, 1);
778 mutex_lock(&bsg_mutex); 794 mutex_lock(&bsg_mutex);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d01b411c72f0..1e2aff812ee2 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -11,6 +11,7 @@
11#include <linux/elevator.h> 11#include <linux/elevator.h>
12#include <linux/rbtree.h> 12#include <linux/rbtree.h>
13#include <linux/ioprio.h> 13#include <linux/ioprio.h>
14#include <linux/blktrace_api.h>
14 15
15/* 16/*
16 * tunables 17 * tunables
@@ -41,13 +42,14 @@ static int cfq_slice_idle = HZ / 125;
41 42
42#define RQ_CIC(rq) \ 43#define RQ_CIC(rq) \
43 ((struct cfq_io_context *) (rq)->elevator_private) 44 ((struct cfq_io_context *) (rq)->elevator_private)
44#define RQ_CFQQ(rq) ((rq)->elevator_private2) 45#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
45 46
46static struct kmem_cache *cfq_pool; 47static struct kmem_cache *cfq_pool;
47static struct kmem_cache *cfq_ioc_pool; 48static struct kmem_cache *cfq_ioc_pool;
48 49
49static DEFINE_PER_CPU(unsigned long, ioc_count); 50static DEFINE_PER_CPU(unsigned long, ioc_count);
50static struct completion *ioc_gone; 51static struct completion *ioc_gone;
52static DEFINE_SPINLOCK(ioc_gone_lock);
51 53
52#define CFQ_PRIO_LISTS IOPRIO_BE_NR 54#define CFQ_PRIO_LISTS IOPRIO_BE_NR
53#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 55#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -155,6 +157,7 @@ struct cfq_queue {
155 unsigned short ioprio, org_ioprio; 157 unsigned short ioprio, org_ioprio;
156 unsigned short ioprio_class, org_ioprio_class; 158 unsigned short ioprio_class, org_ioprio_class;
157 159
160 pid_t pid;
158}; 161};
159 162
160enum cfqq_state_flags { 163enum cfqq_state_flags {
@@ -198,6 +201,11 @@ CFQ_CFQQ_FNS(slice_new);
198CFQ_CFQQ_FNS(sync); 201CFQ_CFQQ_FNS(sync);
199#undef CFQ_CFQQ_FNS 202#undef CFQ_CFQQ_FNS
200 203
204#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
205 blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
206#define cfq_log(cfqd, fmt, args...) \
207 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
208
201static void cfq_dispatch_insert(struct request_queue *, struct request *); 209static void cfq_dispatch_insert(struct request_queue *, struct request *);
202static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, 210static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
203 struct io_context *, gfp_t); 211 struct io_context *, gfp_t);
@@ -234,8 +242,10 @@ static inline int cfq_bio_sync(struct bio *bio)
234 */ 242 */
235static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 243static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
236{ 244{
237 if (cfqd->busy_queues) 245 if (cfqd->busy_queues) {
246 cfq_log(cfqd, "schedule dispatch");
238 kblockd_schedule_work(&cfqd->unplug_work); 247 kblockd_schedule_work(&cfqd->unplug_work);
248 }
239} 249}
240 250
241static int cfq_queue_empty(struct request_queue *q) 251static int cfq_queue_empty(struct request_queue *q)
@@ -270,6 +280,7 @@ static inline void
270cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 280cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
271{ 281{
272 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 282 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
283 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
273} 284}
274 285
275/* 286/*
@@ -539,6 +550,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
539 */ 550 */
540static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 551static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
541{ 552{
553 cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
542 BUG_ON(cfq_cfqq_on_rr(cfqq)); 554 BUG_ON(cfq_cfqq_on_rr(cfqq));
543 cfq_mark_cfqq_on_rr(cfqq); 555 cfq_mark_cfqq_on_rr(cfqq);
544 cfqd->busy_queues++; 556 cfqd->busy_queues++;
@@ -552,6 +564,7 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
552 */ 564 */
553static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 565static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
554{ 566{
567 cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
555 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 568 BUG_ON(!cfq_cfqq_on_rr(cfqq));
556 cfq_clear_cfqq_on_rr(cfqq); 569 cfq_clear_cfqq_on_rr(cfqq);
557 570
@@ -638,6 +651,8 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
638 struct cfq_data *cfqd = q->elevator->elevator_data; 651 struct cfq_data *cfqd = q->elevator->elevator_data;
639 652
640 cfqd->rq_in_driver++; 653 cfqd->rq_in_driver++;
654 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
655 cfqd->rq_in_driver);
641 656
642 /* 657 /*
643 * If the depth is larger 1, it really could be queueing. But lets 658 * If the depth is larger 1, it really could be queueing. But lets
@@ -657,6 +672,8 @@ static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
657 672
658 WARN_ON(!cfqd->rq_in_driver); 673 WARN_ON(!cfqd->rq_in_driver);
659 cfqd->rq_in_driver--; 674 cfqd->rq_in_driver--;
675 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
676 cfqd->rq_in_driver);
660} 677}
661 678
662static void cfq_remove_request(struct request *rq) 679static void cfq_remove_request(struct request *rq)
@@ -746,6 +763,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
746 struct cfq_queue *cfqq) 763 struct cfq_queue *cfqq)
747{ 764{
748 if (cfqq) { 765 if (cfqq) {
766 cfq_log_cfqq(cfqd, cfqq, "set_active");
749 cfqq->slice_end = 0; 767 cfqq->slice_end = 0;
750 cfq_clear_cfqq_must_alloc_slice(cfqq); 768 cfq_clear_cfqq_must_alloc_slice(cfqq);
751 cfq_clear_cfqq_fifo_expire(cfqq); 769 cfq_clear_cfqq_fifo_expire(cfqq);
@@ -763,6 +781,8 @@ static void
763__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 781__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
764 int timed_out) 782 int timed_out)
765{ 783{
784 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
785
766 if (cfq_cfqq_wait_request(cfqq)) 786 if (cfq_cfqq_wait_request(cfqq))
767 del_timer(&cfqd->idle_slice_timer); 787 del_timer(&cfqd->idle_slice_timer);
768 788
@@ -772,8 +792,10 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
772 /* 792 /*
773 * store what was left of this slice, if the queue idled/timed out 793 * store what was left of this slice, if the queue idled/timed out
774 */ 794 */
775 if (timed_out && !cfq_cfqq_slice_new(cfqq)) 795 if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
776 cfqq->slice_resid = cfqq->slice_end - jiffies; 796 cfqq->slice_resid = cfqq->slice_end - jiffies;
797 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
798 }
777 799
778 cfq_resort_rr_list(cfqd, cfqq); 800 cfq_resort_rr_list(cfqd, cfqq);
779 801
@@ -866,6 +888,12 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
866 return; 888 return;
867 889
868 /* 890 /*
891 * still requests with the driver, don't idle
892 */
893 if (cfqd->rq_in_driver)
894 return;
895
896 /*
869 * task has exited, don't wait 897 * task has exited, don't wait
870 */ 898 */
871 cic = cfqd->active_cic; 899 cic = cfqd->active_cic;
@@ -892,6 +920,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
892 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); 920 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
893 921
894 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 922 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
923 cfq_log(cfqd, "arm_idle: %lu", sl);
895} 924}
896 925
897/* 926/*
@@ -902,6 +931,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
902 struct cfq_data *cfqd = q->elevator->elevator_data; 931 struct cfq_data *cfqd = q->elevator->elevator_data;
903 struct cfq_queue *cfqq = RQ_CFQQ(rq); 932 struct cfq_queue *cfqq = RQ_CFQQ(rq);
904 933
934 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
935
905 cfq_remove_request(rq); 936 cfq_remove_request(rq);
906 cfqq->dispatched++; 937 cfqq->dispatched++;
907 elv_dispatch_sort(q, rq); 938 elv_dispatch_sort(q, rq);
@@ -931,8 +962,9 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
931 rq = rq_entry_fifo(cfqq->fifo.next); 962 rq = rq_entry_fifo(cfqq->fifo.next);
932 963
933 if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) 964 if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
934 return NULL; 965 rq = NULL;
935 966
967 cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq);
936 return rq; 968 return rq;
937} 969}
938 970
@@ -1072,6 +1104,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
1072 1104
1073 BUG_ON(cfqd->busy_queues); 1105 BUG_ON(cfqd->busy_queues);
1074 1106
1107 cfq_log(cfqd, "forced_dispatch=%d\n", dispatched);
1075 return dispatched; 1108 return dispatched;
1076} 1109}
1077 1110
@@ -1112,6 +1145,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
1112 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1145 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1113 } 1146 }
1114 1147
1148 cfq_log(cfqd, "dispatched=%d", dispatched);
1115 return dispatched; 1149 return dispatched;
1116} 1150}
1117 1151
@@ -1130,6 +1164,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
1130 if (!atomic_dec_and_test(&cfqq->ref)) 1164 if (!atomic_dec_and_test(&cfqq->ref))
1131 return; 1165 return;
1132 1166
1167 cfq_log_cfqq(cfqd, cfqq, "put_queue");
1133 BUG_ON(rb_first(&cfqq->sort_list)); 1168 BUG_ON(rb_first(&cfqq->sort_list));
1134 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 1169 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
1135 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1170 BUG_ON(cfq_cfqq_on_rr(cfqq));
@@ -1177,8 +1212,19 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
1177 kmem_cache_free(cfq_ioc_pool, cic); 1212 kmem_cache_free(cfq_ioc_pool, cic);
1178 elv_ioc_count_dec(ioc_count); 1213 elv_ioc_count_dec(ioc_count);
1179 1214
1180 if (ioc_gone && !elv_ioc_count_read(ioc_count)) 1215 if (ioc_gone) {
1181 complete(ioc_gone); 1216 /*
1217 * CFQ scheduler is exiting, grab exit lock and check
1218 * the pending io context count. If it hits zero,
1219 * complete ioc_gone and set it back to NULL
1220 */
1221 spin_lock(&ioc_gone_lock);
1222 if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
1223 complete(ioc_gone);
1224 ioc_gone = NULL;
1225 }
1226 spin_unlock(&ioc_gone_lock);
1227 }
1182} 1228}
1183 1229
1184static void cfq_cic_free(struct cfq_io_context *cic) 1230static void cfq_cic_free(struct cfq_io_context *cic)
@@ -1427,6 +1473,8 @@ retry:
1427 cfq_mark_cfqq_idle_window(cfqq); 1473 cfq_mark_cfqq_idle_window(cfqq);
1428 cfq_mark_cfqq_sync(cfqq); 1474 cfq_mark_cfqq_sync(cfqq);
1429 } 1475 }
1476 cfqq->pid = current->pid;
1477 cfq_log_cfqq(cfqd, cfqq, "alloced");
1430 } 1478 }
1431 1479
1432 if (new_cfqq) 1480 if (new_cfqq)
@@ -1675,7 +1723,7 @@ static void
1675cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1723cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1676 struct cfq_io_context *cic) 1724 struct cfq_io_context *cic)
1677{ 1725{
1678 int enable_idle; 1726 int old_idle, enable_idle;
1679 1727
1680 /* 1728 /*
1681 * Don't idle for async or idle io prio class 1729 * Don't idle for async or idle io prio class
@@ -1683,7 +1731,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1683 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) 1731 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
1684 return; 1732 return;
1685 1733
1686 enable_idle = cfq_cfqq_idle_window(cfqq); 1734 enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
1687 1735
1688 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 1736 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
1689 (cfqd->hw_tag && CIC_SEEKY(cic))) 1737 (cfqd->hw_tag && CIC_SEEKY(cic)))
@@ -1695,10 +1743,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1695 enable_idle = 1; 1743 enable_idle = 1;
1696 } 1744 }
1697 1745
1698 if (enable_idle) 1746 if (old_idle != enable_idle) {
1699 cfq_mark_cfqq_idle_window(cfqq); 1747 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
1700 else 1748 if (enable_idle)
1701 cfq_clear_cfqq_idle_window(cfqq); 1749 cfq_mark_cfqq_idle_window(cfqq);
1750 else
1751 cfq_clear_cfqq_idle_window(cfqq);
1752 }
1702} 1753}
1703 1754
1704/* 1755/*
@@ -1757,6 +1808,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1757 */ 1808 */
1758static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1809static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1759{ 1810{
1811 cfq_log_cfqq(cfqd, cfqq, "preempt");
1760 cfq_slice_expired(cfqd, 1); 1812 cfq_slice_expired(cfqd, 1);
1761 1813
1762 /* 1814 /*
@@ -1818,6 +1870,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
1818 struct cfq_data *cfqd = q->elevator->elevator_data; 1870 struct cfq_data *cfqd = q->elevator->elevator_data;
1819 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1871 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1820 1872
1873 cfq_log_cfqq(cfqd, cfqq, "insert_request");
1821 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); 1874 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
1822 1875
1823 cfq_add_rq_rb(rq); 1876 cfq_add_rq_rb(rq);
@@ -1835,6 +1888,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
1835 unsigned long now; 1888 unsigned long now;
1836 1889
1837 now = jiffies; 1890 now = jiffies;
1891 cfq_log_cfqq(cfqd, cfqq, "complete");
1838 1892
1839 WARN_ON(!cfqd->rq_in_driver); 1893 WARN_ON(!cfqd->rq_in_driver);
1840 WARN_ON(!cfqq->dispatched); 1894 WARN_ON(!cfqq->dispatched);
@@ -2004,6 +2058,7 @@ queue_fail:
2004 2058
2005 cfq_schedule_dispatch(cfqd); 2059 cfq_schedule_dispatch(cfqd);
2006 spin_unlock_irqrestore(q->queue_lock, flags); 2060 spin_unlock_irqrestore(q->queue_lock, flags);
2061 cfq_log(cfqd, "set_request fail");
2007 return 1; 2062 return 1;
2008} 2063}
2009 2064
@@ -2029,6 +2084,8 @@ static void cfq_idle_slice_timer(unsigned long data)
2029 unsigned long flags; 2084 unsigned long flags;
2030 int timed_out = 1; 2085 int timed_out = 1;
2031 2086
2087 cfq_log(cfqd, "idle timer fired");
2088
2032 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2089 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2033 2090
2034 cfqq = cfqd->active_queue; 2091 cfqq = cfqd->active_queue;
@@ -2317,7 +2374,7 @@ static void __exit cfq_exit(void)
2317 * pending RCU callbacks 2374 * pending RCU callbacks
2318 */ 2375 */
2319 if (elv_ioc_count_read(ioc_count)) 2376 if (elv_ioc_count_read(ioc_count))
2320 wait_for_completion(ioc_gone); 2377 wait_for_completion(&all_gone);
2321 cfq_slab_kill(); 2378 cfq_slab_kill();
2322} 2379}
2323 2380
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644
index 000000000000..eec4404fd357
--- /dev/null
+++ b/block/cmd-filter.c
@@ -0,0 +1,334 @@
1/*
2 * Copyright 2004 Peter M. Jones <pjones@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 *
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public Licens
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
17 *
18 */
19
20#include <linux/list.h>
21#include <linux/genhd.h>
22#include <linux/spinlock.h>
23#include <linux/parser.h>
24#include <linux/capability.h>
25#include <linux/bitops.h>
26
27#include <scsi/scsi.h>
28#include <linux/cdrom.h>
29
30int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
31 unsigned char *cmd, mode_t *f_mode)
32{
33 /* root can do any command. */
34 if (capable(CAP_SYS_RAWIO))
35 return 0;
36
37 /* if there's no filter set, assume we're filtering everything out */
38 if (!filter)
39 return -EPERM;
40
41 /* Anybody who can open the device can do a read-safe command */
42 if (test_bit(cmd[0], filter->read_ok))
43 return 0;
44
45 /* Write-safe commands require a writable open */
46 if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
47 return 0;
48
49 return -EPERM;
50}
51EXPORT_SYMBOL(blk_cmd_filter_verify_command);
52
53int blk_verify_command(struct file *file, unsigned char *cmd)
54{
55 struct gendisk *disk;
56 struct inode *inode;
57
58 if (!file)
59 return -EINVAL;
60
61 inode = file->f_dentry->d_inode;
62 if (!inode)
63 return -EINVAL;
64
65 disk = inode->i_bdev->bd_disk;
66
67 return blk_cmd_filter_verify_command(&disk->cmd_filter,
68 cmd, &file->f_mode);
69}
70EXPORT_SYMBOL(blk_verify_command);
71
72/* and now, the sysfs stuff */
73static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
74 int rw)
75{
76 char *npage = page;
77 unsigned long *okbits;
78 int i;
79
80 if (rw == READ)
81 okbits = filter->read_ok;
82 else
83 okbits = filter->write_ok;
84
85 for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
86 if (test_bit(i, okbits)) {
87 sprintf(npage, "%02x", i);
88 npage += 2;
89 if (i < BLK_SCSI_MAX_CMDS - 1)
90 sprintf(npage++, " ");
91 }
92 }
93
94 if (npage != page)
95 npage += sprintf(npage, "\n");
96
97 return npage - page;
98}
99
100static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter, char *page)
101{
102 return rcf_cmds_show(filter, page, READ);
103}
104
105static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
106 char *page)
107{
108 return rcf_cmds_show(filter, page, WRITE);
109}
110
111static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
112 const char *page, size_t count, int rw)
113{
114 ssize_t ret = 0;
115 unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
116 int cmd, status, len;
117 substring_t ss;
118
119 memset(&okbits, 0, sizeof(okbits));
120
121 for (len = strlen(page); len > 0; len -= 3) {
122 if (len < 2)
123 break;
124 ss.from = (char *) page + ret;
125 ss.to = (char *) page + ret + 2;
126 ret += 3;
127 status = match_hex(&ss, &cmd);
128 /* either of these cases means invalid input, so do nothing. */
129 if (status || cmd >= BLK_SCSI_MAX_CMDS)
130 return -EINVAL;
131
132 __set_bit(cmd, okbits);
133 }
134
135 if (rw == READ)
136 target_okbits = filter->read_ok;
137 else
138 target_okbits = filter->write_ok;
139
140 memmove(target_okbits, okbits, sizeof(okbits));
141 return count;
142}
143
144static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
145 const char *page, size_t count)
146{
147 return rcf_cmds_store(filter, page, count, READ);
148}
149
150static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
151 const char *page, size_t count)
152{
153 return rcf_cmds_store(filter, page, count, WRITE);
154}
155
156struct rcf_sysfs_entry {
157 struct attribute attr;
158 ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
159 ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
160};
161
162static struct rcf_sysfs_entry rcf_readcmds_entry = {
163 .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
164 .show = rcf_readcmds_show,
165 .store = rcf_readcmds_store,
166};
167
168static struct rcf_sysfs_entry rcf_writecmds_entry = {
169 .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
170 .show = rcf_writecmds_show,
171 .store = rcf_writecmds_store,
172};
173
174static struct attribute *default_attrs[] = {
175 &rcf_readcmds_entry.attr,
176 &rcf_writecmds_entry.attr,
177 NULL,
178};
179
180#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
181
182static ssize_t
183rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
184{
185 struct rcf_sysfs_entry *entry = to_rcf(attr);
186 struct blk_scsi_cmd_filter *filter;
187
188 filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
189 if (entry->show)
190 return entry->show(filter, page);
191
192 return 0;
193}
194
195static ssize_t
196rcf_attr_store(struct kobject *kobj, struct attribute *attr,
197 const char *page, size_t length)
198{
199 struct rcf_sysfs_entry *entry = to_rcf(attr);
200 struct blk_scsi_cmd_filter *filter;
201
202 if (!capable(CAP_SYS_RAWIO))
203 return -EPERM;
204
205 if (!entry->store)
206 return -EINVAL;
207
208 filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
209 return entry->store(filter, page, length);
210}
211
212static struct sysfs_ops rcf_sysfs_ops = {
213 .show = rcf_attr_show,
214 .store = rcf_attr_store,
215};
216
217static struct kobj_type rcf_ktype = {
218 .sysfs_ops = &rcf_sysfs_ops,
219 .default_attrs = default_attrs,
220};
221
222#ifndef MAINTENANCE_IN_CMD
223#define MAINTENANCE_IN_CMD 0xa3
224#endif
225
226static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
227{
228 /* Basic read-only commands */
229 __set_bit(TEST_UNIT_READY, filter->read_ok);
230 __set_bit(REQUEST_SENSE, filter->read_ok);
231 __set_bit(READ_6, filter->read_ok);
232 __set_bit(READ_10, filter->read_ok);
233 __set_bit(READ_12, filter->read_ok);
234 __set_bit(READ_16, filter->read_ok);
235 __set_bit(READ_BUFFER, filter->read_ok);
236 __set_bit(READ_DEFECT_DATA, filter->read_ok);
237 __set_bit(READ_CAPACITY, filter->read_ok);
238 __set_bit(READ_LONG, filter->read_ok);
239 __set_bit(INQUIRY, filter->read_ok);
240 __set_bit(MODE_SENSE, filter->read_ok);
241 __set_bit(MODE_SENSE_10, filter->read_ok);
242 __set_bit(LOG_SENSE, filter->read_ok);
243 __set_bit(START_STOP, filter->read_ok);
244 __set_bit(GPCMD_VERIFY_10, filter->read_ok);
245 __set_bit(VERIFY_16, filter->read_ok);
246 __set_bit(REPORT_LUNS, filter->read_ok);
247 __set_bit(SERVICE_ACTION_IN, filter->read_ok);
248 __set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
249 __set_bit(MAINTENANCE_IN_CMD, filter->read_ok);
250 __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
251
252 /* Audio CD commands */
253 __set_bit(GPCMD_PLAY_CD, filter->read_ok);
254 __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
255 __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
256 __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
257 __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
258
259 /* CD/DVD data reading */
260 __set_bit(GPCMD_READ_CD, filter->read_ok);
261 __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
262 __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
263 __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
264 __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
265 __set_bit(GPCMD_READ_HEADER, filter->read_ok);
266 __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
267 __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
268 __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
269 __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
270 __set_bit(GPCMD_SCAN, filter->read_ok);
271 __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
272 __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
273 __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
274 __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
275 __set_bit(GPCMD_SEEK, filter->read_ok);
276 __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
277
278 /* Basic writing commands */
279 __set_bit(WRITE_6, filter->write_ok);
280 __set_bit(WRITE_10, filter->write_ok);
281 __set_bit(WRITE_VERIFY, filter->write_ok);
282 __set_bit(WRITE_12, filter->write_ok);
283 __set_bit(WRITE_VERIFY_12, filter->write_ok);
284 __set_bit(WRITE_16, filter->write_ok);
285 __set_bit(WRITE_LONG, filter->write_ok);
286 __set_bit(WRITE_LONG_2, filter->write_ok);
287 __set_bit(ERASE, filter->write_ok);
288 __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
289 __set_bit(MODE_SELECT, filter->write_ok);
290 __set_bit(LOG_SELECT, filter->write_ok);
291 __set_bit(GPCMD_BLANK, filter->write_ok);
292 __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
293 __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
294 __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
295 __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
296 __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
297 __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
298 __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
299 __set_bit(GPCMD_SEND_KEY, filter->write_ok);
300 __set_bit(GPCMD_SEND_OPC, filter->write_ok);
301 __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
302 __set_bit(GPCMD_SET_SPEED, filter->write_ok);
303 __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
304 __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
305 __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
306}
307
308int blk_register_filter(struct gendisk *disk)
309{
310 int ret;
311 struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
312 struct kobject *parent = kobject_get(disk->holder_dir->parent);
313
314 if (!parent)
315 return -ENODEV;
316
317 ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
318 "%s", "cmd_filter");
319
320 if (ret < 0)
321 return ret;
322
323 rcf_set_defaults(filter);
324 return 0;
325}
326
327void blk_unregister_filter(struct gendisk *disk)
328{
329 struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
330
331 kobject_put(&filter->kobj);
332 kobject_put(disk->holder_dir->parent);
333}
334
diff --git a/block/elevator.c b/block/elevator.c
index 902dd1344d56..ed6f8f32d27e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -86,6 +86,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
86 if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) 86 if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
87 return 0; 87 return 0;
88 88
89 /*
90 * only merge integrity protected bio into ditto rq
91 */
92 if (bio_integrity(bio) != blk_integrity_rq(rq))
93 return 0;
94
89 if (!elv_iosched_allow_merge(rq, bio)) 95 if (!elv_iosched_allow_merge(rq, bio))
90 return 0; 96 return 0;
91 97
@@ -144,7 +150,7 @@ static struct elevator_type *elevator_get(const char *name)
144 else 150 else
145 sprintf(elv, "%s-iosched", name); 151 sprintf(elv, "%s-iosched", name);
146 152
147 request_module(elv); 153 request_module("%s", elv);
148 spin_lock(&elv_list_lock); 154 spin_lock(&elv_list_lock);
149 e = elevator_find(name); 155 e = elevator_find(name);
150 } 156 }
diff --git a/block/genhd.c b/block/genhd.c
index b922d4801c87..9074f384b097 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
189 disk->minors, NULL, exact_match, exact_lock, disk); 189 disk->minors, NULL, exact_match, exact_lock, disk);
190 register_disk(disk); 190 register_disk(disk);
191 blk_register_queue(disk); 191 blk_register_queue(disk);
192 blk_register_filter(disk);
192 193
193 bdi = &disk->queue->backing_dev_info; 194 bdi = &disk->queue->backing_dev_info;
194 bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); 195 bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
200 201
201void unlink_gendisk(struct gendisk *disk) 202void unlink_gendisk(struct gendisk *disk)
202{ 203{
204 blk_unregister_filter(disk);
203 sysfs_remove_link(&disk->dev.kobj, "bdi"); 205 sysfs_remove_link(&disk->dev.kobj, "bdi");
204 bdi_unregister(&disk->queue->backing_dev_info); 206 bdi_unregister(&disk->queue->backing_dev_info);
205 blk_unregister_queue(disk); 207 blk_unregister_queue(disk);
@@ -400,6 +402,14 @@ static ssize_t disk_removable_show(struct device *dev,
400 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 402 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
401} 403}
402 404
405static ssize_t disk_ro_show(struct device *dev,
406 struct device_attribute *attr, char *buf)
407{
408 struct gendisk *disk = dev_to_disk(dev);
409
410 return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
411}
412
403static ssize_t disk_size_show(struct device *dev, 413static ssize_t disk_size_show(struct device *dev,
404 struct device_attribute *attr, char *buf) 414 struct device_attribute *attr, char *buf)
405{ 415{
@@ -472,6 +482,7 @@ static ssize_t disk_fail_store(struct device *dev,
472 482
473static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 483static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
474static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 484static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
485static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
475static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL); 486static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
476static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 487static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
477static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); 488static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
@@ -483,6 +494,7 @@ static struct device_attribute dev_attr_fail =
483static struct attribute *disk_attrs[] = { 494static struct attribute *disk_attrs[] = {
484 &dev_attr_range.attr, 495 &dev_attr_range.attr,
485 &dev_attr_removable.attr, 496 &dev_attr_removable.attr,
497 &dev_attr_ro.attr,
486 &dev_attr_size.attr, 498 &dev_attr_size.attr,
487 &dev_attr_capability.attr, 499 &dev_attr_capability.attr,
488 &dev_attr_stat.attr, 500 &dev_attr_stat.attr,
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 78199c08ec92..c5b9bcfc0a6d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
105 return put_user(1, p); 105 return put_user(1, p);
106} 106}
107 107
108#define CMD_READ_SAFE 0x01
109#define CMD_WRITE_SAFE 0x02
110#define CMD_WARNED 0x04
111#define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
112#define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
113
114int blk_verify_command(unsigned char *cmd, int has_write_perm)
115{
116 static unsigned char cmd_type[256] = {
117
118 /* Basic read-only commands */
119 safe_for_read(TEST_UNIT_READY),
120 safe_for_read(REQUEST_SENSE),
121 safe_for_read(READ_6),
122 safe_for_read(READ_10),
123 safe_for_read(READ_12),
124 safe_for_read(READ_16),
125 safe_for_read(READ_BUFFER),
126 safe_for_read(READ_DEFECT_DATA),
127 safe_for_read(READ_LONG),
128 safe_for_read(INQUIRY),
129 safe_for_read(MODE_SENSE),
130 safe_for_read(MODE_SENSE_10),
131 safe_for_read(LOG_SENSE),
132 safe_for_read(START_STOP),
133 safe_for_read(GPCMD_VERIFY_10),
134 safe_for_read(VERIFY_16),
135
136 /* Audio CD commands */
137 safe_for_read(GPCMD_PLAY_CD),
138 safe_for_read(GPCMD_PLAY_AUDIO_10),
139 safe_for_read(GPCMD_PLAY_AUDIO_MSF),
140 safe_for_read(GPCMD_PLAY_AUDIO_TI),
141 safe_for_read(GPCMD_PAUSE_RESUME),
142
143 /* CD/DVD data reading */
144 safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
145 safe_for_read(GPCMD_READ_CD),
146 safe_for_read(GPCMD_READ_CD_MSF),
147 safe_for_read(GPCMD_READ_DISC_INFO),
148 safe_for_read(GPCMD_READ_CDVD_CAPACITY),
149 safe_for_read(GPCMD_READ_DVD_STRUCTURE),
150 safe_for_read(GPCMD_READ_HEADER),
151 safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
152 safe_for_read(GPCMD_READ_SUBCHANNEL),
153 safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
154 safe_for_read(GPCMD_REPORT_KEY),
155 safe_for_read(GPCMD_SCAN),
156 safe_for_read(GPCMD_GET_CONFIGURATION),
157 safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
158 safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
159 safe_for_read(GPCMD_GET_PERFORMANCE),
160 safe_for_read(GPCMD_SEEK),
161 safe_for_read(GPCMD_STOP_PLAY_SCAN),
162
163 /* Basic writing commands */
164 safe_for_write(WRITE_6),
165 safe_for_write(WRITE_10),
166 safe_for_write(WRITE_VERIFY),
167 safe_for_write(WRITE_12),
168 safe_for_write(WRITE_VERIFY_12),
169 safe_for_write(WRITE_16),
170 safe_for_write(WRITE_LONG),
171 safe_for_write(WRITE_LONG_2),
172 safe_for_write(ERASE),
173 safe_for_write(GPCMD_MODE_SELECT_10),
174 safe_for_write(MODE_SELECT),
175 safe_for_write(LOG_SELECT),
176 safe_for_write(GPCMD_BLANK),
177 safe_for_write(GPCMD_CLOSE_TRACK),
178 safe_for_write(GPCMD_FLUSH_CACHE),
179 safe_for_write(GPCMD_FORMAT_UNIT),
180 safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
181 safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
182 safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
183 safe_for_write(GPCMD_SEND_EVENT),
184 safe_for_write(GPCMD_SEND_KEY),
185 safe_for_write(GPCMD_SEND_OPC),
186 safe_for_write(GPCMD_SEND_CUE_SHEET),
187 safe_for_write(GPCMD_SET_SPEED),
188 safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
189 safe_for_write(GPCMD_LOAD_UNLOAD),
190 safe_for_write(GPCMD_SET_STREAMING),
191 };
192 unsigned char type = cmd_type[cmd[0]];
193
194 /* Anybody who can open the device can do a read-safe command */
195 if (type & CMD_READ_SAFE)
196 return 0;
197
198 /* Write-safe commands just require a writable open.. */
199 if ((type & CMD_WRITE_SAFE) && has_write_perm)
200 return 0;
201
202 /* And root can do any command.. */
203 if (capable(CAP_SYS_RAWIO))
204 return 0;
205
206 if (!type) {
207 cmd_type[cmd[0]] = CMD_WARNED;
208 printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
209 }
210
211 /* Otherwise fail it with an "Operation not permitted" */
212 return -EPERM;
213}
214EXPORT_SYMBOL_GPL(blk_verify_command);
215
216static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, 108static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
217 struct sg_io_hdr *hdr, int has_write_perm) 109 struct sg_io_hdr *hdr, struct file *file)
218{ 110{
219 if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) 111 if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
220 return -EFAULT; 112 return -EFAULT;
221 if (blk_verify_command(rq->cmd, has_write_perm)) 113 if (blk_verify_command(file, rq->cmd))
222 return -EPERM; 114 return -EPERM;
223 115
224 /* 116 /*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
287 struct gendisk *bd_disk, struct sg_io_hdr *hdr) 179 struct gendisk *bd_disk, struct sg_io_hdr *hdr)
288{ 180{
289 unsigned long start_time; 181 unsigned long start_time;
290 int writing = 0, ret = 0, has_write_perm = 0; 182 int writing = 0, ret = 0;
291 struct request *rq; 183 struct request *rq;
292 char sense[SCSI_SENSE_BUFFERSIZE]; 184 char sense[SCSI_SENSE_BUFFERSIZE];
293 struct bio *bio; 185 struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct request_queue *q,
316 if (!rq) 208 if (!rq)
317 return -ENOMEM; 209 return -ENOMEM;
318 210
319 if (file) 211 if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
320 has_write_perm = file->f_mode & FMODE_WRITE;
321
322 if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
323 blk_put_request(rq); 212 blk_put_request(rq);
324 return -EFAULT; 213 return -EFAULT;
325 } 214 }
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
451 if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) 340 if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
452 goto error; 341 goto error;
453 342
454 err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE); 343 err = blk_verify_command(file, rq->cmd);
455 if (err) 344 if (err)
456 goto error; 345 goto error;
457 346
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 57a43649a461..499ccc628d81 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -885,7 +885,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
885 /* set the min alignment and padding */ 885 /* set the min alignment and padding */
886 blk_queue_update_dma_alignment(sdev->request_queue, 886 blk_queue_update_dma_alignment(sdev->request_queue,
887 ATA_DMA_PAD_SZ - 1); 887 ATA_DMA_PAD_SZ - 1);
888 blk_queue_dma_pad(sdev->request_queue, ATA_DMA_PAD_SZ - 1); 888 blk_queue_update_dma_pad(sdev->request_queue,
889 ATA_DMA_PAD_SZ - 1);
889 890
890 /* configure draining */ 891 /* configure draining */
891 buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL); 892 buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index cd03473f3547..a002a381df92 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6628,15 +6628,18 @@ static void DAC960_DestroyProcEntries(DAC960_Controller_T *Controller)
6628 * DAC960_gam_ioctl is the ioctl function for performing RAID operations. 6628 * DAC960_gam_ioctl is the ioctl function for performing RAID operations.
6629*/ 6629*/
6630 6630
6631static int DAC960_gam_ioctl(struct inode *inode, struct file *file, 6631static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
6632 unsigned int Request, unsigned long Argument) 6632 unsigned long Argument)
6633{ 6633{
6634 int ErrorCode = 0; 6634 long ErrorCode = 0;
6635 if (!capable(CAP_SYS_ADMIN)) return -EACCES; 6635 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
6636
6637 lock_kernel();
6636 switch (Request) 6638 switch (Request)
6637 { 6639 {
6638 case DAC960_IOCTL_GET_CONTROLLER_COUNT: 6640 case DAC960_IOCTL_GET_CONTROLLER_COUNT:
6639 return DAC960_ControllerCount; 6641 ErrorCode = DAC960_ControllerCount;
6642 break;
6640 case DAC960_IOCTL_GET_CONTROLLER_INFO: 6643 case DAC960_IOCTL_GET_CONTROLLER_INFO:
6641 { 6644 {
6642 DAC960_ControllerInfo_T __user *UserSpaceControllerInfo = 6645 DAC960_ControllerInfo_T __user *UserSpaceControllerInfo =
@@ -6644,15 +6647,20 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
6644 DAC960_ControllerInfo_T ControllerInfo; 6647 DAC960_ControllerInfo_T ControllerInfo;
6645 DAC960_Controller_T *Controller; 6648 DAC960_Controller_T *Controller;
6646 int ControllerNumber; 6649 int ControllerNumber;
6647 if (UserSpaceControllerInfo == NULL) return -EINVAL; 6650 if (UserSpaceControllerInfo == NULL)
6648 ErrorCode = get_user(ControllerNumber, 6651 ErrorCode = -EINVAL;
6652 else ErrorCode = get_user(ControllerNumber,
6649 &UserSpaceControllerInfo->ControllerNumber); 6653 &UserSpaceControllerInfo->ControllerNumber);
6650 if (ErrorCode != 0) return ErrorCode; 6654 if (ErrorCode != 0)
6655 break;;
6656 ErrorCode = -ENXIO;
6651 if (ControllerNumber < 0 || 6657 if (ControllerNumber < 0 ||
6652 ControllerNumber > DAC960_ControllerCount - 1) 6658 ControllerNumber > DAC960_ControllerCount - 1) {
6653 return -ENXIO; 6659 break;
6660 }
6654 Controller = DAC960_Controllers[ControllerNumber]; 6661 Controller = DAC960_Controllers[ControllerNumber];
6655 if (Controller == NULL) return -ENXIO; 6662 if (Controller == NULL)
6663 break;;
6656 memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T)); 6664 memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T));
6657 ControllerInfo.ControllerNumber = ControllerNumber; 6665 ControllerInfo.ControllerNumber = ControllerNumber;
6658 ControllerInfo.FirmwareType = Controller->FirmwareType; 6666 ControllerInfo.FirmwareType = Controller->FirmwareType;
@@ -6665,8 +6673,9 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
6665 ControllerInfo.PCI_Address = Controller->PCI_Address; 6673 ControllerInfo.PCI_Address = Controller->PCI_Address;
6666 strcpy(ControllerInfo.ModelName, Controller->ModelName); 6674 strcpy(ControllerInfo.ModelName, Controller->ModelName);
6667 strcpy(ControllerInfo.FirmwareVersion, Controller->FirmwareVersion); 6675 strcpy(ControllerInfo.FirmwareVersion, Controller->FirmwareVersion);
6668 return (copy_to_user(UserSpaceControllerInfo, &ControllerInfo, 6676 ErrorCode = (copy_to_user(UserSpaceControllerInfo, &ControllerInfo,
6669 sizeof(DAC960_ControllerInfo_T)) ? -EFAULT : 0); 6677 sizeof(DAC960_ControllerInfo_T)) ? -EFAULT : 0);
6678 break;
6670 } 6679 }
6671 case DAC960_IOCTL_V1_EXECUTE_COMMAND: 6680 case DAC960_IOCTL_V1_EXECUTE_COMMAND:
6672 { 6681 {
@@ -6684,30 +6693,39 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
6684 int ControllerNumber, DataTransferLength; 6693 int ControllerNumber, DataTransferLength;
6685 unsigned char *DataTransferBuffer = NULL; 6694 unsigned char *DataTransferBuffer = NULL;
6686 dma_addr_t DataTransferBufferDMA; 6695 dma_addr_t DataTransferBufferDMA;
6687 if (UserSpaceUserCommand == NULL) return -EINVAL; 6696 if (UserSpaceUserCommand == NULL) {
6697 ErrorCode = -EINVAL;
6698 break;
6699 }
6688 if (copy_from_user(&UserCommand, UserSpaceUserCommand, 6700 if (copy_from_user(&UserCommand, UserSpaceUserCommand,
6689 sizeof(DAC960_V1_UserCommand_T))) { 6701 sizeof(DAC960_V1_UserCommand_T))) {
6690 ErrorCode = -EFAULT; 6702 ErrorCode = -EFAULT;
6691 goto Failure1a; 6703 break;
6692 } 6704 }
6693 ControllerNumber = UserCommand.ControllerNumber; 6705 ControllerNumber = UserCommand.ControllerNumber;
6706 ErrorCode = -ENXIO;
6694 if (ControllerNumber < 0 || 6707 if (ControllerNumber < 0 ||
6695 ControllerNumber > DAC960_ControllerCount - 1) 6708 ControllerNumber > DAC960_ControllerCount - 1)
6696 return -ENXIO; 6709 break;
6697 Controller = DAC960_Controllers[ControllerNumber]; 6710 Controller = DAC960_Controllers[ControllerNumber];
6698 if (Controller == NULL) return -ENXIO; 6711 if (Controller == NULL)
6699 if (Controller->FirmwareType != DAC960_V1_Controller) return -EINVAL; 6712 break;
6713 ErrorCode = -EINVAL;
6714 if (Controller->FirmwareType != DAC960_V1_Controller)
6715 break;
6700 CommandOpcode = UserCommand.CommandMailbox.Common.CommandOpcode; 6716 CommandOpcode = UserCommand.CommandMailbox.Common.CommandOpcode;
6701 DataTransferLength = UserCommand.DataTransferLength; 6717 DataTransferLength = UserCommand.DataTransferLength;
6702 if (CommandOpcode & 0x80) return -EINVAL; 6718 if (CommandOpcode & 0x80)
6719 break;
6703 if (CommandOpcode == DAC960_V1_DCDB) 6720 if (CommandOpcode == DAC960_V1_DCDB)
6704 { 6721 {
6705 if (copy_from_user(&DCDB, UserCommand.DCDB, 6722 if (copy_from_user(&DCDB, UserCommand.DCDB,
6706 sizeof(DAC960_V1_DCDB_T))) { 6723 sizeof(DAC960_V1_DCDB_T))) {
6707 ErrorCode = -EFAULT; 6724 ErrorCode = -EFAULT;
6708 goto Failure1a; 6725 break;
6709 } 6726 }
6710 if (DCDB.Channel >= DAC960_V1_MaxChannels) return -EINVAL; 6727 if (DCDB.Channel >= DAC960_V1_MaxChannels)
6728 break;
6711 if (!((DataTransferLength == 0 && 6729 if (!((DataTransferLength == 0 &&
6712 DCDB.Direction 6730 DCDB.Direction
6713 == DAC960_V1_DCDB_NoDataTransfer) || 6731 == DAC960_V1_DCDB_NoDataTransfer) ||
@@ -6717,38 +6735,37 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
6717 (DataTransferLength < 0 && 6735 (DataTransferLength < 0 &&
6718 DCDB.Direction 6736 DCDB.Direction
6719 == DAC960_V1_DCDB_DataTransferSystemToDevice))) 6737 == DAC960_V1_DCDB_DataTransferSystemToDevice)))
6720 return -EINVAL; 6738 break;
6721 if (((DCDB.TransferLengthHigh4 << 16) | DCDB.TransferLength) 6739 if (((DCDB.TransferLengthHigh4 << 16) | DCDB.TransferLength)
6722 != abs(DataTransferLength)) 6740 != abs(DataTransferLength))
6723 return -EINVAL; 6741 break;
6724 DCDB_IOBUF = pci_alloc_consistent(Controller->PCIDevice, 6742 DCDB_IOBUF = pci_alloc_consistent(Controller->PCIDevice,
6725 sizeof(DAC960_V1_DCDB_T), &DCDB_IOBUFDMA); 6743 sizeof(DAC960_V1_DCDB_T), &DCDB_IOBUFDMA);
6726 if (DCDB_IOBUF == NULL) 6744 if (DCDB_IOBUF == NULL) {
6727 return -ENOMEM; 6745 ErrorCode = -ENOMEM;
6746 break;
6747 }
6728 } 6748 }
6749 ErrorCode = -ENOMEM;
6729 if (DataTransferLength > 0) 6750 if (DataTransferLength > 0)
6730 { 6751 {
6731 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, 6752 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
6732 DataTransferLength, &DataTransferBufferDMA); 6753 DataTransferLength, &DataTransferBufferDMA);
6733 if (DataTransferBuffer == NULL) { 6754 if (DataTransferBuffer == NULL)
6734 ErrorCode = -ENOMEM; 6755 break;
6735 goto Failure1;
6736 }
6737 memset(DataTransferBuffer, 0, DataTransferLength); 6756 memset(DataTransferBuffer, 0, DataTransferLength);
6738 } 6757 }
6739 else if (DataTransferLength < 0) 6758 else if (DataTransferLength < 0)
6740 { 6759 {
6741 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, 6760 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
6742 -DataTransferLength, &DataTransferBufferDMA); 6761 -DataTransferLength, &DataTransferBufferDMA);
6743 if (DataTransferBuffer == NULL) { 6762 if (DataTransferBuffer == NULL)
6744 ErrorCode = -ENOMEM; 6763 break;
6745 goto Failure1;
6746 }
6747 if (copy_from_user(DataTransferBuffer, 6764 if (copy_from_user(DataTransferBuffer,
6748 UserCommand.DataTransferBuffer, 6765 UserCommand.DataTransferBuffer,
6749 -DataTransferLength)) { 6766 -DataTransferLength)) {
6750 ErrorCode = -EFAULT; 6767 ErrorCode = -EFAULT;
6751 goto Failure1; 6768 break;
6752 } 6769 }
6753 } 6770 }
6754 if (CommandOpcode == DAC960_V1_DCDB) 6771 if (CommandOpcode == DAC960_V1_DCDB)
@@ -6825,8 +6842,7 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
6825 if (DCDB_IOBUF != NULL) 6842 if (DCDB_IOBUF != NULL)
6826 pci_free_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T), 6843 pci_free_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T),
6827 DCDB_IOBUF, DCDB_IOBUFDMA); 6844 DCDB_IOBUF, DCDB_IOBUFDMA);
6828 Failure1a: 6845 break;
6829 return ErrorCode;
6830 } 6846 }
6831 case DAC960_IOCTL_V2_EXECUTE_COMMAND: 6847 case DAC960_IOCTL_V2_EXECUTE_COMMAND:
6832 { 6848 {
@@ -6844,32 +6860,43 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
6844 dma_addr_t DataTransferBufferDMA; 6860 dma_addr_t DataTransferBufferDMA;
6845 unsigned char *RequestSenseBuffer = NULL; 6861 unsigned char *RequestSenseBuffer = NULL;
6846 dma_addr_t RequestSenseBufferDMA; 6862 dma_addr_t RequestSenseBufferDMA;
6847 if (UserSpaceUserCommand == NULL) return -EINVAL; 6863
6864 ErrorCode = -EINVAL;
6865 if (UserSpaceUserCommand == NULL)
6866 break;
6848 if (copy_from_user(&UserCommand, UserSpaceUserCommand, 6867 if (copy_from_user(&UserCommand, UserSpaceUserCommand,
6849 sizeof(DAC960_V2_UserCommand_T))) { 6868 sizeof(DAC960_V2_UserCommand_T))) {
6850 ErrorCode = -EFAULT; 6869 ErrorCode = -EFAULT;
6851 goto Failure2a; 6870 break;
6852 } 6871 }
6872 ErrorCode = -ENXIO;
6853 ControllerNumber = UserCommand.ControllerNumber; 6873 ControllerNumber = UserCommand.ControllerNumber;
6854 if (ControllerNumber < 0 || 6874 if (ControllerNumber < 0 ||
6855 ControllerNumber > DAC960_ControllerCount - 1) 6875 ControllerNumber > DAC960_ControllerCount - 1)
6856 return -ENXIO; 6876 break;
6857 Controller = DAC960_Controllers[ControllerNumber]; 6877 Controller = DAC960_Controllers[ControllerNumber];
6858 if (Controller == NULL) return -ENXIO; 6878 if (Controller == NULL)
6859 if (Controller->FirmwareType != DAC960_V2_Controller) return -EINVAL; 6879 break;
6880 if (Controller->FirmwareType != DAC960_V2_Controller){
6881 ErrorCode = -EINVAL;
6882 break;
6883 }
6860 DataTransferLength = UserCommand.DataTransferLength; 6884 DataTransferLength = UserCommand.DataTransferLength;
6885 ErrorCode = -ENOMEM;
6861 if (DataTransferLength > 0) 6886 if (DataTransferLength > 0)
6862 { 6887 {
6863 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, 6888 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
6864 DataTransferLength, &DataTransferBufferDMA); 6889 DataTransferLength, &DataTransferBufferDMA);
6865 if (DataTransferBuffer == NULL) return -ENOMEM; 6890 if (DataTransferBuffer == NULL)
6891 break;
6866 memset(DataTransferBuffer, 0, DataTransferLength); 6892 memset(DataTransferBuffer, 0, DataTransferLength);
6867 } 6893 }
6868 else if (DataTransferLength < 0) 6894 else if (DataTransferLength < 0)
6869 { 6895 {
6870 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, 6896 DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
6871 -DataTransferLength, &DataTransferBufferDMA); 6897 -DataTransferLength, &DataTransferBufferDMA);
6872 if (DataTransferBuffer == NULL) return -ENOMEM; 6898 if (DataTransferBuffer == NULL)
6899 break;
6873 if (copy_from_user(DataTransferBuffer, 6900 if (copy_from_user(DataTransferBuffer,
6874 UserCommand.DataTransferBuffer, 6901 UserCommand.DataTransferBuffer,
6875 -DataTransferLength)) { 6902 -DataTransferLength)) {
@@ -6979,8 +7006,7 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
6979 if (RequestSenseBuffer != NULL) 7006 if (RequestSenseBuffer != NULL)
6980 pci_free_consistent(Controller->PCIDevice, RequestSenseLength, 7007 pci_free_consistent(Controller->PCIDevice, RequestSenseLength,
6981 RequestSenseBuffer, RequestSenseBufferDMA); 7008 RequestSenseBuffer, RequestSenseBufferDMA);
6982 Failure2a: 7009 break;
6983 return ErrorCode;
6984 } 7010 }
6985 case DAC960_IOCTL_V2_GET_HEALTH_STATUS: 7011 case DAC960_IOCTL_V2_GET_HEALTH_STATUS:
6986 { 7012 {
@@ -6990,21 +7016,33 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
6990 DAC960_V2_HealthStatusBuffer_T HealthStatusBuffer; 7016 DAC960_V2_HealthStatusBuffer_T HealthStatusBuffer;
6991 DAC960_Controller_T *Controller; 7017 DAC960_Controller_T *Controller;
6992 int ControllerNumber; 7018 int ControllerNumber;
6993 if (UserSpaceGetHealthStatus == NULL) return -EINVAL; 7019 if (UserSpaceGetHealthStatus == NULL) {
7020 ErrorCode = -EINVAL;
7021 break;
7022 }
6994 if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus, 7023 if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus,
6995 sizeof(DAC960_V2_GetHealthStatus_T))) 7024 sizeof(DAC960_V2_GetHealthStatus_T))) {
6996 return -EFAULT; 7025 ErrorCode = -EFAULT;
7026 break;
7027 }
7028 ErrorCode = -ENXIO;
6997 ControllerNumber = GetHealthStatus.ControllerNumber; 7029 ControllerNumber = GetHealthStatus.ControllerNumber;
6998 if (ControllerNumber < 0 || 7030 if (ControllerNumber < 0 ||
6999 ControllerNumber > DAC960_ControllerCount - 1) 7031 ControllerNumber > DAC960_ControllerCount - 1)
7000 return -ENXIO; 7032 break;
7001 Controller = DAC960_Controllers[ControllerNumber]; 7033 Controller = DAC960_Controllers[ControllerNumber];
7002 if (Controller == NULL) return -ENXIO; 7034 if (Controller == NULL)
7003 if (Controller->FirmwareType != DAC960_V2_Controller) return -EINVAL; 7035 break;
7036 if (Controller->FirmwareType != DAC960_V2_Controller) {
7037 ErrorCode = -EINVAL;
7038 break;
7039 }
7004 if (copy_from_user(&HealthStatusBuffer, 7040 if (copy_from_user(&HealthStatusBuffer,
7005 GetHealthStatus.HealthStatusBuffer, 7041 GetHealthStatus.HealthStatusBuffer,
7006 sizeof(DAC960_V2_HealthStatusBuffer_T))) 7042 sizeof(DAC960_V2_HealthStatusBuffer_T))) {
7007 return -EFAULT; 7043 ErrorCode = -EFAULT;
7044 break;
7045 }
7008 while (Controller->V2.HealthStatusBuffer->StatusChangeCounter 7046 while (Controller->V2.HealthStatusBuffer->StatusChangeCounter
7009 == HealthStatusBuffer.StatusChangeCounter && 7047 == HealthStatusBuffer.StatusChangeCounter &&
7010 Controller->V2.HealthStatusBuffer->NextEventSequenceNumber 7048 Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
@@ -7012,21 +7050,28 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
7012 { 7050 {
7013 interruptible_sleep_on_timeout(&Controller->HealthStatusWaitQueue, 7051 interruptible_sleep_on_timeout(&Controller->HealthStatusWaitQueue,
7014 DAC960_MonitoringTimerInterval); 7052 DAC960_MonitoringTimerInterval);
7015 if (signal_pending(current)) return -EINTR; 7053 if (signal_pending(current)) {
7054 ErrorCode = -EINTR;
7055 break;
7056 }
7016 } 7057 }
7017 if (copy_to_user(GetHealthStatus.HealthStatusBuffer, 7058 if (copy_to_user(GetHealthStatus.HealthStatusBuffer,
7018 Controller->V2.HealthStatusBuffer, 7059 Controller->V2.HealthStatusBuffer,
7019 sizeof(DAC960_V2_HealthStatusBuffer_T))) 7060 sizeof(DAC960_V2_HealthStatusBuffer_T)))
7020 return -EFAULT; 7061 ErrorCode = -EFAULT;
7021 return 0; 7062 else
7063 ErrorCode = 0;
7022 } 7064 }
7065 default:
7066 ErrorCode = -ENOTTY;
7023 } 7067 }
7024 return -EINVAL; 7068 unlock_kernel();
7069 return ErrorCode;
7025} 7070}
7026 7071
7027static const struct file_operations DAC960_gam_fops = { 7072static const struct file_operations DAC960_gam_fops = {
7028 .owner = THIS_MODULE, 7073 .owner = THIS_MODULE,
7029 .ioctl = DAC960_gam_ioctl 7074 .unlocked_ioctl = DAC960_gam_ioctl
7030}; 7075};
7031 7076
7032static struct miscdevice DAC960_gam_dev = { 7077static struct miscdevice DAC960_gam_dev = {
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 41f818be2f7e..2f1746295d06 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1003,7 +1003,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
1003 * Enough people have their dip switches set backwards to 1003 * Enough people have their dip switches set backwards to
1004 * warrant a loud message for this special case. 1004 * warrant a loud message for this special case.
1005 */ 1005 */
1006 aoemajor = be16_to_cpu(get_unaligned(&h->major)); 1006 aoemajor = get_unaligned_be16(&h->major);
1007 if (aoemajor == 0xfff) { 1007 if (aoemajor == 0xfff) {
1008 printk(KERN_ERR "aoe: Warning: shelf address is all ones. " 1008 printk(KERN_ERR "aoe: Warning: shelf address is all ones. "
1009 "Check shelf dip switches.\n"); 1009 "Check shelf dip switches.\n");
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 8b9549ab4a4e..27455ee1e9da 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -146,6 +146,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
146#include <linux/mtio.h> 146#include <linux/mtio.h>
147#include <linux/device.h> 147#include <linux/device.h>
148#include <linux/sched.h> /* current, TASK_*, schedule_timeout() */ 148#include <linux/sched.h> /* current, TASK_*, schedule_timeout() */
149#include <linux/smp_lock.h>
149 150
150#include <asm/uaccess.h> 151#include <asm/uaccess.h>
151 152
@@ -189,8 +190,7 @@ module_param_array(drive3, int, NULL, 0);
189#define ATAPI_LOG_SENSE 0x4d 190#define ATAPI_LOG_SENSE 0x4d
190 191
191static int pt_open(struct inode *inode, struct file *file); 192static int pt_open(struct inode *inode, struct file *file);
192static int pt_ioctl(struct inode *inode, struct file *file, 193static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
193 unsigned int cmd, unsigned long arg);
194static int pt_release(struct inode *inode, struct file *file); 194static int pt_release(struct inode *inode, struct file *file);
195static ssize_t pt_read(struct file *filp, char __user *buf, 195static ssize_t pt_read(struct file *filp, char __user *buf,
196 size_t count, loff_t * ppos); 196 size_t count, loff_t * ppos);
@@ -236,7 +236,7 @@ static const struct file_operations pt_fops = {
236 .owner = THIS_MODULE, 236 .owner = THIS_MODULE,
237 .read = pt_read, 237 .read = pt_read,
238 .write = pt_write, 238 .write = pt_write,
239 .ioctl = pt_ioctl, 239 .unlocked_ioctl = pt_ioctl,
240 .open = pt_open, 240 .open = pt_open,
241 .release = pt_release, 241 .release = pt_release,
242}; 242};
@@ -685,8 +685,7 @@ out:
685 return err; 685 return err;
686} 686}
687 687
688static int pt_ioctl(struct inode *inode, struct file *file, 688static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
689 unsigned int cmd, unsigned long arg)
690{ 689{
691 struct pt_unit *tape = file->private_data; 690 struct pt_unit *tape = file->private_data;
692 struct mtop __user *p = (void __user *)arg; 691 struct mtop __user *p = (void __user *)arg;
@@ -700,23 +699,26 @@ static int pt_ioctl(struct inode *inode, struct file *file,
700 switch (mtop.mt_op) { 699 switch (mtop.mt_op) {
701 700
702 case MTREW: 701 case MTREW:
702 lock_kernel();
703 pt_rewind(tape); 703 pt_rewind(tape);
704 unlock_kernel();
704 return 0; 705 return 0;
705 706
706 case MTWEOF: 707 case MTWEOF:
708 lock_kernel();
707 pt_write_fm(tape); 709 pt_write_fm(tape);
710 unlock_kernel();
708 return 0; 711 return 0;
709 712
710 default: 713 default:
711 printk("%s: Unimplemented mt_op %d\n", tape->name, 714 /* FIXME: rate limit ?? */
715 printk(KERN_DEBUG "%s: Unimplemented mt_op %d\n", tape->name,
712 mtop.mt_op); 716 mtop.mt_op);
713 return -EINVAL; 717 return -EINVAL;
714 } 718 }
715 719
716 default: 720 default:
717 printk("%s: Unimplemented ioctl 0x%x\n", tape->name, cmd); 721 return -ENOTTY;
718 return -EINVAL;
719
720 } 722 }
721} 723}
722 724
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 3ba1df93e9e3..45bee918c46a 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -49,6 +49,7 @@
49#include <linux/types.h> 49#include <linux/types.h>
50#include <linux/kernel.h> 50#include <linux/kernel.h>
51#include <linux/kthread.h> 51#include <linux/kthread.h>
52#include <linux/smp_lock.h>
52#include <linux/errno.h> 53#include <linux/errno.h>
53#include <linux/spinlock.h> 54#include <linux/spinlock.h>
54#include <linux/file.h> 55#include <linux/file.h>
@@ -2079,7 +2080,6 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
2079 unsigned char buf[64]; 2080 unsigned char buf[64];
2080 int ret; 2081 int ret;
2081 2082
2082 memset(buf, 0, sizeof(buf));
2083 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); 2083 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
2084 cgc.sense = &sense; 2084 cgc.sense = &sense;
2085 cgc.buflen = pd->mode_offset + 12; 2085 cgc.buflen = pd->mode_offset + 12;
@@ -2126,7 +2126,6 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
2126 unsigned char *cap_buf; 2126 unsigned char *cap_buf;
2127 int ret, offset; 2127 int ret, offset;
2128 2128
2129 memset(buf, 0, sizeof(buf));
2130 cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset]; 2129 cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
2131 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN); 2130 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
2132 cgc.sense = &sense; 2131 cgc.sense = &sense;
@@ -2633,11 +2632,12 @@ end_io:
2633 2632
2634 2633
2635 2634
2636static int pkt_merge_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *bvec) 2635static int pkt_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
2636 struct bio_vec *bvec)
2637{ 2637{
2638 struct pktcdvd_device *pd = q->queuedata; 2638 struct pktcdvd_device *pd = q->queuedata;
2639 sector_t zone = ZONE(bio->bi_sector, pd); 2639 sector_t zone = ZONE(bmd->bi_sector, pd);
2640 int used = ((bio->bi_sector - zone) << 9) + bio->bi_size; 2640 int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size;
2641 int remaining = (pd->settings.size << 9) - used; 2641 int remaining = (pd->settings.size << 9) - used;
2642 int remaining2; 2642 int remaining2;
2643 2643
@@ -2645,7 +2645,7 @@ static int pkt_merge_bvec(struct request_queue *q, struct bio *bio, struct bio_v
2645 * A bio <= PAGE_SIZE must be allowed. If it crosses a packet 2645 * A bio <= PAGE_SIZE must be allowed. If it crosses a packet
2646 * boundary, pkt_make_request() will split the bio. 2646 * boundary, pkt_make_request() will split the bio.
2647 */ 2647 */
2648 remaining2 = PAGE_SIZE - bio->bi_size; 2648 remaining2 = PAGE_SIZE - bmd->bi_size;
2649 remaining = max(remaining, remaining2); 2649 remaining = max(remaining, remaining2);
2650 2650
2651 BUG_ON(remaining < 0); 2651 BUG_ON(remaining < 0);
@@ -2796,9 +2796,14 @@ out_mem:
2796 return ret; 2796 return ret;
2797} 2797}
2798 2798
2799static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) 2799static long pkt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2800{ 2800{
2801 struct pktcdvd_device *pd = inode->i_bdev->bd_disk->private_data; 2801 struct inode *inode = file->f_path.dentry->d_inode;
2802 struct pktcdvd_device *pd;
2803 long ret;
2804
2805 lock_kernel();
2806 pd = inode->i_bdev->bd_disk->private_data;
2802 2807
2803 VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, imajor(inode), iminor(inode)); 2808 VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, imajor(inode), iminor(inode));
2804 2809
@@ -2811,7 +2816,8 @@ static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u
2811 case CDROM_LAST_WRITTEN: 2816 case CDROM_LAST_WRITTEN:
2812 case CDROM_SEND_PACKET: 2817 case CDROM_SEND_PACKET:
2813 case SCSI_IOCTL_SEND_COMMAND: 2818 case SCSI_IOCTL_SEND_COMMAND:
2814 return blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg); 2819 ret = blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg);
2820 break;
2815 2821
2816 case CDROMEJECT: 2822 case CDROMEJECT:
2817 /* 2823 /*
@@ -2820,14 +2826,15 @@ static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u
2820 */ 2826 */
2821 if (pd->refcnt == 1) 2827 if (pd->refcnt == 1)
2822 pkt_lock_door(pd, 0); 2828 pkt_lock_door(pd, 0);
2823 return blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg); 2829 ret = blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg);
2830 break;
2824 2831
2825 default: 2832 default:
2826 VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd); 2833 VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd);
2827 return -ENOTTY; 2834 ret = -ENOTTY;
2828 } 2835 }
2829 2836 unlock_kernel();
2830 return 0; 2837 return ret;
2831} 2838}
2832 2839
2833static int pkt_media_changed(struct gendisk *disk) 2840static int pkt_media_changed(struct gendisk *disk)
@@ -2849,7 +2856,7 @@ static struct block_device_operations pktcdvd_ops = {
2849 .owner = THIS_MODULE, 2856 .owner = THIS_MODULE,
2850 .open = pkt_open, 2857 .open = pkt_open,
2851 .release = pkt_close, 2858 .release = pkt_close,
2852 .ioctl = pkt_ioctl, 2859 .unlocked_ioctl = pkt_ioctl,
2853 .media_changed = pkt_media_changed, 2860 .media_changed = pkt_media_changed,
2854}; 2861};
2855 2862
@@ -3014,7 +3021,8 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
3014 mutex_unlock(&ctl_mutex); 3021 mutex_unlock(&ctl_mutex);
3015} 3022}
3016 3023
3017static int pkt_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) 3024static long pkt_ctl_ioctl(struct file *file, unsigned int cmd,
3025 unsigned long arg)
3018{ 3026{
3019 void __user *argp = (void __user *)arg; 3027 void __user *argp = (void __user *)arg;
3020 struct pkt_ctrl_command ctrl_cmd; 3028 struct pkt_ctrl_command ctrl_cmd;
@@ -3031,16 +3039,22 @@ static int pkt_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cm
3031 case PKT_CTRL_CMD_SETUP: 3039 case PKT_CTRL_CMD_SETUP:
3032 if (!capable(CAP_SYS_ADMIN)) 3040 if (!capable(CAP_SYS_ADMIN))
3033 return -EPERM; 3041 return -EPERM;
3042 lock_kernel();
3034 ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev); 3043 ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev);
3035 ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev); 3044 ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev);
3045 unlock_kernel();
3036 break; 3046 break;
3037 case PKT_CTRL_CMD_TEARDOWN: 3047 case PKT_CTRL_CMD_TEARDOWN:
3038 if (!capable(CAP_SYS_ADMIN)) 3048 if (!capable(CAP_SYS_ADMIN))
3039 return -EPERM; 3049 return -EPERM;
3050 lock_kernel();
3040 ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev)); 3051 ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev));
3052 unlock_kernel();
3041 break; 3053 break;
3042 case PKT_CTRL_CMD_STATUS: 3054 case PKT_CTRL_CMD_STATUS:
3055 lock_kernel();
3043 pkt_get_status(&ctrl_cmd); 3056 pkt_get_status(&ctrl_cmd);
3057 unlock_kernel();
3044 break; 3058 break;
3045 default: 3059 default:
3046 return -ENOTTY; 3060 return -ENOTTY;
@@ -3053,7 +3067,7 @@ static int pkt_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cm
3053 3067
3054 3068
3055static const struct file_operations pkt_ctl_fops = { 3069static const struct file_operations pkt_ctl_fops = {
3056 .ioctl = pkt_ctl_ioctl, 3070 .unlocked_ioctl = pkt_ctl_ioctl,
3057 .owner = THIS_MODULE, 3071 .owner = THIS_MODULE,
3058}; 3072};
3059 3073
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f2fff5799ddf..9ae05c584234 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -38,6 +38,7 @@
38#include <linux/interrupt.h> 38#include <linux/interrupt.h>
39#include <linux/blkdev.h> 39#include <linux/blkdev.h>
40#include <linux/hdreg.h> 40#include <linux/hdreg.h>
41#include <linux/cdrom.h>
41#include <linux/module.h> 42#include <linux/module.h>
42 43
43#include <xen/xenbus.h> 44#include <xen/xenbus.h>
@@ -153,6 +154,40 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
153 return 0; 154 return 0;
154} 155}
155 156
157int blkif_ioctl(struct inode *inode, struct file *filep,
158 unsigned command, unsigned long argument)
159{
160 struct blkfront_info *info =
161 inode->i_bdev->bd_disk->private_data;
162 int i;
163
164 dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
165 command, (long)argument);
166
167 switch (command) {
168 case CDROMMULTISESSION:
169 dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
170 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
171 if (put_user(0, (char __user *)(argument + i)))
172 return -EFAULT;
173 return 0;
174
175 case CDROM_GET_CAPABILITY: {
176 struct gendisk *gd = info->gd;
177 if (gd->flags & GENHD_FL_CD)
178 return 0;
179 return -EINVAL;
180 }
181
182 default:
183 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
184 command);*/
185 return -EINVAL; /* same return as native Linux */
186 }
187
188 return 0;
189}
190
156/* 191/*
157 * blkif_queue_request 192 * blkif_queue_request
158 * 193 *
@@ -324,6 +359,9 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
324 /* Make sure buffer addresses are sector-aligned. */ 359 /* Make sure buffer addresses are sector-aligned. */
325 blk_queue_dma_alignment(rq, 511); 360 blk_queue_dma_alignment(rq, 511);
326 361
362 /* Make sure we don't use bounce buffers. */
363 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
364
327 gd->queue = rq; 365 gd->queue = rq;
328 366
329 return 0; 367 return 0;
@@ -546,7 +584,7 @@ static int setup_blkring(struct xenbus_device *dev,
546 584
547 info->ring_ref = GRANT_INVALID_REF; 585 info->ring_ref = GRANT_INVALID_REF;
548 586
549 sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL); 587 sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
550 if (!sring) { 588 if (!sring) {
551 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); 589 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
552 return -ENOMEM; 590 return -ENOMEM;
@@ -703,7 +741,8 @@ static int blkif_recover(struct blkfront_info *info)
703 int j; 741 int j;
704 742
705 /* Stage 1: Make a safe copy of the shadow state. */ 743 /* Stage 1: Make a safe copy of the shadow state. */
706 copy = kmalloc(sizeof(info->shadow), GFP_KERNEL); 744 copy = kmalloc(sizeof(info->shadow),
745 GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
707 if (!copy) 746 if (!copy)
708 return -ENOMEM; 747 return -ENOMEM;
709 memcpy(copy, info->shadow, sizeof(info->shadow)); 748 memcpy(copy, info->shadow, sizeof(info->shadow));
@@ -959,7 +998,7 @@ static int blkif_release(struct inode *inode, struct file *filep)
959 struct xenbus_device *dev = info->xbdev; 998 struct xenbus_device *dev = info->xbdev;
960 enum xenbus_state state = xenbus_read_driver_state(dev->otherend); 999 enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
961 1000
962 if (state == XenbusStateClosing) 1001 if (state == XenbusStateClosing && info->is_ready)
963 blkfront_closing(dev); 1002 blkfront_closing(dev);
964 } 1003 }
965 return 0; 1004 return 0;
@@ -971,6 +1010,7 @@ static struct block_device_operations xlvbd_block_fops =
971 .open = blkif_open, 1010 .open = blkif_open,
972 .release = blkif_release, 1011 .release = blkif_release,
973 .getgeo = blkif_getgeo, 1012 .getgeo = blkif_getgeo,
1013 .ioctl = blkif_ioctl,
974}; 1014};
975 1015
976 1016
@@ -1006,7 +1046,7 @@ static int __init xlblk_init(void)
1006module_init(xlblk_init); 1046module_init(xlblk_init);
1007 1047
1008 1048
1009static void xlblk_exit(void) 1049static void __exit xlblk_exit(void)
1010{ 1050{
1011 return xenbus_unregister_driver(&blkfront); 1051 return xenbus_unregister_driver(&blkfront);
1012} 1052}
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 69f26eb6415b..a5da35632651 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -461,37 +461,27 @@ int cdrom_get_media_event(struct cdrom_device_info *cdi,
461 struct media_event_desc *med) 461 struct media_event_desc *med)
462{ 462{
463 struct packet_command cgc; 463 struct packet_command cgc;
464 unsigned char *buffer; 464 unsigned char buffer[8];
465 struct event_header *eh; 465 struct event_header *eh = (struct event_header *) buffer;
466 int ret = 1;
467
468 buffer = kmalloc(8, GFP_KERNEL);
469 if (!buffer)
470 return -ENOMEM;
471 466
472 eh = (struct event_header *)buffer; 467 init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
473
474 init_cdrom_command(&cgc, buffer, 8, CGC_DATA_READ);
475 cgc.cmd[0] = GPCMD_GET_EVENT_STATUS_NOTIFICATION; 468 cgc.cmd[0] = GPCMD_GET_EVENT_STATUS_NOTIFICATION;
476 cgc.cmd[1] = 1; /* IMMED */ 469 cgc.cmd[1] = 1; /* IMMED */
477 cgc.cmd[4] = 1 << 4; /* media event */ 470 cgc.cmd[4] = 1 << 4; /* media event */
478 cgc.cmd[8] = 8; 471 cgc.cmd[8] = sizeof(buffer);
479 cgc.quiet = 1; 472 cgc.quiet = 1;
480 473
481 if (cdi->ops->generic_packet(cdi, &cgc)) 474 if (cdi->ops->generic_packet(cdi, &cgc))
482 goto err; 475 return 1;
483 476
484 if (be16_to_cpu(eh->data_len) < sizeof(*med)) 477 if (be16_to_cpu(eh->data_len) < sizeof(*med))
485 goto err; 478 return 1;
486 479
487 if (eh->nea || eh->notification_class != 0x4) 480 if (eh->nea || eh->notification_class != 0x4)
488 goto err; 481 return 1;
489 482
490 memcpy(med, buffer + sizeof(*eh), sizeof(*med)); 483 memcpy(med, &buffer[sizeof(*eh)], sizeof(*med));
491 ret = 0; 484 return 0;
492err:
493 kfree(buffer);
494 return ret;
495} 485}
496 486
497/* 487/*
@@ -501,82 +491,68 @@ err:
501static int cdrom_mrw_probe_pc(struct cdrom_device_info *cdi) 491static int cdrom_mrw_probe_pc(struct cdrom_device_info *cdi)
502{ 492{
503 struct packet_command cgc; 493 struct packet_command cgc;
504 char *buffer; 494 char buffer[16];
505 int ret = 1;
506
507 buffer = kmalloc(16, GFP_KERNEL);
508 if (!buffer)
509 return -ENOMEM;
510 495
511 init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ); 496 init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
512 497
513 cgc.timeout = HZ; 498 cgc.timeout = HZ;
514 cgc.quiet = 1; 499 cgc.quiet = 1;
515 500
516 if (!cdrom_mode_sense(cdi, &cgc, MRW_MODE_PC, 0)) { 501 if (!cdrom_mode_sense(cdi, &cgc, MRW_MODE_PC, 0)) {
517 cdi->mrw_mode_page = MRW_MODE_PC; 502 cdi->mrw_mode_page = MRW_MODE_PC;
518 ret = 0; 503 return 0;
519 } else if (!cdrom_mode_sense(cdi, &cgc, MRW_MODE_PC_PRE1, 0)) { 504 } else if (!cdrom_mode_sense(cdi, &cgc, MRW_MODE_PC_PRE1, 0)) {
520 cdi->mrw_mode_page = MRW_MODE_PC_PRE1; 505 cdi->mrw_mode_page = MRW_MODE_PC_PRE1;
521 ret = 0; 506 return 0;
522 } 507 }
523 kfree(buffer); 508
524 return ret; 509 return 1;
525} 510}
526 511
527static int cdrom_is_mrw(struct cdrom_device_info *cdi, int *write) 512static int cdrom_is_mrw(struct cdrom_device_info *cdi, int *write)
528{ 513{
529 struct packet_command cgc; 514 struct packet_command cgc;
530 struct mrw_feature_desc *mfd; 515 struct mrw_feature_desc *mfd;
531 unsigned char *buffer; 516 unsigned char buffer[16];
532 int ret; 517 int ret;
533 518
534 *write = 0; 519 *write = 0;
535 buffer = kmalloc(16, GFP_KERNEL);
536 if (!buffer)
537 return -ENOMEM;
538 520
539 init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ); 521 init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
540 522
541 cgc.cmd[0] = GPCMD_GET_CONFIGURATION; 523 cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
542 cgc.cmd[3] = CDF_MRW; 524 cgc.cmd[3] = CDF_MRW;
543 cgc.cmd[8] = 16; 525 cgc.cmd[8] = sizeof(buffer);
544 cgc.quiet = 1; 526 cgc.quiet = 1;
545 527
546 if ((ret = cdi->ops->generic_packet(cdi, &cgc))) 528 if ((ret = cdi->ops->generic_packet(cdi, &cgc)))
547 goto err; 529 return ret;
548 530
549 mfd = (struct mrw_feature_desc *)&buffer[sizeof(struct feature_header)]; 531 mfd = (struct mrw_feature_desc *)&buffer[sizeof(struct feature_header)];
550 if (be16_to_cpu(mfd->feature_code) != CDF_MRW) { 532 if (be16_to_cpu(mfd->feature_code) != CDF_MRW)
551 ret = 1; 533 return 1;
552 goto err;
553 }
554 *write = mfd->write; 534 *write = mfd->write;
555 535
556 if ((ret = cdrom_mrw_probe_pc(cdi))) { 536 if ((ret = cdrom_mrw_probe_pc(cdi))) {
557 *write = 0; 537 *write = 0;
538 return ret;
558 } 539 }
559err: 540
560 kfree(buffer); 541 return 0;
561 return ret;
562} 542}
563 543
564static int cdrom_mrw_bgformat(struct cdrom_device_info *cdi, int cont) 544static int cdrom_mrw_bgformat(struct cdrom_device_info *cdi, int cont)
565{ 545{
566 struct packet_command cgc; 546 struct packet_command cgc;
567 unsigned char *buffer; 547 unsigned char buffer[12];
568 int ret; 548 int ret;
569 549
570 printk(KERN_INFO "cdrom: %sstarting format\n", cont ? "Re" : ""); 550 printk(KERN_INFO "cdrom: %sstarting format\n", cont ? "Re" : "");
571 551
572 buffer = kmalloc(12, GFP_KERNEL);
573 if (!buffer)
574 return -ENOMEM;
575
576 /* 552 /*
577 * FmtData bit set (bit 4), format type is 1 553 * FmtData bit set (bit 4), format type is 1
578 */ 554 */
579 init_cdrom_command(&cgc, buffer, 12, CGC_DATA_WRITE); 555 init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_WRITE);
580 cgc.cmd[0] = GPCMD_FORMAT_UNIT; 556 cgc.cmd[0] = GPCMD_FORMAT_UNIT;
581 cgc.cmd[1] = (1 << 4) | 1; 557 cgc.cmd[1] = (1 << 4) | 1;
582 558
@@ -603,7 +579,6 @@ static int cdrom_mrw_bgformat(struct cdrom_device_info *cdi, int cont)
603 if (ret) 579 if (ret)
604 printk(KERN_INFO "cdrom: bgformat failed\n"); 580 printk(KERN_INFO "cdrom: bgformat failed\n");
605 581
606 kfree(buffer);
607 return ret; 582 return ret;
608} 583}
609 584
@@ -663,17 +638,16 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
663{ 638{
664 struct packet_command cgc; 639 struct packet_command cgc;
665 struct mode_page_header *mph; 640 struct mode_page_header *mph;
666 char *buffer; 641 char buffer[16];
667 int ret, offset, size; 642 int ret, offset, size;
668 643
669 buffer = kmalloc(16, GFP_KERNEL); 644 init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
670 if (!buffer)
671 return -ENOMEM;
672 645
673 init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ); 646 cgc.buffer = buffer;
647 cgc.buflen = sizeof(buffer);
674 648
675 if ((ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0))) 649 if ((ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0)))
676 goto err; 650 return ret;
677 651
678 mph = (struct mode_page_header *) buffer; 652 mph = (struct mode_page_header *) buffer;
679 offset = be16_to_cpu(mph->desc_length); 653 offset = be16_to_cpu(mph->desc_length);
@@ -683,70 +657,55 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
683 cgc.buflen = size; 657 cgc.buflen = size;
684 658
685 if ((ret = cdrom_mode_select(cdi, &cgc))) 659 if ((ret = cdrom_mode_select(cdi, &cgc)))
686 goto err; 660 return ret;
687 661
688 printk(KERN_INFO "cdrom: %s: mrw address space %s selected\n", cdi->name, mrw_address_space[space]); 662 printk(KERN_INFO "cdrom: %s: mrw address space %s selected\n", cdi->name, mrw_address_space[space]);
689 ret = 0; 663 return 0;
690err:
691 kfree(buffer);
692 return ret;
693} 664}
694 665
695static int cdrom_get_random_writable(struct cdrom_device_info *cdi, 666static int cdrom_get_random_writable(struct cdrom_device_info *cdi,
696 struct rwrt_feature_desc *rfd) 667 struct rwrt_feature_desc *rfd)
697{ 668{
698 struct packet_command cgc; 669 struct packet_command cgc;
699 char *buffer; 670 char buffer[24];
700 int ret; 671 int ret;
701 672
702 buffer = kmalloc(24, GFP_KERNEL); 673 init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
703 if (!buffer)
704 return -ENOMEM;
705
706 init_cdrom_command(&cgc, buffer, 24, CGC_DATA_READ);
707 674
708 cgc.cmd[0] = GPCMD_GET_CONFIGURATION; /* often 0x46 */ 675 cgc.cmd[0] = GPCMD_GET_CONFIGURATION; /* often 0x46 */
709 cgc.cmd[3] = CDF_RWRT; /* often 0x0020 */ 676 cgc.cmd[3] = CDF_RWRT; /* often 0x0020 */
710 cgc.cmd[8] = 24; /* often 0x18 */ 677 cgc.cmd[8] = sizeof(buffer); /* often 0x18 */
711 cgc.quiet = 1; 678 cgc.quiet = 1;
712 679
713 if ((ret = cdi->ops->generic_packet(cdi, &cgc))) 680 if ((ret = cdi->ops->generic_packet(cdi, &cgc)))
714 goto err; 681 return ret;
715 682
716 memcpy(rfd, &buffer[sizeof(struct feature_header)], sizeof (*rfd)); 683 memcpy(rfd, &buffer[sizeof(struct feature_header)], sizeof (*rfd));
717 ret = 0; 684 return 0;
718err:
719 kfree(buffer);
720 return ret;
721} 685}
722 686
723static int cdrom_has_defect_mgt(struct cdrom_device_info *cdi) 687static int cdrom_has_defect_mgt(struct cdrom_device_info *cdi)
724{ 688{
725 struct packet_command cgc; 689 struct packet_command cgc;
726 char *buffer; 690 char buffer[16];
727 __be16 *feature_code; 691 __be16 *feature_code;
728 int ret; 692 int ret;
729 693
730 buffer = kmalloc(16, GFP_KERNEL); 694 init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
731 if (!buffer)
732 return -ENOMEM;
733
734 init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ);
735 695
736 cgc.cmd[0] = GPCMD_GET_CONFIGURATION; 696 cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
737 cgc.cmd[3] = CDF_HWDM; 697 cgc.cmd[3] = CDF_HWDM;
738 cgc.cmd[8] = 16; 698 cgc.cmd[8] = sizeof(buffer);
739 cgc.quiet = 1; 699 cgc.quiet = 1;
740 700
741 if ((ret = cdi->ops->generic_packet(cdi, &cgc))) 701 if ((ret = cdi->ops->generic_packet(cdi, &cgc)))
742 goto err; 702 return ret;
743 703
744 feature_code = (__be16 *) &buffer[sizeof(struct feature_header)]; 704 feature_code = (__be16 *) &buffer[sizeof(struct feature_header)];
745 if (be16_to_cpu(*feature_code) == CDF_HWDM) 705 if (be16_to_cpu(*feature_code) == CDF_HWDM)
746 ret = 0; 706 return 0;
747err: 707
748 kfree(buffer); 708 return 1;
749 return ret;
750} 709}
751 710
752 711
@@ -837,14 +796,10 @@ static int cdrom_mrw_open_write(struct cdrom_device_info *cdi)
837static int mo_open_write(struct cdrom_device_info *cdi) 796static int mo_open_write(struct cdrom_device_info *cdi)
838{ 797{
839 struct packet_command cgc; 798 struct packet_command cgc;
840 char *buffer; 799 char buffer[255];
841 int ret; 800 int ret;
842 801
843 buffer = kmalloc(255, GFP_KERNEL); 802 init_cdrom_command(&cgc, &buffer, 4, CGC_DATA_READ);
844 if (!buffer)
845 return -ENOMEM;
846
847 init_cdrom_command(&cgc, buffer, 4, CGC_DATA_READ);
848 cgc.quiet = 1; 803 cgc.quiet = 1;
849 804
850 /* 805 /*
@@ -861,15 +816,10 @@ static int mo_open_write(struct cdrom_device_info *cdi)
861 } 816 }
862 817
863 /* drive gave us no info, let the user go ahead */ 818 /* drive gave us no info, let the user go ahead */
864 if (ret) { 819 if (ret)
865 ret = 0; 820 return 0;
866 goto err;
867 }
868 821
869 ret = buffer[3] & 0x80; 822 return buffer[3] & 0x80;
870err:
871 kfree(buffer);
872 return ret;
873} 823}
874 824
875static int cdrom_ram_open_write(struct cdrom_device_info *cdi) 825static int cdrom_ram_open_write(struct cdrom_device_info *cdi)
@@ -892,19 +842,15 @@ static int cdrom_ram_open_write(struct cdrom_device_info *cdi)
892static void cdrom_mmc3_profile(struct cdrom_device_info *cdi) 842static void cdrom_mmc3_profile(struct cdrom_device_info *cdi)
893{ 843{
894 struct packet_command cgc; 844 struct packet_command cgc;
895 char *buffer; 845 char buffer[32];
896 int ret, mmc3_profile; 846 int ret, mmc3_profile;
897 847
898 buffer = kmalloc(32, GFP_KERNEL); 848 init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
899 if (!buffer)
900 return;
901
902 init_cdrom_command(&cgc, buffer, 32, CGC_DATA_READ);
903 849
904 cgc.cmd[0] = GPCMD_GET_CONFIGURATION; 850 cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
905 cgc.cmd[1] = 0; 851 cgc.cmd[1] = 0;
906 cgc.cmd[2] = cgc.cmd[3] = 0; /* Starting Feature Number */ 852 cgc.cmd[2] = cgc.cmd[3] = 0; /* Starting Feature Number */
907 cgc.cmd[8] = 32; /* Allocation Length */ 853 cgc.cmd[8] = sizeof(buffer); /* Allocation Length */
908 cgc.quiet = 1; 854 cgc.quiet = 1;
909 855
910 if ((ret = cdi->ops->generic_packet(cdi, &cgc))) 856 if ((ret = cdi->ops->generic_packet(cdi, &cgc)))
@@ -913,7 +859,6 @@ static void cdrom_mmc3_profile(struct cdrom_device_info *cdi)
913 mmc3_profile = (buffer[6] << 8) | buffer[7]; 859 mmc3_profile = (buffer[6] << 8) | buffer[7];
914 860
915 cdi->mmc3_profile = mmc3_profile; 861 cdi->mmc3_profile = mmc3_profile;
916 kfree(buffer);
917} 862}
918 863
919static int cdrom_is_dvd_rw(struct cdrom_device_info *cdi) 864static int cdrom_is_dvd_rw(struct cdrom_device_info *cdi)
@@ -1628,15 +1573,12 @@ static void setup_send_key(struct packet_command *cgc, unsigned agid, unsigned t
1628static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) 1573static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1629{ 1574{
1630 int ret; 1575 int ret;
1631 u_char *buf; 1576 u_char buf[20];
1632 struct packet_command cgc; 1577 struct packet_command cgc;
1633 struct cdrom_device_ops *cdo = cdi->ops; 1578 struct cdrom_device_ops *cdo = cdi->ops;
1634 rpc_state_t *rpc_state; 1579 rpc_state_t rpc_state;
1635
1636 buf = kzalloc(20, GFP_KERNEL);
1637 if (!buf)
1638 return -ENOMEM;
1639 1580
1581 memset(buf, 0, sizeof(buf));
1640 init_cdrom_command(&cgc, buf, 0, CGC_DATA_READ); 1582 init_cdrom_command(&cgc, buf, 0, CGC_DATA_READ);
1641 1583
1642 switch (ai->type) { 1584 switch (ai->type) {
@@ -1647,7 +1589,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1647 setup_report_key(&cgc, ai->lsa.agid, 0); 1589 setup_report_key(&cgc, ai->lsa.agid, 0);
1648 1590
1649 if ((ret = cdo->generic_packet(cdi, &cgc))) 1591 if ((ret = cdo->generic_packet(cdi, &cgc)))
1650 goto err; 1592 return ret;
1651 1593
1652 ai->lsa.agid = buf[7] >> 6; 1594 ai->lsa.agid = buf[7] >> 6;
1653 /* Returning data, let host change state */ 1595 /* Returning data, let host change state */
@@ -1658,7 +1600,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1658 setup_report_key(&cgc, ai->lsk.agid, 2); 1600 setup_report_key(&cgc, ai->lsk.agid, 2);
1659 1601
1660 if ((ret = cdo->generic_packet(cdi, &cgc))) 1602 if ((ret = cdo->generic_packet(cdi, &cgc)))
1661 goto err; 1603 return ret;
1662 1604
1663 copy_key(ai->lsk.key, &buf[4]); 1605 copy_key(ai->lsk.key, &buf[4]);
1664 /* Returning data, let host change state */ 1606 /* Returning data, let host change state */
@@ -1669,7 +1611,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1669 setup_report_key(&cgc, ai->lsc.agid, 1); 1611 setup_report_key(&cgc, ai->lsc.agid, 1);
1670 1612
1671 if ((ret = cdo->generic_packet(cdi, &cgc))) 1613 if ((ret = cdo->generic_packet(cdi, &cgc)))
1672 goto err; 1614 return ret;
1673 1615
1674 copy_chal(ai->lsc.chal, &buf[4]); 1616 copy_chal(ai->lsc.chal, &buf[4]);
1675 /* Returning data, let host change state */ 1617 /* Returning data, let host change state */
@@ -1686,7 +1628,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1686 cgc.cmd[2] = ai->lstk.lba >> 24; 1628 cgc.cmd[2] = ai->lstk.lba >> 24;
1687 1629
1688 if ((ret = cdo->generic_packet(cdi, &cgc))) 1630 if ((ret = cdo->generic_packet(cdi, &cgc)))
1689 goto err; 1631 return ret;
1690 1632
1691 ai->lstk.cpm = (buf[4] >> 7) & 1; 1633 ai->lstk.cpm = (buf[4] >> 7) & 1;
1692 ai->lstk.cp_sec = (buf[4] >> 6) & 1; 1634 ai->lstk.cp_sec = (buf[4] >> 6) & 1;
@@ -1700,7 +1642,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1700 setup_report_key(&cgc, ai->lsasf.agid, 5); 1642 setup_report_key(&cgc, ai->lsasf.agid, 5);
1701 1643
1702 if ((ret = cdo->generic_packet(cdi, &cgc))) 1644 if ((ret = cdo->generic_packet(cdi, &cgc)))
1703 goto err; 1645 return ret;
1704 1646
1705 ai->lsasf.asf = buf[7] & 1; 1647 ai->lsasf.asf = buf[7] & 1;
1706 break; 1648 break;
@@ -1713,7 +1655,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1713 copy_chal(&buf[4], ai->hsc.chal); 1655 copy_chal(&buf[4], ai->hsc.chal);
1714 1656
1715 if ((ret = cdo->generic_packet(cdi, &cgc))) 1657 if ((ret = cdo->generic_packet(cdi, &cgc)))
1716 goto err; 1658 return ret;
1717 1659
1718 ai->type = DVD_LU_SEND_KEY1; 1660 ai->type = DVD_LU_SEND_KEY1;
1719 break; 1661 break;
@@ -1726,7 +1668,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1726 1668
1727 if ((ret = cdo->generic_packet(cdi, &cgc))) { 1669 if ((ret = cdo->generic_packet(cdi, &cgc))) {
1728 ai->type = DVD_AUTH_FAILURE; 1670 ai->type = DVD_AUTH_FAILURE;
1729 goto err; 1671 return ret;
1730 } 1672 }
1731 ai->type = DVD_AUTH_ESTABLISHED; 1673 ai->type = DVD_AUTH_ESTABLISHED;
1732 break; 1674 break;
@@ -1737,23 +1679,24 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1737 cdinfo(CD_DVD, "entering DVD_INVALIDATE_AGID\n"); 1679 cdinfo(CD_DVD, "entering DVD_INVALIDATE_AGID\n");
1738 setup_report_key(&cgc, ai->lsa.agid, 0x3f); 1680 setup_report_key(&cgc, ai->lsa.agid, 0x3f);
1739 if ((ret = cdo->generic_packet(cdi, &cgc))) 1681 if ((ret = cdo->generic_packet(cdi, &cgc)))
1740 goto err; 1682 return ret;
1741 break; 1683 break;
1742 1684
1743 /* Get region settings */ 1685 /* Get region settings */
1744 case DVD_LU_SEND_RPC_STATE: 1686 case DVD_LU_SEND_RPC_STATE:
1745 cdinfo(CD_DVD, "entering DVD_LU_SEND_RPC_STATE\n"); 1687 cdinfo(CD_DVD, "entering DVD_LU_SEND_RPC_STATE\n");
1746 setup_report_key(&cgc, 0, 8); 1688 setup_report_key(&cgc, 0, 8);
1689 memset(&rpc_state, 0, sizeof(rpc_state_t));
1690 cgc.buffer = (char *) &rpc_state;
1747 1691
1748 if ((ret = cdo->generic_packet(cdi, &cgc))) 1692 if ((ret = cdo->generic_packet(cdi, &cgc)))
1749 goto err; 1693 return ret;
1750 1694
1751 rpc_state = (rpc_state_t *)buf; 1695 ai->lrpcs.type = rpc_state.type_code;
1752 ai->lrpcs.type = rpc_state->type_code; 1696 ai->lrpcs.vra = rpc_state.vra;
1753 ai->lrpcs.vra = rpc_state->vra; 1697 ai->lrpcs.ucca = rpc_state.ucca;
1754 ai->lrpcs.ucca = rpc_state->ucca; 1698 ai->lrpcs.region_mask = rpc_state.region_mask;
1755 ai->lrpcs.region_mask = rpc_state->region_mask; 1699 ai->lrpcs.rpc_scheme = rpc_state.rpc_scheme;
1756 ai->lrpcs.rpc_scheme = rpc_state->rpc_scheme;
1757 break; 1700 break;
1758 1701
1759 /* Set region settings */ 1702 /* Set region settings */
@@ -1764,23 +1707,20 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1764 buf[4] = ai->hrpcs.pdrc; 1707 buf[4] = ai->hrpcs.pdrc;
1765 1708
1766 if ((ret = cdo->generic_packet(cdi, &cgc))) 1709 if ((ret = cdo->generic_packet(cdi, &cgc)))
1767 goto err; 1710 return ret;
1768 break; 1711 break;
1769 1712
1770 default: 1713 default:
1771 cdinfo(CD_WARNING, "Invalid DVD key ioctl (%d)\n", ai->type); 1714 cdinfo(CD_WARNING, "Invalid DVD key ioctl (%d)\n", ai->type);
1772 ret = -ENOTTY; 1715 return -ENOTTY;
1773 goto err;
1774 } 1716 }
1775 ret = 0; 1717
1776err: 1718 return 0;
1777 kfree(buf);
1778 return ret;
1779} 1719}
1780 1720
1781static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s) 1721static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
1782{ 1722{
1783 unsigned char *buf, *base; 1723 unsigned char buf[21], *base;
1784 struct dvd_layer *layer; 1724 struct dvd_layer *layer;
1785 struct packet_command cgc; 1725 struct packet_command cgc;
1786 struct cdrom_device_ops *cdo = cdi->ops; 1726 struct cdrom_device_ops *cdo = cdi->ops;
@@ -1789,11 +1729,7 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
1789 if (layer_num >= DVD_LAYERS) 1729 if (layer_num >= DVD_LAYERS)
1790 return -EINVAL; 1730 return -EINVAL;
1791 1731
1792 buf = kmalloc(21, GFP_KERNEL); 1732 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
1793 if (!buf)
1794 return -ENOMEM;
1795
1796 init_cdrom_command(&cgc, buf, 21, CGC_DATA_READ);
1797 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1733 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
1798 cgc.cmd[6] = layer_num; 1734 cgc.cmd[6] = layer_num;
1799 cgc.cmd[7] = s->type; 1735 cgc.cmd[7] = s->type;
@@ -1805,7 +1741,7 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
1805 cgc.quiet = 1; 1741 cgc.quiet = 1;
1806 1742
1807 if ((ret = cdo->generic_packet(cdi, &cgc))) 1743 if ((ret = cdo->generic_packet(cdi, &cgc)))
1808 goto err; 1744 return ret;
1809 1745
1810 base = &buf[4]; 1746 base = &buf[4];
1811 layer = &s->physical.layer[layer_num]; 1747 layer = &s->physical.layer[layer_num];
@@ -1829,24 +1765,17 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
1829 layer->end_sector_l0 = base[13] << 16 | base[14] << 8 | base[15]; 1765 layer->end_sector_l0 = base[13] << 16 | base[14] << 8 | base[15];
1830 layer->bca = base[16] >> 7; 1766 layer->bca = base[16] >> 7;
1831 1767
1832 ret = 0; 1768 return 0;
1833err:
1834 kfree(buf);
1835 return ret;
1836} 1769}
1837 1770
1838static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s) 1771static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s)
1839{ 1772{
1840 int ret; 1773 int ret;
1841 u_char *buf; 1774 u_char buf[8];
1842 struct packet_command cgc; 1775 struct packet_command cgc;
1843 struct cdrom_device_ops *cdo = cdi->ops; 1776 struct cdrom_device_ops *cdo = cdi->ops;
1844 1777
1845 buf = kmalloc(8, GFP_KERNEL); 1778 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
1846 if (!buf)
1847 return -ENOMEM;
1848
1849 init_cdrom_command(&cgc, buf, 8, CGC_DATA_READ);
1850 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1779 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
1851 cgc.cmd[6] = s->copyright.layer_num; 1780 cgc.cmd[6] = s->copyright.layer_num;
1852 cgc.cmd[7] = s->type; 1781 cgc.cmd[7] = s->type;
@@ -1854,15 +1783,12 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s)
1854 cgc.cmd[9] = cgc.buflen & 0xff; 1783 cgc.cmd[9] = cgc.buflen & 0xff;
1855 1784
1856 if ((ret = cdo->generic_packet(cdi, &cgc))) 1785 if ((ret = cdo->generic_packet(cdi, &cgc)))
1857 goto err; 1786 return ret;
1858 1787
1859 s->copyright.cpst = buf[4]; 1788 s->copyright.cpst = buf[4];
1860 s->copyright.rmi = buf[5]; 1789 s->copyright.rmi = buf[5];
1861 1790
1862 ret = 0; 1791 return 0;
1863err:
1864 kfree(buf);
1865 return ret;
1866} 1792}
1867 1793
1868static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s) 1794static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s)
@@ -1894,33 +1820,26 @@ static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s)
1894static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s) 1820static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s)
1895{ 1821{
1896 int ret; 1822 int ret;
1897 u_char *buf; 1823 u_char buf[4 + 188];
1898 struct packet_command cgc; 1824 struct packet_command cgc;
1899 struct cdrom_device_ops *cdo = cdi->ops; 1825 struct cdrom_device_ops *cdo = cdi->ops;
1900 1826
1901 buf = kmalloc(4 + 188, GFP_KERNEL); 1827 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
1902 if (!buf)
1903 return -ENOMEM;
1904
1905 init_cdrom_command(&cgc, buf, 4 + 188, CGC_DATA_READ);
1906 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1828 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
1907 cgc.cmd[7] = s->type; 1829 cgc.cmd[7] = s->type;
1908 cgc.cmd[9] = cgc.buflen & 0xff; 1830 cgc.cmd[9] = cgc.buflen & 0xff;
1909 1831
1910 if ((ret = cdo->generic_packet(cdi, &cgc))) 1832 if ((ret = cdo->generic_packet(cdi, &cgc)))
1911 goto err; 1833 return ret;
1912 1834
1913 s->bca.len = buf[0] << 8 | buf[1]; 1835 s->bca.len = buf[0] << 8 | buf[1];
1914 if (s->bca.len < 12 || s->bca.len > 188) { 1836 if (s->bca.len < 12 || s->bca.len > 188) {
1915 cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len); 1837 cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len);
1916 ret = -EIO; 1838 return -EIO;
1917 goto err;
1918 } 1839 }
1919 memcpy(s->bca.value, &buf[4], s->bca.len); 1840 memcpy(s->bca.value, &buf[4], s->bca.len);
1920 ret = 0; 1841
1921err: 1842 return 0;
1922 kfree(buf);
1923 return ret;
1924} 1843}
1925 1844
1926static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s) 1845static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s)
@@ -2020,13 +1939,9 @@ static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
2020{ 1939{
2021 struct cdrom_device_ops *cdo = cdi->ops; 1940 struct cdrom_device_ops *cdo = cdi->ops;
2022 struct packet_command cgc; 1941 struct packet_command cgc;
2023 char *buffer; 1942 char buffer[32];
2024 int ret; 1943 int ret;
2025 1944
2026 buffer = kmalloc(32, GFP_KERNEL);
2027 if (!buffer)
2028 return -ENOMEM;
2029
2030 init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ); 1945 init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ);
2031 cgc.cmd[0] = GPCMD_READ_SUBCHANNEL; 1946 cgc.cmd[0] = GPCMD_READ_SUBCHANNEL;
2032 cgc.cmd[1] = 2; /* MSF addressing */ 1947 cgc.cmd[1] = 2; /* MSF addressing */
@@ -2035,7 +1950,7 @@ static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
2035 cgc.cmd[8] = 16; 1950 cgc.cmd[8] = 16;
2036 1951
2037 if ((ret = cdo->generic_packet(cdi, &cgc))) 1952 if ((ret = cdo->generic_packet(cdi, &cgc)))
2038 goto err; 1953 return ret;
2039 1954
2040 subchnl->cdsc_audiostatus = cgc.buffer[1]; 1955 subchnl->cdsc_audiostatus = cgc.buffer[1];
2041 subchnl->cdsc_format = CDROM_MSF; 1956 subchnl->cdsc_format = CDROM_MSF;
@@ -2050,10 +1965,7 @@ static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
2050 subchnl->cdsc_absaddr.msf.second = cgc.buffer[10]; 1965 subchnl->cdsc_absaddr.msf.second = cgc.buffer[10];
2051 subchnl->cdsc_absaddr.msf.frame = cgc.buffer[11]; 1966 subchnl->cdsc_absaddr.msf.frame = cgc.buffer[11];
2052 1967
2053 ret = 0; 1968 return 0;
2054err:
2055 kfree(buffer);
2056 return ret;
2057} 1969}
2058 1970
2059/* 1971/*
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 10748240cb2f..6a866d7c8ae5 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -50,17 +50,19 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
50/** 50/**
51 * linear_mergeable_bvec -- tell bio layer if two requests can be merged 51 * linear_mergeable_bvec -- tell bio layer if two requests can be merged
52 * @q: request queue 52 * @q: request queue
53 * @bio: the buffer head that's been built up so far 53 * @bvm: properties of new bio
54 * @biovec: the request that could be merged to it. 54 * @biovec: the request that could be merged to it.
55 * 55 *
56 * Return amount of bytes we can take at this offset 56 * Return amount of bytes we can take at this offset
57 */ 57 */
58static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) 58static int linear_mergeable_bvec(struct request_queue *q,
59 struct bvec_merge_data *bvm,
60 struct bio_vec *biovec)
59{ 61{
60 mddev_t *mddev = q->queuedata; 62 mddev_t *mddev = q->queuedata;
61 dev_info_t *dev0; 63 dev_info_t *dev0;
62 unsigned long maxsectors, bio_sectors = bio->bi_size >> 9; 64 unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
63 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 65 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
64 66
65 dev0 = which_dev(mddev, sector); 67 dev0 = which_dev(mddev, sector);
66 maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1)); 68 maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 914c04ddec7c..bcbb82594a19 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -241,18 +241,20 @@ static int create_strip_zones (mddev_t *mddev)
241/** 241/**
242 * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged 242 * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
243 * @q: request queue 243 * @q: request queue
244 * @bio: the buffer head that's been built up so far 244 * @bvm: properties of new bio
245 * @biovec: the request that could be merged to it. 245 * @biovec: the request that could be merged to it.
246 * 246 *
247 * Return amount of bytes we can accept at this offset 247 * Return amount of bytes we can accept at this offset
248 */ 248 */
249static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) 249static int raid0_mergeable_bvec(struct request_queue *q,
250 struct bvec_merge_data *bvm,
251 struct bio_vec *biovec)
250{ 252{
251 mddev_t *mddev = q->queuedata; 253 mddev_t *mddev = q->queuedata;
252 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 254 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
253 int max; 255 int max;
254 unsigned int chunk_sectors = mddev->chunk_size >> 9; 256 unsigned int chunk_sectors = mddev->chunk_size >> 9;
255 unsigned int bio_sectors = bio->bi_size >> 9; 257 unsigned int bio_sectors = bvm->bi_size >> 9;
256 258
257 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; 259 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
258 if (max < 0) max = 0; /* bio_add cannot handle a negative return */ 260 if (max < 0) max = 0; /* bio_add cannot handle a negative return */
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a71277b640ab..22bb2b1b886d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -439,26 +439,27 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
439/** 439/**
440 * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged 440 * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
441 * @q: request queue 441 * @q: request queue
442 * @bio: the buffer head that's been built up so far 442 * @bvm: properties of new bio
443 * @biovec: the request that could be merged to it. 443 * @biovec: the request that could be merged to it.
444 * 444 *
445 * Return amount of bytes we can accept at this offset 445 * Return amount of bytes we can accept at this offset
446 * If near_copies == raid_disk, there are no striping issues, 446 * If near_copies == raid_disk, there are no striping issues,
447 * but in that case, the function isn't called at all. 447 * but in that case, the function isn't called at all.
448 */ 448 */
449static int raid10_mergeable_bvec(struct request_queue *q, struct bio *bio, 449static int raid10_mergeable_bvec(struct request_queue *q,
450 struct bio_vec *bio_vec) 450 struct bvec_merge_data *bvm,
451 struct bio_vec *biovec)
451{ 452{
452 mddev_t *mddev = q->queuedata; 453 mddev_t *mddev = q->queuedata;
453 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 454 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
454 int max; 455 int max;
455 unsigned int chunk_sectors = mddev->chunk_size >> 9; 456 unsigned int chunk_sectors = mddev->chunk_size >> 9;
456 unsigned int bio_sectors = bio->bi_size >> 9; 457 unsigned int bio_sectors = bvm->bi_size >> 9;
457 458
458 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; 459 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
459 if (max < 0) max = 0; /* bio_add cannot handle a negative return */ 460 if (max < 0) max = 0; /* bio_add cannot handle a negative return */
460 if (max <= bio_vec->bv_len && bio_sectors == 0) 461 if (max <= biovec->bv_len && bio_sectors == 0)
461 return bio_vec->bv_len; 462 return biovec->bv_len;
462 else 463 else
463 return max; 464 return max;
464} 465}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3b27df52456b..9ce7154845c6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3314,15 +3314,17 @@ static int raid5_congested(void *data, int bits)
3314/* We want read requests to align with chunks where possible, 3314/* We want read requests to align with chunks where possible,
3315 * but write requests don't need to. 3315 * but write requests don't need to.
3316 */ 3316 */
3317static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) 3317static int raid5_mergeable_bvec(struct request_queue *q,
3318 struct bvec_merge_data *bvm,
3319 struct bio_vec *biovec)
3318{ 3320{
3319 mddev_t *mddev = q->queuedata; 3321 mddev_t *mddev = q->queuedata;
3320 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 3322 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
3321 int max; 3323 int max;
3322 unsigned int chunk_sectors = mddev->chunk_size >> 9; 3324 unsigned int chunk_sectors = mddev->chunk_size >> 9;
3323 unsigned int bio_sectors = bio->bi_size >> 9; 3325 unsigned int bio_sectors = bvm->bi_size >> 9;
3324 3326
3325 if (bio_data_dir(bio) == WRITE) 3327 if ((bvm->bi_rw & 1) == WRITE)
3326 return biovec->bv_len; /* always allow writes to be mergeable */ 3328 return biovec->bv_len; /* always allow writes to be mergeable */
3327 3329
3328 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; 3330 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index d26f69b0184f..ef671d1a3bf0 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1324,7 +1324,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
1324 goto fail; 1324 goto fail;
1325 } 1325 }
1326 1326
1327 txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL); 1327 txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1328 if (!txs) { 1328 if (!txs) {
1329 err = -ENOMEM; 1329 err = -ENOMEM;
1330 xenbus_dev_fatal(dev, err, "allocating tx ring page"); 1330 xenbus_dev_fatal(dev, err, "allocating tx ring page");
@@ -1340,7 +1340,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
1340 } 1340 }
1341 1341
1342 info->tx_ring_ref = err; 1342 info->tx_ring_ref = err;
1343 rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL); 1343 rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1344 if (!rxs) { 1344 if (!rxs) {
1345 err = -ENOMEM; 1345 err = -ENOMEM;
1346 xenbus_dev_fatal(dev, err, "allocating rx ring page"); 1346 xenbus_dev_fatal(dev, err, "allocating rx ring page");
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index ea0edd1b2e76..fe694f0ee19a 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -182,8 +182,9 @@ static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp,
182 int tablesize); 182 int tablesize);
183static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, 183static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count,
184 Sg_request * srp); 184 Sg_request * srp);
185static ssize_t sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count, 185static ssize_t sg_new_write(Sg_fd *sfp, struct file *file,
186 int blocking, int read_only, Sg_request ** o_srp); 186 const char __user *buf, size_t count, int blocking,
187 int read_only, Sg_request **o_srp);
187static int sg_common_write(Sg_fd * sfp, Sg_request * srp, 188static int sg_common_write(Sg_fd * sfp, Sg_request * srp,
188 unsigned char *cmnd, int timeout, int blocking); 189 unsigned char *cmnd, int timeout, int blocking);
189static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, 190static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
@@ -204,7 +205,6 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
204static Sg_request *sg_add_request(Sg_fd * sfp); 205static Sg_request *sg_add_request(Sg_fd * sfp);
205static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); 206static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
206static int sg_res_in_use(Sg_fd * sfp); 207static int sg_res_in_use(Sg_fd * sfp);
207static int sg_allow_access(unsigned char opcode, char dev_type);
208static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len); 208static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len);
209static Sg_device *sg_get_dev(int dev); 209static Sg_device *sg_get_dev(int dev);
210#ifdef CONFIG_SCSI_PROC_FS 210#ifdef CONFIG_SCSI_PROC_FS
@@ -544,7 +544,7 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
544 return -EFAULT; 544 return -EFAULT;
545 blocking = !(filp->f_flags & O_NONBLOCK); 545 blocking = !(filp->f_flags & O_NONBLOCK);
546 if (old_hdr.reply_len < 0) 546 if (old_hdr.reply_len < 0)
547 return sg_new_write(sfp, buf, count, blocking, 0, NULL); 547 return sg_new_write(sfp, filp, buf, count, blocking, 0, NULL);
548 if (count < (SZ_SG_HEADER + 6)) 548 if (count < (SZ_SG_HEADER + 6))
549 return -EIO; /* The minimum scsi command length is 6 bytes. */ 549 return -EIO; /* The minimum scsi command length is 6 bytes. */
550 550
@@ -621,8 +621,9 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
621} 621}
622 622
623static ssize_t 623static ssize_t
624sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count, 624sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
625 int blocking, int read_only, Sg_request ** o_srp) 625 size_t count, int blocking, int read_only,
626 Sg_request **o_srp)
626{ 627{
627 int k; 628 int k;
628 Sg_request *srp; 629 Sg_request *srp;
@@ -678,8 +679,7 @@ sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
678 sg_remove_request(sfp, srp); 679 sg_remove_request(sfp, srp);
679 return -EFAULT; 680 return -EFAULT;
680 } 681 }
681 if (read_only && 682 if (read_only && !blk_verify_command(file, cmnd)) {
682 (!sg_allow_access(cmnd[0], sfp->parentdp->device->type))) {
683 sg_remove_request(sfp, srp); 683 sg_remove_request(sfp, srp);
684 return -EPERM; 684 return -EPERM;
685 } 685 }
@@ -799,7 +799,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
799 if (!access_ok(VERIFY_WRITE, p, SZ_SG_IO_HDR)) 799 if (!access_ok(VERIFY_WRITE, p, SZ_SG_IO_HDR))
800 return -EFAULT; 800 return -EFAULT;
801 result = 801 result =
802 sg_new_write(sfp, p, SZ_SG_IO_HDR, 802 sg_new_write(sfp, filp, p, SZ_SG_IO_HDR,
803 blocking, read_only, &srp); 803 blocking, read_only, &srp);
804 if (result < 0) 804 if (result < 0)
805 return result; 805 return result;
@@ -1048,7 +1048,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
1048 1048
1049 if (copy_from_user(&opcode, siocp->data, 1)) 1049 if (copy_from_user(&opcode, siocp->data, 1))
1050 return -EFAULT; 1050 return -EFAULT;
1051 if (!sg_allow_access(opcode, sdp->device->type)) 1051 if (!blk_verify_command(filp, &opcode))
1052 return -EPERM; 1052 return -EPERM;
1053 } 1053 }
1054 return sg_scsi_ioctl(filp, sdp->device->request_queue, NULL, p); 1054 return sg_scsi_ioctl(filp, sdp->device->request_queue, NULL, p);
@@ -2502,30 +2502,6 @@ sg_page_free(struct page *page, int size)
2502 __free_pages(page, order); 2502 __free_pages(page, order);
2503} 2503}
2504 2504
2505#ifndef MAINTENANCE_IN_CMD
2506#define MAINTENANCE_IN_CMD 0xa3
2507#endif
2508
2509static unsigned char allow_ops[] = { TEST_UNIT_READY, REQUEST_SENSE,
2510 INQUIRY, READ_CAPACITY, READ_BUFFER, READ_6, READ_10, READ_12,
2511 READ_16, MODE_SENSE, MODE_SENSE_10, LOG_SENSE, REPORT_LUNS,
2512 SERVICE_ACTION_IN, RECEIVE_DIAGNOSTIC, READ_LONG, MAINTENANCE_IN_CMD
2513};
2514
2515static int
2516sg_allow_access(unsigned char opcode, char dev_type)
2517{
2518 int k;
2519
2520 if (TYPE_SCANNER == dev_type) /* TYPE_ROM maybe burner */
2521 return 1;
2522 for (k = 0; k < sizeof (allow_ops); ++k) {
2523 if (opcode == allow_ops[k])
2524 return 1;
2525 }
2526 return 0;
2527}
2528
2529#ifdef CONFIG_SCSI_PROC_FS 2505#ifdef CONFIG_SCSI_PROC_FS
2530static int 2506static int
2531sg_idr_max_id(int id, void *p, void *data) 2507sg_idr_max_id(int id, void *p, void *data)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index c82df8bd4d89..27f5bfd1def3 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -673,24 +673,20 @@ fail:
673static void get_sectorsize(struct scsi_cd *cd) 673static void get_sectorsize(struct scsi_cd *cd)
674{ 674{
675 unsigned char cmd[10]; 675 unsigned char cmd[10];
676 unsigned char *buffer; 676 unsigned char buffer[8];
677 int the_result, retries = 3; 677 int the_result, retries = 3;
678 int sector_size; 678 int sector_size;
679 struct request_queue *queue; 679 struct request_queue *queue;
680 680
681 buffer = kmalloc(512, GFP_KERNEL | GFP_DMA);
682 if (!buffer)
683 goto Enomem;
684
685 do { 681 do {
686 cmd[0] = READ_CAPACITY; 682 cmd[0] = READ_CAPACITY;
687 memset((void *) &cmd[1], 0, 9); 683 memset((void *) &cmd[1], 0, 9);
688 memset(buffer, 0, 8); 684 memset(buffer, 0, sizeof(buffer));
689 685
690 /* Do the command and wait.. */ 686 /* Do the command and wait.. */
691 the_result = scsi_execute_req(cd->device, cmd, DMA_FROM_DEVICE, 687 the_result = scsi_execute_req(cd->device, cmd, DMA_FROM_DEVICE,
692 buffer, 8, NULL, SR_TIMEOUT, 688 buffer, sizeof(buffer), NULL,
693 MAX_RETRIES); 689 SR_TIMEOUT, MAX_RETRIES);
694 690
695 retries--; 691 retries--;
696 692
@@ -745,14 +741,8 @@ static void get_sectorsize(struct scsi_cd *cd)
745 741
746 queue = cd->device->request_queue; 742 queue = cd->device->request_queue;
747 blk_queue_hardsect_size(queue, sector_size); 743 blk_queue_hardsect_size(queue, sector_size);
748out:
749 kfree(buffer);
750 return;
751 744
752Enomem: 745 return;
753 cd->capacity = 0x1fffff;
754 cd->device->sector_size = 2048; /* A guess, just in case */
755 goto out;
756} 746}
757 747
758static void get_capabilities(struct scsi_cd *cd) 748static void get_capabilities(struct scsi_cd *cd)
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 0f86b0ff7879..9678b3e98c63 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -117,7 +117,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev,
117 char *path; 117 char *path;
118 118
119 va_start(ap, pathfmt); 119 va_start(ap, pathfmt);
120 path = kvasprintf(GFP_KERNEL, pathfmt, ap); 120 path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
121 va_end(ap); 121 va_end(ap);
122 122
123 if (!path) { 123 if (!path) {
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 227d53b12a5c..7f2f91c0e11d 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -283,9 +283,9 @@ static char *join(const char *dir, const char *name)
283 char *buffer; 283 char *buffer;
284 284
285 if (strlen(name) == 0) 285 if (strlen(name) == 0)
286 buffer = kasprintf(GFP_KERNEL, "%s", dir); 286 buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir);
287 else 287 else
288 buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name); 288 buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name);
289 return (!buffer) ? ERR_PTR(-ENOMEM) : buffer; 289 return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
290} 290}
291 291
@@ -297,7 +297,7 @@ static char **split(char *strings, unsigned int len, unsigned int *num)
297 *num = count_strings(strings, len); 297 *num = count_strings(strings, len);
298 298
299 /* Transfer to one big alloc for easy freeing. */ 299 /* Transfer to one big alloc for easy freeing. */
300 ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL); 300 ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH);
301 if (!ret) { 301 if (!ret) {
302 kfree(strings); 302 kfree(strings);
303 return ERR_PTR(-ENOMEM); 303 return ERR_PTR(-ENOMEM);
@@ -751,7 +751,7 @@ static int process_msg(void)
751 } 751 }
752 752
753 753
754 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 754 msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH);
755 if (msg == NULL) { 755 if (msg == NULL) {
756 err = -ENOMEM; 756 err = -ENOMEM;
757 goto out; 757 goto out;
@@ -763,7 +763,7 @@ static int process_msg(void)
763 goto out; 763 goto out;
764 } 764 }
765 765
766 body = kmalloc(msg->hdr.len + 1, GFP_KERNEL); 766 body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
767 if (body == NULL) { 767 if (body == NULL) {
768 kfree(msg); 768 kfree(msg);
769 err = -ENOMEM; 769 err = -ENOMEM;
diff --git a/fs/Makefile b/fs/Makefile
index 1e7a11bd4da1..277b079dec9e 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,7 @@ else
19obj-y += no-block.o 19obj-y += no-block.o
20endif 20endif
21 21
22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
22obj-$(CONFIG_INOTIFY) += inotify.o 23obj-$(CONFIG_INOTIFY) += inotify.o
23obj-$(CONFIG_INOTIFY_USER) += inotify_user.o 24obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
24obj-$(CONFIG_EPOLL) += eventpoll.o 25obj-$(CONFIG_EPOLL) += eventpoll.o
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
new file mode 100644
index 000000000000..63e2ee63058d
--- /dev/null
+++ b/fs/bio-integrity.c
@@ -0,0 +1,719 @@
1/*
2 * bio-integrity.c - bio data integrity extensions
3 *
4 * Copyright (C) 2007, 2008 Oracle Corporation
5 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
19 * USA.
20 *
21 */
22
23#include <linux/blkdev.h>
24#include <linux/mempool.h>
25#include <linux/bio.h>
26#include <linux/workqueue.h>
27
28static struct kmem_cache *bio_integrity_slab __read_mostly;
29static struct workqueue_struct *kintegrityd_wq;
30
31/**
32 * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
33 * @bio: bio to attach integrity metadata to
34 * @gfp_mask: Memory allocation mask
35 * @nr_vecs: Number of integrity metadata scatter-gather elements
36 * @bs: bio_set to allocate from
37 *
38 * Description: This function prepares a bio for attaching integrity
39 * metadata. nr_vecs specifies the maximum number of pages containing
40 * integrity metadata that can be attached.
41 */
42struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
43 gfp_t gfp_mask,
44 unsigned int nr_vecs,
45 struct bio_set *bs)
46{
47 struct bio_integrity_payload *bip;
48 struct bio_vec *iv;
49 unsigned long idx;
50
51 BUG_ON(bio == NULL);
52
53 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
54 if (unlikely(bip == NULL)) {
55 printk(KERN_ERR "%s: could not alloc bip\n", __func__);
56 return NULL;
57 }
58
59 memset(bip, 0, sizeof(*bip));
60
61 iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs);
62 if (unlikely(iv == NULL)) {
63 printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
64 mempool_free(bip, bs->bio_integrity_pool);
65 return NULL;
66 }
67
68 bip->bip_pool = idx;
69 bip->bip_vec = iv;
70 bip->bip_bio = bio;
71 bio->bi_integrity = bip;
72
73 return bip;
74}
75EXPORT_SYMBOL(bio_integrity_alloc_bioset);
76
77/**
78 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
79 * @bio: bio to attach integrity metadata to
80 * @gfp_mask: Memory allocation mask
81 * @nr_vecs: Number of integrity metadata scatter-gather elements
82 *
83 * Description: This function prepares a bio for attaching integrity
84 * metadata. nr_vecs specifies the maximum number of pages containing
85 * integrity metadata that can be attached.
86 */
87struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
88 gfp_t gfp_mask,
89 unsigned int nr_vecs)
90{
91 return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
92}
93EXPORT_SYMBOL(bio_integrity_alloc);
94
95/**
96 * bio_integrity_free - Free bio integrity payload
97 * @bio: bio containing bip to be freed
98 * @bs: bio_set this bio was allocated from
99 *
100 * Description: Used to free the integrity portion of a bio. Usually
101 * called from bio_free().
102 */
103void bio_integrity_free(struct bio *bio, struct bio_set *bs)
104{
105 struct bio_integrity_payload *bip = bio->bi_integrity;
106
107 BUG_ON(bip == NULL);
108
109 /* A cloned bio doesn't own the integrity metadata */
110 if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
111 kfree(bip->bip_buf);
112
113 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
114 mempool_free(bip, bs->bio_integrity_pool);
115
116 bio->bi_integrity = NULL;
117}
118EXPORT_SYMBOL(bio_integrity_free);
119
120/**
121 * bio_integrity_add_page - Attach integrity metadata
122 * @bio: bio to update
123 * @page: page containing integrity metadata
124 * @len: number of bytes of integrity metadata in page
125 * @offset: start offset within page
126 *
127 * Description: Attach a page containing integrity metadata to bio.
128 */
129int bio_integrity_add_page(struct bio *bio, struct page *page,
130 unsigned int len, unsigned int offset)
131{
132 struct bio_integrity_payload *bip = bio->bi_integrity;
133 struct bio_vec *iv;
134
135 if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) {
136 printk(KERN_ERR "%s: bip_vec full\n", __func__);
137 return 0;
138 }
139
140 iv = bip_vec_idx(bip, bip->bip_vcnt);
141 BUG_ON(iv == NULL);
142 BUG_ON(iv->bv_page != NULL);
143
144 iv->bv_page = page;
145 iv->bv_len = len;
146 iv->bv_offset = offset;
147 bip->bip_vcnt++;
148
149 return len;
150}
151EXPORT_SYMBOL(bio_integrity_add_page);
152
153/**
154 * bio_integrity_enabled - Check whether integrity can be passed
155 * @bio: bio to check
156 *
157 * Description: Determines whether bio_integrity_prep() can be called
158 * on this bio or not. bio data direction and target device must be
159 * set prior to calling. The functions honors the write_generate and
160 * read_verify flags in sysfs.
161 */
162int bio_integrity_enabled(struct bio *bio)
163{
164 /* Already protected? */
165 if (bio_integrity(bio))
166 return 0;
167
168 return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio));
169}
170EXPORT_SYMBOL(bio_integrity_enabled);
171
172/**
173 * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto
174 * @bi: blk_integrity profile for device
175 * @sectors: Number of 512 sectors to convert
176 *
177 * Description: The block layer calculates everything in 512 byte
178 * sectors but integrity metadata is done in terms of the hardware
179 * sector size of the storage device. Convert the block layer sectors
180 * to physical sectors.
181 */
182static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
183 unsigned int sectors)
184{
185 /* At this point there are only 512b or 4096b DIF/EPP devices */
186 if (bi->sector_size == 4096)
187 return sectors >>= 3;
188
189 return sectors;
190}
191
192/**
193 * bio_integrity_tag_size - Retrieve integrity tag space
194 * @bio: bio to inspect
195 *
196 * Description: Returns the maximum number of tag bytes that can be
197 * attached to this bio. Filesystems can use this to determine how
198 * much metadata to attach to an I/O.
199 */
200unsigned int bio_integrity_tag_size(struct bio *bio)
201{
202 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
203
204 BUG_ON(bio->bi_size == 0);
205
206 return bi->tag_size * (bio->bi_size / bi->sector_size);
207}
208EXPORT_SYMBOL(bio_integrity_tag_size);
209
210int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
211{
212 struct bio_integrity_payload *bip = bio->bi_integrity;
213 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
214 unsigned int nr_sectors;
215
216 BUG_ON(bip->bip_buf == NULL);
217
218 if (bi->tag_size == 0)
219 return -1;
220
221 nr_sectors = bio_integrity_hw_sectors(bi,
222 DIV_ROUND_UP(len, bi->tag_size));
223
224 if (nr_sectors * bi->tuple_size > bip->bip_size) {
225 printk(KERN_ERR "%s: tag too big for bio: %u > %u\n",
226 __func__, nr_sectors * bi->tuple_size, bip->bip_size);
227 return -1;
228 }
229
230 if (set)
231 bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
232 else
233 bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
234
235 return 0;
236}
237
238/**
239 * bio_integrity_set_tag - Attach a tag buffer to a bio
240 * @bio: bio to attach buffer to
241 * @tag_buf: Pointer to a buffer containing tag data
242 * @len: Length of the included buffer
243 *
244 * Description: Use this function to tag a bio by leveraging the extra
245 * space provided by devices formatted with integrity protection. The
246 * size of the integrity buffer must be <= to the size reported by
247 * bio_integrity_tag_size().
248 */
249int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
250{
251 BUG_ON(bio_data_dir(bio) != WRITE);
252
253 return bio_integrity_tag(bio, tag_buf, len, 1);
254}
255EXPORT_SYMBOL(bio_integrity_set_tag);
256
257/**
258 * bio_integrity_get_tag - Retrieve a tag buffer from a bio
259 * @bio: bio to retrieve buffer from
260 * @tag_buf: Pointer to a buffer for the tag data
261 * @len: Length of the target buffer
262 *
263 * Description: Use this function to retrieve the tag buffer from a
264 * completed I/O. The size of the integrity buffer must be <= to the
265 * size reported by bio_integrity_tag_size().
266 */
267int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
268{
269 BUG_ON(bio_data_dir(bio) != READ);
270
271 return bio_integrity_tag(bio, tag_buf, len, 0);
272}
273EXPORT_SYMBOL(bio_integrity_get_tag);
274
275/**
276 * bio_integrity_generate - Generate integrity metadata for a bio
277 * @bio: bio to generate integrity metadata for
278 *
279 * Description: Generates integrity metadata for a bio by calling the
280 * block device's generation callback function. The bio must have a
281 * bip attached with enough room to accommodate the generated
282 * integrity metadata.
283 */
284static void bio_integrity_generate(struct bio *bio)
285{
286 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
287 struct blk_integrity_exchg bix;
288 struct bio_vec *bv;
289 sector_t sector = bio->bi_sector;
290 unsigned int i, sectors, total;
291 void *prot_buf = bio->bi_integrity->bip_buf;
292
293 total = 0;
294 bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
295 bix.sector_size = bi->sector_size;
296
297 bio_for_each_segment(bv, bio, i) {
298 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
299 bix.data_buf = kaddr + bv->bv_offset;
300 bix.data_size = bv->bv_len;
301 bix.prot_buf = prot_buf;
302 bix.sector = sector;
303
304 bi->generate_fn(&bix);
305
306 sectors = bv->bv_len / bi->sector_size;
307 sector += sectors;
308 prot_buf += sectors * bi->tuple_size;
309 total += sectors * bi->tuple_size;
310 BUG_ON(total > bio->bi_integrity->bip_size);
311
312 kunmap_atomic(kaddr, KM_USER0);
313 }
314}
315
316/**
317 * bio_integrity_prep - Prepare bio for integrity I/O
318 * @bio: bio to prepare
319 *
320 * Description: Allocates a buffer for integrity metadata, maps the
321 * pages and attaches them to a bio. The bio must have data
322 * direction, target device and start sector set priot to calling. In
323 * the WRITE case, integrity metadata will be generated using the
324 * block device's integrity function. In the READ case, the buffer
325 * will be prepared for DMA and a suitable end_io handler set up.
326 */
327int bio_integrity_prep(struct bio *bio)
328{
329 struct bio_integrity_payload *bip;
330 struct blk_integrity *bi;
331 struct request_queue *q;
332 void *buf;
333 unsigned long start, end;
334 unsigned int len, nr_pages;
335 unsigned int bytes, offset, i;
336 unsigned int sectors;
337
338 bi = bdev_get_integrity(bio->bi_bdev);
339 q = bdev_get_queue(bio->bi_bdev);
340 BUG_ON(bi == NULL);
341 BUG_ON(bio_integrity(bio));
342
343 sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
344
345 /* Allocate kernel buffer for protection data */
346 len = sectors * blk_integrity_tuple_size(bi);
347 buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
348 if (unlikely(buf == NULL)) {
349 printk(KERN_ERR "could not allocate integrity buffer\n");
350 return -EIO;
351 }
352
353 end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
354 start = ((unsigned long) buf) >> PAGE_SHIFT;
355 nr_pages = end - start;
356
357 /* Allocate bio integrity payload and integrity vectors */
358 bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
359 if (unlikely(bip == NULL)) {
360 printk(KERN_ERR "could not allocate data integrity bioset\n");
361 kfree(buf);
362 return -EIO;
363 }
364
365 bip->bip_buf = buf;
366 bip->bip_size = len;
367 bip->bip_sector = bio->bi_sector;
368
369 /* Map it */
370 offset = offset_in_page(buf);
371 for (i = 0 ; i < nr_pages ; i++) {
372 int ret;
373 bytes = PAGE_SIZE - offset;
374
375 if (len <= 0)
376 break;
377
378 if (bytes > len)
379 bytes = len;
380
381 ret = bio_integrity_add_page(bio, virt_to_page(buf),
382 bytes, offset);
383
384 if (ret == 0)
385 return 0;
386
387 if (ret < bytes)
388 break;
389
390 buf += bytes;
391 len -= bytes;
392 offset = 0;
393 }
394
395 /* Install custom I/O completion handler if read verify is enabled */
396 if (bio_data_dir(bio) == READ) {
397 bip->bip_end_io = bio->bi_end_io;
398 bio->bi_end_io = bio_integrity_endio;
399 }
400
401 /* Auto-generate integrity metadata if this is a write */
402 if (bio_data_dir(bio) == WRITE)
403 bio_integrity_generate(bio);
404
405 return 0;
406}
407EXPORT_SYMBOL(bio_integrity_prep);
408
409/**
410 * bio_integrity_verify - Verify integrity metadata for a bio
411 * @bio: bio to verify
412 *
413 * Description: This function is called to verify the integrity of a
414 * bio. The data in the bio io_vec is compared to the integrity
415 * metadata returned by the HBA.
416 */
417static int bio_integrity_verify(struct bio *bio)
418{
419 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
420 struct blk_integrity_exchg bix;
421 struct bio_vec *bv;
422 sector_t sector = bio->bi_integrity->bip_sector;
423 unsigned int i, sectors, total, ret;
424 void *prot_buf = bio->bi_integrity->bip_buf;
425
426 ret = total = 0;
427 bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
428 bix.sector_size = bi->sector_size;
429
430 bio_for_each_segment(bv, bio, i) {
431 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
432 bix.data_buf = kaddr + bv->bv_offset;
433 bix.data_size = bv->bv_len;
434 bix.prot_buf = prot_buf;
435 bix.sector = sector;
436
437 ret = bi->verify_fn(&bix);
438
439 if (ret) {
440 kunmap_atomic(kaddr, KM_USER0);
441 break;
442 }
443
444 sectors = bv->bv_len / bi->sector_size;
445 sector += sectors;
446 prot_buf += sectors * bi->tuple_size;
447 total += sectors * bi->tuple_size;
448 BUG_ON(total > bio->bi_integrity->bip_size);
449
450 kunmap_atomic(kaddr, KM_USER0);
451 }
452
453 return ret;
454}
455
456/**
457 * bio_integrity_verify_fn - Integrity I/O completion worker
458 * @work: Work struct stored in bio to be verified
459 *
460 * Description: This workqueue function is called to complete a READ
461 * request. The function verifies the transferred integrity metadata
462 * and then calls the original bio end_io function.
463 */
464static void bio_integrity_verify_fn(struct work_struct *work)
465{
466 struct bio_integrity_payload *bip =
467 container_of(work, struct bio_integrity_payload, bip_work);
468 struct bio *bio = bip->bip_bio;
469 int error = bip->bip_error;
470
471 if (bio_integrity_verify(bio)) {
472 clear_bit(BIO_UPTODATE, &bio->bi_flags);
473 error = -EIO;
474 }
475
476 /* Restore original bio completion handler */
477 bio->bi_end_io = bip->bip_end_io;
478
479 if (bio->bi_end_io)
480 bio->bi_end_io(bio, error);
481}
482
483/**
484 * bio_integrity_endio - Integrity I/O completion function
485 * @bio: Protected bio
486 * @error: Pointer to errno
487 *
488 * Description: Completion for integrity I/O
489 *
490 * Normally I/O completion is done in interrupt context. However,
491 * verifying I/O integrity is a time-consuming task which must be run
492 * in process context. This function postpones completion
493 * accordingly.
494 */
495void bio_integrity_endio(struct bio *bio, int error)
496{
497 struct bio_integrity_payload *bip = bio->bi_integrity;
498
499 BUG_ON(bip->bip_bio != bio);
500
501 bip->bip_error = error;
502 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
503 queue_work(kintegrityd_wq, &bip->bip_work);
504}
505EXPORT_SYMBOL(bio_integrity_endio);
506
507/**
508 * bio_integrity_mark_head - Advance bip_vec skip bytes
509 * @bip: Integrity vector to advance
510 * @skip: Number of bytes to advance it
511 */
512void bio_integrity_mark_head(struct bio_integrity_payload *bip,
513 unsigned int skip)
514{
515 struct bio_vec *iv;
516 unsigned int i;
517
518 bip_for_each_vec(iv, bip, i) {
519 if (skip == 0) {
520 bip->bip_idx = i;
521 return;
522 } else if (skip >= iv->bv_len) {
523 skip -= iv->bv_len;
524 } else { /* skip < iv->bv_len) */
525 iv->bv_offset += skip;
526 iv->bv_len -= skip;
527 bip->bip_idx = i;
528 return;
529 }
530 }
531}
532
533/**
534 * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
535 * @bip: Integrity vector to truncate
536 * @len: New length of integrity vector
537 */
538void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
539 unsigned int len)
540{
541 struct bio_vec *iv;
542 unsigned int i;
543
544 bip_for_each_vec(iv, bip, i) {
545 if (len == 0) {
546 bip->bip_vcnt = i;
547 return;
548 } else if (len >= iv->bv_len) {
549 len -= iv->bv_len;
550 } else { /* len < iv->bv_len) */
551 iv->bv_len = len;
552 len = 0;
553 }
554 }
555}
556
557/**
558 * bio_integrity_advance - Advance integrity vector
559 * @bio: bio whose integrity vector to update
560 * @bytes_done: number of data bytes that have been completed
561 *
562 * Description: This function calculates how many integrity bytes the
563 * number of completed data bytes correspond to and advances the
564 * integrity vector accordingly.
565 */
566void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
567{
568 struct bio_integrity_payload *bip = bio->bi_integrity;
569 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
570 unsigned int nr_sectors;
571
572 BUG_ON(bip == NULL);
573 BUG_ON(bi == NULL);
574
575 nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
576 bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
577}
578EXPORT_SYMBOL(bio_integrity_advance);
579
580/**
581 * bio_integrity_trim - Trim integrity vector
582 * @bio: bio whose integrity vector to update
583 * @offset: offset to first data sector
584 * @sectors: number of data sectors
585 *
586 * Description: Used to trim the integrity vector in a cloned bio.
587 * The ivec will be advanced corresponding to 'offset' data sectors
588 * and the length will be truncated corresponding to 'len' data
589 * sectors.
590 */
591void bio_integrity_trim(struct bio *bio, unsigned int offset,
592 unsigned int sectors)
593{
594 struct bio_integrity_payload *bip = bio->bi_integrity;
595 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
596 unsigned int nr_sectors;
597
598 BUG_ON(bip == NULL);
599 BUG_ON(bi == NULL);
600 BUG_ON(!bio_flagged(bio, BIO_CLONED));
601
602 nr_sectors = bio_integrity_hw_sectors(bi, sectors);
603 bip->bip_sector = bip->bip_sector + offset;
604 bio_integrity_mark_head(bip, offset * bi->tuple_size);
605 bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
606}
607EXPORT_SYMBOL(bio_integrity_trim);
608
609/**
610 * bio_integrity_split - Split integrity metadata
611 * @bio: Protected bio
612 * @bp: Resulting bio_pair
613 * @sectors: Offset
614 *
615 * Description: Splits an integrity page into a bio_pair.
616 */
617void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
618{
619 struct blk_integrity *bi;
620 struct bio_integrity_payload *bip = bio->bi_integrity;
621 unsigned int nr_sectors;
622
623 if (bio_integrity(bio) == 0)
624 return;
625
626 bi = bdev_get_integrity(bio->bi_bdev);
627 BUG_ON(bi == NULL);
628 BUG_ON(bip->bip_vcnt != 1);
629
630 nr_sectors = bio_integrity_hw_sectors(bi, sectors);
631
632 bp->bio1.bi_integrity = &bp->bip1;
633 bp->bio2.bi_integrity = &bp->bip2;
634
635 bp->iv1 = bip->bip_vec[0];
636 bp->iv2 = bip->bip_vec[0];
637
638 bp->bip1.bip_vec = &bp->iv1;
639 bp->bip2.bip_vec = &bp->iv2;
640
641 bp->iv1.bv_len = sectors * bi->tuple_size;
642 bp->iv2.bv_offset += sectors * bi->tuple_size;
643 bp->iv2.bv_len -= sectors * bi->tuple_size;
644
645 bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
646 bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
647
648 bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
649 bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
650}
651EXPORT_SYMBOL(bio_integrity_split);
652
653/**
654 * bio_integrity_clone - Callback for cloning bios with integrity metadata
655 * @bio: New bio
656 * @bio_src: Original bio
657 * @bs: bio_set to allocate bip from
658 *
659 * Description: Called to allocate a bip when cloning a bio
660 */
661int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
662 struct bio_set *bs)
663{
664 struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
665 struct bio_integrity_payload *bip;
666
667 BUG_ON(bip_src == NULL);
668
669 bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs);
670
671 if (bip == NULL)
672 return -EIO;
673
674 memcpy(bip->bip_vec, bip_src->bip_vec,
675 bip_src->bip_vcnt * sizeof(struct bio_vec));
676
677 bip->bip_sector = bip_src->bip_sector;
678 bip->bip_vcnt = bip_src->bip_vcnt;
679 bip->bip_idx = bip_src->bip_idx;
680
681 return 0;
682}
683EXPORT_SYMBOL(bio_integrity_clone);
684
685int bioset_integrity_create(struct bio_set *bs, int pool_size)
686{
687 bs->bio_integrity_pool = mempool_create_slab_pool(pool_size,
688 bio_integrity_slab);
689 if (!bs->bio_integrity_pool)
690 return -1;
691
692 return 0;
693}
694EXPORT_SYMBOL(bioset_integrity_create);
695
696void bioset_integrity_free(struct bio_set *bs)
697{
698 if (bs->bio_integrity_pool)
699 mempool_destroy(bs->bio_integrity_pool);
700}
701EXPORT_SYMBOL(bioset_integrity_free);
702
703void __init bio_integrity_init_slab(void)
704{
705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
706 SLAB_HWCACHE_ALIGN|SLAB_PANIC);
707}
708EXPORT_SYMBOL(bio_integrity_init_slab);
709
710static int __init integrity_init(void)
711{
712 kintegrityd_wq = create_workqueue("kintegrityd");
713
714 if (!kintegrityd_wq)
715 panic("Failed to create kintegrityd\n");
716
717 return 0;
718}
719subsys_initcall(integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 78562574cb52..88322b066acb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -28,25 +28,10 @@
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 29#include <scsi/sg.h> /* for struct sg_iovec */
30 30
31#define BIO_POOL_SIZE 2
32
33static struct kmem_cache *bio_slab __read_mostly; 31static struct kmem_cache *bio_slab __read_mostly;
34 32
35#define BIOVEC_NR_POOLS 6
36
37/*
38 * a small number of entries is fine, not going to be performance critical.
39 * basically we just need to survive
40 */
41#define BIO_SPLIT_ENTRIES 2
42mempool_t *bio_split_pool __read_mostly; 33mempool_t *bio_split_pool __read_mostly;
43 34
44struct biovec_slab {
45 int nr_vecs;
46 char *name;
47 struct kmem_cache *slab;
48};
49
50/* 35/*
51 * if you change this list, also change bvec_alloc or things will 36 * if you change this list, also change bvec_alloc or things will
52 * break badly! cannot be bigger than what you can fit into an 37 * break badly! cannot be bigger than what you can fit into an
@@ -60,23 +45,17 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
60#undef BV 45#undef BV
61 46
62/* 47/*
63 * bio_set is used to allow other portions of the IO system to
64 * allocate their own private memory pools for bio and iovec structures.
65 * These memory pools in turn all allocate from the bio_slab
66 * and the bvec_slabs[].
67 */
68struct bio_set {
69 mempool_t *bio_pool;
70 mempool_t *bvec_pools[BIOVEC_NR_POOLS];
71};
72
73/*
74 * fs_bio_set is the bio_set containing bio and iovec memory pools used by 48 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
75 * IO code that does not need private memory pools. 49 * IO code that does not need private memory pools.
76 */ 50 */
77static struct bio_set *fs_bio_set; 51struct bio_set *fs_bio_set;
52
53unsigned int bvec_nr_vecs(unsigned short idx)
54{
55 return bvec_slabs[idx].nr_vecs;
56}
78 57
79static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) 58struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
80{ 59{
81 struct bio_vec *bvl; 60 struct bio_vec *bvl;
82 61
@@ -117,6 +96,9 @@ void bio_free(struct bio *bio, struct bio_set *bio_set)
117 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); 96 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
118 } 97 }
119 98
99 if (bio_integrity(bio))
100 bio_integrity_free(bio, bio_set);
101
120 mempool_free(bio, bio_set->bio_pool); 102 mempool_free(bio, bio_set->bio_pool);
121} 103}
122 104
@@ -275,9 +257,19 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
275{ 257{
276 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); 258 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
277 259
278 if (b) { 260 if (!b)
279 b->bi_destructor = bio_fs_destructor; 261 return NULL;
280 __bio_clone(b, bio); 262
263 b->bi_destructor = bio_fs_destructor;
264 __bio_clone(b, bio);
265
266 if (bio_integrity(bio)) {
267 int ret;
268
269 ret = bio_integrity_clone(b, bio, fs_bio_set);
270
271 if (ret < 0)
272 return NULL;
281 } 273 }
282 274
283 return b; 275 return b;
@@ -333,10 +325,19 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
333 if (page == prev->bv_page && 325 if (page == prev->bv_page &&
334 offset == prev->bv_offset + prev->bv_len) { 326 offset == prev->bv_offset + prev->bv_len) {
335 prev->bv_len += len; 327 prev->bv_len += len;
336 if (q->merge_bvec_fn && 328
337 q->merge_bvec_fn(q, bio, prev) < len) { 329 if (q->merge_bvec_fn) {
338 prev->bv_len -= len; 330 struct bvec_merge_data bvm = {
339 return 0; 331 .bi_bdev = bio->bi_bdev,
332 .bi_sector = bio->bi_sector,
333 .bi_size = bio->bi_size,
334 .bi_rw = bio->bi_rw,
335 };
336
337 if (q->merge_bvec_fn(q, &bvm, prev) < len) {
338 prev->bv_len -= len;
339 return 0;
340 }
340 } 341 }
341 342
342 goto done; 343 goto done;
@@ -377,11 +378,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
377 * queue to get further control 378 * queue to get further control
378 */ 379 */
379 if (q->merge_bvec_fn) { 380 if (q->merge_bvec_fn) {
381 struct bvec_merge_data bvm = {
382 .bi_bdev = bio->bi_bdev,
383 .bi_sector = bio->bi_sector,
384 .bi_size = bio->bi_size,
385 .bi_rw = bio->bi_rw,
386 };
387
380 /* 388 /*
381 * merge_bvec_fn() returns number of bytes it can accept 389 * merge_bvec_fn() returns number of bytes it can accept
382 * at this offset 390 * at this offset
383 */ 391 */
384 if (q->merge_bvec_fn(q, bio, bvec) < len) { 392 if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
385 bvec->bv_page = NULL; 393 bvec->bv_page = NULL;
386 bvec->bv_len = 0; 394 bvec->bv_len = 0;
387 bvec->bv_offset = 0; 395 bvec->bv_offset = 0;
@@ -1249,6 +1257,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1249 bp->bio1.bi_private = bi; 1257 bp->bio1.bi_private = bi;
1250 bp->bio2.bi_private = pool; 1258 bp->bio2.bi_private = pool;
1251 1259
1260 if (bio_integrity(bi))
1261 bio_integrity_split(bi, bp, first_sectors);
1262
1252 return bp; 1263 return bp;
1253} 1264}
1254 1265
@@ -1290,6 +1301,7 @@ void bioset_free(struct bio_set *bs)
1290 if (bs->bio_pool) 1301 if (bs->bio_pool)
1291 mempool_destroy(bs->bio_pool); 1302 mempool_destroy(bs->bio_pool);
1292 1303
1304 bioset_integrity_free(bs);
1293 biovec_free_pools(bs); 1305 biovec_free_pools(bs);
1294 1306
1295 kfree(bs); 1307 kfree(bs);
@@ -1306,6 +1318,9 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
1306 if (!bs->bio_pool) 1318 if (!bs->bio_pool)
1307 goto bad; 1319 goto bad;
1308 1320
1321 if (bioset_integrity_create(bs, bio_pool_size))
1322 goto bad;
1323
1309 if (!biovec_create_pools(bs, bvec_pool_size)) 1324 if (!biovec_create_pools(bs, bvec_pool_size))
1310 return bs; 1325 return bs;
1311 1326
@@ -1332,6 +1347,7 @@ static int __init init_bio(void)
1332{ 1347{
1333 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 1348 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
1334 1349
1350 bio_integrity_init_slab();
1335 biovec_init_slabs(); 1351 biovec_init_slabs();
1336 1352
1337 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); 1353 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 9590b9024300..78f613cb9c76 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -45,6 +45,7 @@ const struct file_operations ramfs_file_operations = {
45 .mmap = generic_file_mmap, 45 .mmap = generic_file_mmap,
46 .fsync = simple_sync_file, 46 .fsync = simple_sync_file,
47 .splice_read = generic_file_splice_read, 47 .splice_read = generic_file_splice_read,
48 .splice_write = generic_file_splice_write,
48 .llseek = generic_file_llseek, 49 .llseek = generic_file_llseek,
49}; 50};
50 51
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 0989bc2c2f69..52312ec93ff4 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -43,6 +43,7 @@ const struct file_operations ramfs_file_operations = {
43 .aio_write = generic_file_aio_write, 43 .aio_write = generic_file_aio_write,
44 .fsync = simple_sync_file, 44 .fsync = simple_sync_file,
45 .splice_read = generic_file_splice_read, 45 .splice_read = generic_file_splice_read,
46 .splice_write = generic_file_splice_write,
46 .llseek = generic_file_llseek, 47 .llseek = generic_file_llseek,
47}; 48};
48 49
diff --git a/fs/splice.c b/fs/splice.c
index aa5f6f60b305..399442179d89 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -379,13 +379,22 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
379 lock_page(page); 379 lock_page(page);
380 380
381 /* 381 /*
382 * page was truncated, stop here. if this isn't the 382 * Page was truncated, or invalidated by the
383 * first page, we'll just complete what we already 383 * filesystem. Redo the find/create, but this time the
384 * added 384 * page is kept locked, so there's no chance of another
385 * race with truncate/invalidate.
385 */ 386 */
386 if (!page->mapping) { 387 if (!page->mapping) {
387 unlock_page(page); 388 unlock_page(page);
388 break; 389 page = find_or_create_page(mapping, index,
390 mapping_gfp_mask(mapping));
391
392 if (!page) {
393 error = -ENOMEM;
394 break;
395 }
396 page_cache_release(pages[page_nr]);
397 pages[page_nr] = page;
389 } 398 }
390 /* 399 /*
391 * page was already under io and is now done, great 400 * page was already under io and is now done, great
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 61c15eaf3fb3..0933a14e6414 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -64,6 +64,7 @@ struct bio_vec {
64 64
65struct bio_set; 65struct bio_set;
66struct bio; 66struct bio;
67struct bio_integrity_payload;
67typedef void (bio_end_io_t) (struct bio *, int); 68typedef void (bio_end_io_t) (struct bio *, int);
68typedef void (bio_destructor_t) (struct bio *); 69typedef void (bio_destructor_t) (struct bio *);
69 70
@@ -112,6 +113,9 @@ struct bio {
112 atomic_t bi_cnt; /* pin count */ 113 atomic_t bi_cnt; /* pin count */
113 114
114 void *bi_private; 115 void *bi_private;
116#if defined(CONFIG_BLK_DEV_INTEGRITY)
117 struct bio_integrity_payload *bi_integrity; /* data integrity */
118#endif
115 119
116 bio_destructor_t *bi_destructor; /* destructor */ 120 bio_destructor_t *bi_destructor; /* destructor */
117}; 121};
@@ -271,6 +275,29 @@ static inline void *bio_data(struct bio *bio)
271 */ 275 */
272#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) 276#define bio_get(bio) atomic_inc(&(bio)->bi_cnt)
273 277
278#if defined(CONFIG_BLK_DEV_INTEGRITY)
279/*
280 * bio integrity payload
281 */
282struct bio_integrity_payload {
283 struct bio *bip_bio; /* parent bio */
284 struct bio_vec *bip_vec; /* integrity data vector */
285
286 sector_t bip_sector; /* virtual start sector */
287
288 void *bip_buf; /* generated integrity data */
289 bio_end_io_t *bip_end_io; /* saved I/O completion fn */
290
291 int bip_error; /* saved I/O error */
292 unsigned int bip_size;
293
294 unsigned short bip_pool; /* pool the ivec came from */
295 unsigned short bip_vcnt; /* # of integrity bio_vecs */
296 unsigned short bip_idx; /* current bip_vec index */
297
298 struct work_struct bip_work; /* I/O completion */
299};
300#endif /* CONFIG_BLK_DEV_INTEGRITY */
274 301
275/* 302/*
276 * A bio_pair is used when we need to split a bio. 303 * A bio_pair is used when we need to split a bio.
@@ -283,10 +310,14 @@ static inline void *bio_data(struct bio *bio)
283 * in bio2.bi_private 310 * in bio2.bi_private
284 */ 311 */
285struct bio_pair { 312struct bio_pair {
286 struct bio bio1, bio2; 313 struct bio bio1, bio2;
287 struct bio_vec bv1, bv2; 314 struct bio_vec bv1, bv2;
288 atomic_t cnt; 315#if defined(CONFIG_BLK_DEV_INTEGRITY)
289 int error; 316 struct bio_integrity_payload bip1, bip2;
317 struct bio_vec iv1, iv2;
318#endif
319 atomic_t cnt;
320 int error;
290}; 321};
291extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, 322extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool,
292 int first_sectors); 323 int first_sectors);
@@ -333,6 +364,39 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *,
333 int, int); 364 int, int);
334extern int bio_uncopy_user(struct bio *); 365extern int bio_uncopy_user(struct bio *);
335void zero_fill_bio(struct bio *bio); 366void zero_fill_bio(struct bio *bio);
367extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
368extern unsigned int bvec_nr_vecs(unsigned short idx);
369
370/*
371 * bio_set is used to allow other portions of the IO system to
372 * allocate their own private memory pools for bio and iovec structures.
373 * These memory pools in turn all allocate from the bio_slab
374 * and the bvec_slabs[].
375 */
376#define BIO_POOL_SIZE 2
377#define BIOVEC_NR_POOLS 6
378
379struct bio_set {
380 mempool_t *bio_pool;
381#if defined(CONFIG_BLK_DEV_INTEGRITY)
382 mempool_t *bio_integrity_pool;
383#endif
384 mempool_t *bvec_pools[BIOVEC_NR_POOLS];
385};
386
387struct biovec_slab {
388 int nr_vecs;
389 char *name;
390 struct kmem_cache *slab;
391};
392
393extern struct bio_set *fs_bio_set;
394
395/*
396 * a small number of entries is fine, not going to be performance critical.
397 * basically we just need to survive
398 */
399#define BIO_SPLIT_ENTRIES 2
336 400
337#ifdef CONFIG_HIGHMEM 401#ifdef CONFIG_HIGHMEM
338/* 402/*
@@ -381,5 +445,63 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
381 __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) 445 __bio_kmap_irq((bio), (bio)->bi_idx, (flags))
382#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) 446#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
383 447
448#if defined(CONFIG_BLK_DEV_INTEGRITY)
449
450#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
451#define bip_vec(bip) bip_vec_idx(bip, 0)
452
453#define __bip_for_each_vec(bvl, bip, i, start_idx) \
454 for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx); \
455 i < (bip)->bip_vcnt; \
456 bvl++, i++)
457
458#define bip_for_each_vec(bvl, bip, i) \
459 __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx)
460
461static inline int bio_integrity(struct bio *bio)
462{
463#if defined(CONFIG_BLK_DEV_INTEGRITY)
464 return bio->bi_integrity != NULL;
465#else
466 return 0;
467#endif
468}
469
470extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
471extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
472extern void bio_integrity_free(struct bio *, struct bio_set *);
473extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
474extern int bio_integrity_enabled(struct bio *bio);
475extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
476extern int bio_integrity_get_tag(struct bio *, void *, unsigned int);
477extern int bio_integrity_prep(struct bio *);
478extern void bio_integrity_endio(struct bio *, int);
479extern void bio_integrity_advance(struct bio *, unsigned int);
480extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
481extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
482extern int bio_integrity_clone(struct bio *, struct bio *, struct bio_set *);
483extern int bioset_integrity_create(struct bio_set *, int);
484extern void bioset_integrity_free(struct bio_set *);
485extern void bio_integrity_init_slab(void);
486
487#else /* CONFIG_BLK_DEV_INTEGRITY */
488
489#define bio_integrity(a) (0)
490#define bioset_integrity_create(a, b) (0)
491#define bio_integrity_prep(a) (0)
492#define bio_integrity_enabled(a) (0)
493#define bio_integrity_clone(a, b, c) (0)
494#define bioset_integrity_free(a) do { } while (0)
495#define bio_integrity_free(a, b) do { } while (0)
496#define bio_integrity_endio(a, b) do { } while (0)
497#define bio_integrity_advance(a, b) do { } while (0)
498#define bio_integrity_trim(a, b, c) do { } while (0)
499#define bio_integrity_split(a, b, c) do { } while (0)
500#define bio_integrity_set_tag(a, b, c) do { } while (0)
501#define bio_integrity_get_tag(a, b, c) do { } while (0)
502#define bio_integrity_init_slab(a) do { } while (0)
503
504#endif /* CONFIG_BLK_DEV_INTEGRITY */
505
384#endif /* CONFIG_BLOCK */ 506#endif /* CONFIG_BLOCK */
385#endif /* __LINUX_BIO_H */ 507#endif /* __LINUX_BIO_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2a1b71e93c3..1ffd8bfdc4c9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -23,7 +23,6 @@
23struct scsi_ioctl_command; 23struct scsi_ioctl_command;
24 24
25struct request_queue; 25struct request_queue;
26typedef struct request_queue request_queue_t __deprecated;
27struct elevator_queue; 26struct elevator_queue;
28typedef struct elevator_queue elevator_t; 27typedef struct elevator_queue elevator_t;
29struct request_pm_state; 28struct request_pm_state;
@@ -34,12 +33,6 @@ struct sg_io_hdr;
34#define BLKDEV_MIN_RQ 4 33#define BLKDEV_MIN_RQ 4
35#define BLKDEV_MAX_RQ 128 /* Default maximum */ 34#define BLKDEV_MAX_RQ 128 /* Default maximum */
36 35
37int put_io_context(struct io_context *ioc);
38void exit_io_context(void);
39struct io_context *get_io_context(gfp_t gfp_flags, int node);
40struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
41void copy_io_context(struct io_context **pdst, struct io_context **psrc);
42
43struct request; 36struct request;
44typedef void (rq_end_io_fn)(struct request *, int); 37typedef void (rq_end_io_fn)(struct request *, int);
45 38
@@ -113,6 +106,7 @@ enum rq_flag_bits {
113 __REQ_ALLOCED, /* request came from our alloc pool */ 106 __REQ_ALLOCED, /* request came from our alloc pool */
114 __REQ_RW_META, /* metadata io request */ 107 __REQ_RW_META, /* metadata io request */
115 __REQ_COPY_USER, /* contains copies of user pages */ 108 __REQ_COPY_USER, /* contains copies of user pages */
109 __REQ_INTEGRITY, /* integrity metadata has been remapped */
116 __REQ_NR_BITS, /* stops here */ 110 __REQ_NR_BITS, /* stops here */
117}; 111};
118 112
@@ -135,6 +129,7 @@ enum rq_flag_bits {
135#define REQ_ALLOCED (1 << __REQ_ALLOCED) 129#define REQ_ALLOCED (1 << __REQ_ALLOCED)
136#define REQ_RW_META (1 << __REQ_RW_META) 130#define REQ_RW_META (1 << __REQ_RW_META)
137#define REQ_COPY_USER (1 << __REQ_COPY_USER) 131#define REQ_COPY_USER (1 << __REQ_COPY_USER)
132#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
138 133
139#define BLK_MAX_CDB 16 134#define BLK_MAX_CDB 16
140 135
@@ -259,7 +254,14 @@ typedef int (prep_rq_fn) (struct request_queue *, struct request *);
259typedef void (unplug_fn) (struct request_queue *); 254typedef void (unplug_fn) (struct request_queue *);
260 255
261struct bio_vec; 256struct bio_vec;
262typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); 257struct bvec_merge_data {
258 struct block_device *bi_bdev;
259 sector_t bi_sector;
260 unsigned bi_size;
261 unsigned long bi_rw;
262};
263typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
264 struct bio_vec *);
263typedef void (prepare_flush_fn) (struct request_queue *, struct request *); 265typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
264typedef void (softirq_done_fn)(struct request *); 266typedef void (softirq_done_fn)(struct request *);
265typedef int (dma_drain_needed_fn)(struct request *); 267typedef int (dma_drain_needed_fn)(struct request *);
@@ -426,6 +428,32 @@ static inline void queue_flag_set_unlocked(unsigned int flag,
426 __set_bit(flag, &q->queue_flags); 428 __set_bit(flag, &q->queue_flags);
427} 429}
428 430
431static inline int queue_flag_test_and_clear(unsigned int flag,
432 struct request_queue *q)
433{
434 WARN_ON_ONCE(!queue_is_locked(q));
435
436 if (test_bit(flag, &q->queue_flags)) {
437 __clear_bit(flag, &q->queue_flags);
438 return 1;
439 }
440
441 return 0;
442}
443
444static inline int queue_flag_test_and_set(unsigned int flag,
445 struct request_queue *q)
446{
447 WARN_ON_ONCE(!queue_is_locked(q));
448
449 if (!test_bit(flag, &q->queue_flags)) {
450 __set_bit(flag, &q->queue_flags);
451 return 0;
452 }
453
454 return 1;
455}
456
429static inline void queue_flag_set(unsigned int flag, struct request_queue *q) 457static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
430{ 458{
431 WARN_ON_ONCE(!queue_is_locked(q)); 459 WARN_ON_ONCE(!queue_is_locked(q));
@@ -676,7 +704,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
676 struct request *, int); 704 struct request *, int);
677extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 705extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
678 struct request *, int, rq_end_io_fn *); 706 struct request *, int, rq_end_io_fn *);
679extern int blk_verify_command(unsigned char *, int);
680extern void blk_unplug(struct request_queue *q); 707extern void blk_unplug(struct request_queue *q);
681 708
682static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 709static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -749,6 +776,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
749extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); 776extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
750extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); 777extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
751extern void blk_queue_dma_pad(struct request_queue *, unsigned int); 778extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
779extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
752extern int blk_queue_dma_drain(struct request_queue *q, 780extern int blk_queue_dma_drain(struct request_queue *q,
753 dma_drain_needed_fn *dma_drain_needed, 781 dma_drain_needed_fn *dma_drain_needed,
754 void *buf, unsigned int size); 782 void *buf, unsigned int size);
@@ -802,6 +830,15 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
802 830
803extern int blkdev_issue_flush(struct block_device *, sector_t *); 831extern int blkdev_issue_flush(struct block_device *, sector_t *);
804 832
833/*
834* command filter functions
835*/
836extern int blk_verify_command(struct file *file, unsigned char *cmd);
837extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
838 unsigned char *cmd, mode_t *f_mode);
839extern int blk_register_filter(struct gendisk *disk);
840extern void blk_unregister_filter(struct gendisk *disk);
841
805#define MAX_PHYS_SEGMENTS 128 842#define MAX_PHYS_SEGMENTS 128
806#define MAX_HW_SEGMENTS 128 843#define MAX_HW_SEGMENTS 128
807#define SAFE_MAX_SECTORS 255 844#define SAFE_MAX_SECTORS 255
@@ -865,28 +902,116 @@ void kblockd_flush_work(struct work_struct *work);
865#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 902#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
866 MODULE_ALIAS("block-major-" __stringify(major) "-*") 903 MODULE_ALIAS("block-major-" __stringify(major) "-*")
867 904
905#if defined(CONFIG_BLK_DEV_INTEGRITY)
868 906
869#else /* CONFIG_BLOCK */ 907#define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */
870/* 908#define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */
871 * stubs for when the block layer is configured out
872 */
873#define buffer_heads_over_limit 0
874 909
875static inline long nr_blockdev_pages(void) 910struct blk_integrity_exchg {
911 void *prot_buf;
912 void *data_buf;
913 sector_t sector;
914 unsigned int data_size;
915 unsigned short sector_size;
916 const char *disk_name;
917};
918
919typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
920typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
921typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
922typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
923
924struct blk_integrity {
925 integrity_gen_fn *generate_fn;
926 integrity_vrfy_fn *verify_fn;
927 integrity_set_tag_fn *set_tag_fn;
928 integrity_get_tag_fn *get_tag_fn;
929
930 unsigned short flags;
931 unsigned short tuple_size;
932 unsigned short sector_size;
933 unsigned short tag_size;
934
935 const char *name;
936
937 struct kobject kobj;
938};
939
940extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
941extern void blk_integrity_unregister(struct gendisk *);
942extern int blk_integrity_compare(struct block_device *, struct block_device *);
943extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
944extern int blk_rq_count_integrity_sg(struct request *);
945
946static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
876{ 947{
948 if (bi)
949 return bi->tuple_size;
950
877 return 0; 951 return 0;
878} 952}
879 953
880static inline void exit_io_context(void) 954static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
881{ 955{
956 return bdev->bd_disk->integrity;
882} 957}
883 958
884struct io_context; 959static inline unsigned int bdev_get_tag_size(struct block_device *bdev)
885static inline int put_io_context(struct io_context *ioc)
886{ 960{
887 return 1; 961 struct blk_integrity *bi = bdev_get_integrity(bdev);
962
963 if (bi)
964 return bi->tag_size;
965
966 return 0;
967}
968
969static inline int bdev_integrity_enabled(struct block_device *bdev, int rw)
970{
971 struct blk_integrity *bi = bdev_get_integrity(bdev);
972
973 if (bi == NULL)
974 return 0;
975
976 if (rw == READ && bi->verify_fn != NULL &&
977 (bi->flags & INTEGRITY_FLAG_READ))
978 return 1;
979
980 if (rw == WRITE && bi->generate_fn != NULL &&
981 (bi->flags & INTEGRITY_FLAG_WRITE))
982 return 1;
983
984 return 0;
888} 985}
889 986
987static inline int blk_integrity_rq(struct request *rq)
988{
989 return bio_integrity(rq->bio);
990}
991
992#else /* CONFIG_BLK_DEV_INTEGRITY */
993
994#define blk_integrity_rq(rq) (0)
995#define blk_rq_count_integrity_sg(a) (0)
996#define blk_rq_map_integrity_sg(a, b) (0)
997#define bdev_get_integrity(a) (0)
998#define bdev_get_tag_size(a) (0)
999#define blk_integrity_compare(a, b) (0)
1000#define blk_integrity_register(a, b) (0)
1001#define blk_integrity_unregister(a) do { } while (0);
1002
1003#endif /* CONFIG_BLK_DEV_INTEGRITY */
1004
1005#else /* CONFIG_BLOCK */
1006/*
1007 * stubs for when the block layer is configured out
1008 */
1009#define buffer_heads_over_limit 0
1010
1011static inline long nr_blockdev_pages(void)
1012{
1013 return 0;
1014}
890 1015
891#endif /* CONFIG_BLOCK */ 1016#endif /* CONFIG_BLOCK */
892 1017
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index e3ef903aae88..d084b8d227a5 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -129,6 +129,7 @@ struct blk_trace {
129 u32 dev; 129 u32 dev;
130 struct dentry *dir; 130 struct dentry *dir;
131 struct dentry *dropped_file; 131 struct dentry *dropped_file;
132 struct dentry *msg_file;
132 atomic_t dropped; 133 atomic_t dropped;
133}; 134};
134 135
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ae7aec3cabee..e8787417f65a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -110,6 +110,14 @@ struct hd_struct {
110#define GENHD_FL_SUPPRESS_PARTITION_INFO 32 110#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
111#define GENHD_FL_FAIL 64 111#define GENHD_FL_FAIL 64
112 112
113#define BLK_SCSI_MAX_CMDS (256)
114#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
115
116struct blk_scsi_cmd_filter {
117 unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
118 unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
119 struct kobject kobj;
120};
113 121
114struct gendisk { 122struct gendisk {
115 int major; /* major number of driver */ 123 int major; /* major number of driver */
@@ -120,6 +128,7 @@ struct gendisk {
120 struct hd_struct **part; /* [indexed by minor] */ 128 struct hd_struct **part; /* [indexed by minor] */
121 struct block_device_operations *fops; 129 struct block_device_operations *fops;
122 struct request_queue *queue; 130 struct request_queue *queue;
131 struct blk_scsi_cmd_filter cmd_filter;
123 void *private_data; 132 void *private_data;
124 sector_t capacity; 133 sector_t capacity;
125 134
@@ -141,6 +150,9 @@ struct gendisk {
141 struct disk_stats dkstats; 150 struct disk_stats dkstats;
142#endif 151#endif
143 struct work_struct async_notify; 152 struct work_struct async_notify;
153#ifdef CONFIG_BLK_DEV_INTEGRITY
154 struct blk_integrity *integrity;
155#endif
144}; 156};
145 157
146/* 158/*
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 2b7a1187cb29..08b987bccf89 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -99,4 +99,22 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
99 return NULL; 99 return NULL;
100} 100}
101 101
102#ifdef CONFIG_BLOCK
103int put_io_context(struct io_context *ioc);
104void exit_io_context(void);
105struct io_context *get_io_context(gfp_t gfp_flags, int node);
106struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
107void copy_io_context(struct io_context **pdst, struct io_context **psrc);
108#else
109static inline void exit_io_context(void)
110{
111}
112
113struct io_context;
114static inline int put_io_context(struct io_context *ioc)
115{
116 return 1;
117}
118#endif
119
102#endif 120#endif
diff --git a/kernel/exit.c b/kernel/exit.c
index 8f6185e69b69..ceb258782835 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -13,6 +13,7 @@
13#include <linux/personality.h> 13#include <linux/personality.h>
14#include <linux/tty.h> 14#include <linux/tty.h>
15#include <linux/mnt_namespace.h> 15#include <linux/mnt_namespace.h>
16#include <linux/iocontext.h>
16#include <linux/key.h> 17#include <linux/key.h>
17#include <linux/security.h> 18#include <linux/security.h>
18#include <linux/cpu.h> 19#include <linux/cpu.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 19908b26cf80..b71ccd09fc8d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -23,6 +23,7 @@
23#include <linux/sem.h> 23#include <linux/sem.h>
24#include <linux/file.h> 24#include <linux/file.h>
25#include <linux/fdtable.h> 25#include <linux/fdtable.h>
26#include <linux/iocontext.h>
26#include <linux/key.h> 27#include <linux/key.h>
27#include <linux/binfmts.h> 28#include <linux/binfmts.h>
28#include <linux/mman.h> 29#include <linux/mman.h>