author     Linus Torvalds <torvalds@g5.osdl.org>  2006-09-30 15:07:01 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>  2006-09-30 15:07:01 -0400
commit     56f29d7fe452890eeeb7f2b0138b2d95b9745fb6 (patch)
tree       01b6b70297c53c9beb8d11eb186fbad9c166b1a2
parent     e823aff2d6eb43083abcc75a32ddfb167c324089 (diff)
parent     059af497c23492cb1ddcbba11c09dad385960bc0 (diff)
Merge branch 'block' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'block' of git://brick.kernel.dk/data/git/linux-2.6-block: (67 commits)
  [PATCH] blk_queue_start_tag() shared map race fix
  [PATCH] Update axboe@suse.de email address
  [PATCH] fix creating zero sized bio mempools in low memory system
  [PATCH] CONFIG_BLOCK: blk_congestion_wait() fix
  [PATCH] CONFIG_BLOCK internal.h cleanups
  [PATCH] BLOCK: Make USB storage depend on SCSI rather than selecting it [try #6]
  [PATCH] BLOCK: Make it possible to disable the block layer [try #6]
  [PATCH] BLOCK: Remove no-longer necessary linux/buffer_head.h inclusions [try #6]
  [PATCH] BLOCK: Remove no-longer necessary linux/mpage.h inclusions [try #6]
  [PATCH] BLOCK: Move the msdos device ioctl compat stuff to the msdos driver [try #6]
  [PATCH] BLOCK: Move the Ext3 device ioctl compat stuff to the Ext3 driver [try #6]
  [PATCH] BLOCK: Move the Ext2 device ioctl compat stuff to the Ext2 driver [try #6]
  [PATCH] BLOCK: Move the ReiserFS device ioctl compat stuff to the ReiserFS driver [try #6]
  [PATCH] BLOCK: Move common FS-specific ioctls to linux/fs.h [try #6]
  [PATCH] BLOCK: Move the loop device ioctl compat stuff to the loop driver [try #6]
  [PATCH] BLOCK: Move __invalidate_device() to block_dev.c [try #6]
  [PATCH] BLOCK: Dissociate generic_writepages() from mpage stuff [try #6]
  [PATCH] BLOCK: Remove dependence on existence of blockdev_superblock [try #6]
  [PATCH] BLOCK: Move extern declarations out of fs/*.c into header files [try #6]
  [PATCH] BLOCK: Don't call block_sync_page() from AFS [try #6]
  ...
-rw-r--r--  MAINTAINERS | 8
-rw-r--r--  arch/mips/kernel/signal_n32.c | 4
-rw-r--r--  arch/um/drivers/ubd_kern.c | 2
-rw-r--r--  block/Kconfig | 20
-rw-r--r--  block/Kconfig.iosched | 3
-rw-r--r--  block/Makefile | 2
-rw-r--r--  block/as-iosched.c | 672
-rw-r--r--  block/blktrace.c | 26
-rw-r--r--  block/cfq-iosched.c | 865
-rw-r--r--  block/deadline-iosched.c | 464
-rw-r--r--  block/elevator.c | 315
-rw-r--r--  block/ll_rw_blk.c | 236
-rw-r--r--  block/noop-iosched.c | 2
-rw-r--r--  block/scsi_ioctl.c | 6
-rw-r--r--  drivers/block/DAC960.c | 2
-rw-r--r--  drivers/block/Kconfig | 4
-rw-r--r--  drivers/block/cciss.c | 1
-rw-r--r--  drivers/block/cpqarray.c | 1
-rw-r--r--  drivers/block/floppy.c | 4
-rw-r--r--  drivers/block/loop.c | 160
-rw-r--r--  drivers/block/nbd.c | 8
-rw-r--r--  drivers/block/paride/pd.c | 6
-rw-r--r--  drivers/block/pktcdvd.c | 8
-rw-r--r--  drivers/block/swim3.c | 4
-rw-r--r--  drivers/block/swim_iop.c | 4
-rw-r--r--  drivers/block/xd.c | 2
-rw-r--r--  drivers/cdrom/Kconfig | 2
-rw-r--r--  drivers/cdrom/cdrom.c | 2
-rw-r--r--  drivers/cdrom/cdu31a.c | 4
-rw-r--r--  drivers/char/Kconfig | 1
-rw-r--r--  drivers/char/random.c | 4
-rw-r--r--  drivers/fc4/fc.c | 1
-rw-r--r--  drivers/ide/Kconfig | 4
-rw-r--r--  drivers/ide/ide-cd.c | 69
-rw-r--r--  drivers/ide/ide-disk.c | 5
-rw-r--r--  drivers/ide/ide-dma.c | 2
-rw-r--r--  drivers/ide/ide-floppy.c | 17
-rw-r--r--  drivers/ide/ide-io.c | 50
-rw-r--r--  drivers/ide/ide-lib.c | 5
-rw-r--r--  drivers/ide/ide-tape.c | 14
-rw-r--r--  drivers/ide/ide-taskfile.c | 8
-rw-r--r--  drivers/ide/ide.c | 8
-rw-r--r--  drivers/ide/legacy/hd.c | 2
-rw-r--r--  drivers/md/Kconfig | 3
-rw-r--r--  drivers/md/dm-emc.c | 3
-rw-r--r--  drivers/message/i2o/Kconfig | 2
-rw-r--r--  drivers/message/i2o/i2o_block.c | 7
-rw-r--r--  drivers/mmc/Kconfig | 2
-rw-r--r--  drivers/mmc/Makefile | 3
-rw-r--r--  drivers/mmc/mmc_queue.c | 6
-rw-r--r--  drivers/mtd/Kconfig | 12
-rw-r--r--  drivers/mtd/devices/Kconfig | 2
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 2
-rw-r--r--  drivers/s390/block/Kconfig | 2
-rw-r--r--  drivers/s390/block/dasd_diag.c | 2
-rw-r--r--  drivers/s390/block/dasd_eckd.c | 2
-rw-r--r--  drivers/s390/block/dasd_fba.c | 2
-rw-r--r--  drivers/scsi/Kconfig | 2
-rw-r--r--  drivers/scsi/aic7xxx_old.c | 4
-rw-r--r--  drivers/scsi/ide-scsi.c | 16
-rw-r--r--  drivers/scsi/pluto.c | 6
-rw-r--r--  drivers/scsi/scsi.c | 13
-rw-r--r--  drivers/scsi/scsi_lib.c | 37
-rw-r--r--  drivers/scsi/sd.c | 5
-rw-r--r--  drivers/scsi/sun3_NCR5380.c | 2
-rw-r--r--  drivers/scsi/sun3_scsi.c | 2
-rw-r--r--  drivers/scsi/sun3_scsi_vme.c | 2
-rw-r--r--  drivers/usb/storage/Kconfig | 5
-rw-r--r--  fs/Kconfig | 31
-rw-r--r--  fs/Makefile | 14
-rw-r--r--  fs/afs/file.c | 2
-rw-r--r--  fs/binfmt_elf.c | 1
-rw-r--r--  fs/bio.c | 4
-rw-r--r--  fs/block_dev.c | 23
-rw-r--r--  fs/buffer.c | 174
-rw-r--r--  fs/char_dev.c | 1
-rw-r--r--  fs/cifs/file.c | 1
-rw-r--r--  fs/cifs/inode.c | 1
-rw-r--r--  fs/cifs/ioctl.c | 7
-rw-r--r--  fs/compat.c | 10
-rw-r--r--  fs/compat_ioctl.c | 208
-rw-r--r--  fs/dcache.c | 4
-rw-r--r--  fs/ext2/dir.c | 3
-rw-r--r--  fs/ext2/ext2.h | 1
-rw-r--r--  fs/ext2/file.c | 6
-rw-r--r--  fs/ext2/ioctl.c | 32
-rw-r--r--  fs/ext3/dir.c | 3
-rw-r--r--  fs/ext3/file.c | 3
-rw-r--r--  fs/ext3/inode.c | 5
-rw-r--r--  fs/ext3/ioctl.c | 55
-rw-r--r--  fs/ext3/namei.c | 3
-rw-r--r--  fs/fat/dir.c | 56
-rw-r--r--  fs/fs-writeback.c | 9
-rw-r--r--  fs/hfsplus/hfsplus_fs.h | 8
-rw-r--r--  fs/hfsplus/ioctl.c | 17
-rw-r--r--  fs/inode.c | 21
-rw-r--r--  fs/internal.h | 55
-rw-r--r--  fs/ioprio.c | 19
-rw-r--r--  fs/jfs/ioctl.c | 15
-rw-r--r--  fs/mpage.c | 2
-rw-r--r--  fs/namespace.c | 3
-rw-r--r--  fs/nfs/write.c | 1
-rw-r--r--  fs/no-block.c | 22
-rw-r--r--  fs/partitions/Makefile | 2
-rw-r--r--  fs/proc/proc_misc.c | 11
-rw-r--r--  fs/quota.c | 44
-rw-r--r--  fs/reiserfs/dir.c | 3
-rw-r--r--  fs/reiserfs/file.c | 4
-rw-r--r--  fs/reiserfs/ioctl.c | 35
-rw-r--r--  fs/splice.c | 2
-rw-r--r--  fs/super.c | 35
-rw-r--r--  fs/sync.c | 113
-rw-r--r--  fs/xfs/Kconfig | 1
-rw-r--r--  include/linux/bio.h | 2
-rw-r--r--  include/linux/blkdev.h | 333
-rw-r--r--  include/linux/blktrace_api.h | 3
-rw-r--r--  include/linux/buffer_head.h | 19
-rw-r--r--  include/linux/compat_ioctl.h | 8
-rw-r--r--  include/linux/elevator.h | 68
-rw-r--r--  include/linux/ext2_fs.h | 64
-rw-r--r--  include/linux/ext3_fs.h | 26
-rw-r--r--  include/linux/fs.h | 67
-rw-r--r--  include/linux/genhd.h | 4
-rw-r--r--  include/linux/mm.h | 4
-rw-r--r--  include/linux/mpage.h | 7
-rw-r--r--  include/linux/raid/md.h | 3
-rw-r--r--  include/linux/raid/md_k.h | 3
-rw-r--r--  include/linux/ramfs.h | 1
-rw-r--r--  include/linux/rbtree.h | 2
-rw-r--r--  include/linux/reiserfs_fs.h | 37
-rw-r--r--  include/linux/sched.h | 1
-rw-r--r--  include/linux/tty.h | 3
-rw-r--r--  include/linux/writeback.h | 2
-rw-r--r--  include/scsi/scsi_tcq.h | 5
-rw-r--r--  init/Kconfig | 2
-rw-r--r--  init/do_mounts.c | 13
-rw-r--r--  kernel/compat.c | 2
-rw-r--r--  kernel/exit.c | 1
-rw-r--r--  kernel/sys_ni.c | 5
-rw-r--r--  lib/rbtree.c | 6
-rw-r--r--  mm/Makefile | 3
-rw-r--r--  mm/bounce.c | 302
-rw-r--r--  mm/filemap.c | 34
-rw-r--r--  mm/highmem.c | 281
-rw-r--r--  mm/migrate.c | 4
-rw-r--r--  mm/page-writeback.c | 143
-rw-r--r--  mm/truncate.c | 26
147 files changed, 2841 insertions(+), 2884 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 28e1c7b9244a..f0cd5a3f6de6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -501,7 +501,7 @@ S: Maintained
501 501
502BLOCK LAYER 502BLOCK LAYER
503P: Jens Axboe 503P: Jens Axboe
504M: axboe@suse.de 504M: axboe@kernel.dk
505L: linux-kernel@vger.kernel.org 505L: linux-kernel@vger.kernel.org
506T: git kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git 506T: git kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
507S: Maintained 507S: Maintained
@@ -1380,7 +1380,7 @@ S: Maintained
1380 1380
1381IDE/ATAPI CDROM DRIVER 1381IDE/ATAPI CDROM DRIVER
1382P: Jens Axboe 1382P: Jens Axboe
1383M: axboe@suse.de 1383M: axboe@kernel.dk
1384L: linux-kernel@vger.kernel.org 1384L: linux-kernel@vger.kernel.org
1385W: http://www.kernel.dk 1385W: http://www.kernel.dk
1386S: Maintained 1386S: Maintained
@@ -2531,7 +2531,7 @@ S: Maintained
2531 2531
2532SCSI CDROM DRIVER 2532SCSI CDROM DRIVER
2533P: Jens Axboe 2533P: Jens Axboe
2534M: axboe@suse.de 2534M: axboe@kernel.dk
2535L: linux-scsi@vger.kernel.org 2535L: linux-scsi@vger.kernel.org
2536W: http://www.kernel.dk 2536W: http://www.kernel.dk
2537S: Maintained 2537S: Maintained
@@ -2976,7 +2976,7 @@ S: Maintained
2976 2976
2977UNIFORM CDROM DRIVER 2977UNIFORM CDROM DRIVER
2978P: Jens Axboe 2978P: Jens Axboe
2979M: axboe@suse.de 2979M: axboe@kernel.dk
2980L: linux-kernel@vger.kernel.org 2980L: linux-kernel@vger.kernel.org
2981W: http://www.kernel.dk 2981W: http://www.kernel.dk
2982S: Maintained 2982S: Maintained
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index 477c5334ec1b..50c17eaa7f25 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -42,6 +42,8 @@
42 42
43#include "signal-common.h" 43#include "signal-common.h"
44 44
45extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
46
45/* 47/*
46 * Including <asm/unistd.h> would give use the 64-bit syscall numbers ... 48 * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
47 */ 49 */
@@ -81,8 +83,6 @@ struct rt_sigframe_n32 {
81#endif 83#endif
82}; 84};
83 85
84extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
85
86save_static_function(sysn32_rt_sigsuspend); 86save_static_function(sysn32_rt_sigsuspend);
87__attribute_used__ noinline static int 87__attribute_used__ noinline static int
88_sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs) 88_sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 5fa4c8e258a4..fda4a3940698 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -981,8 +981,6 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req)
981 __u64 offset; 981 __u64 offset;
982 int len; 982 int len;
983 983
984 if(req->rq_status == RQ_INACTIVE) return(1);
985
986 /* This should be impossible now */ 984 /* This should be impossible now */
987 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ 985 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
988 printk("Write attempted on readonly ubd device %s\n", 986 printk("Write attempted on readonly ubd device %s\n",
diff --git a/block/Kconfig b/block/Kconfig
index b6f5f0a79655..9af6c614dfde 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -1,6 +1,24 @@
1# 1#
2# Block layer core configuration 2# Block layer core configuration
3# 3#
4config BLOCK
5 bool "Enable the block layer"
6 default y
7 help
8 This permits the block layer to be removed from the kernel if it's not
9 needed (on some embedded devices for example). If this option is
10 disabled, then blockdev files will become unusable and some
11 filesystems (such as ext3) will become unavailable.
12
13 This option will also disable SCSI character devices and USB storage
14 since they make use of various block layer definitions and
15 facilities.
16
17 Say Y here unless you know you really don't want to mount disks and
18 suchlike.
19
20if BLOCK
21
4#XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64 22#XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64
5#for instance. 23#for instance.
6config LBD 24config LBD
@@ -33,4 +51,6 @@ config LSF
33 51
34 If unsure, say Y. 52 If unsure, say Y.
35 53
54endif
55
36source block/Kconfig.iosched 56source block/Kconfig.iosched
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 48d090e266fc..903f0d3b6852 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -1,3 +1,4 @@
1if BLOCK
1 2
2menu "IO Schedulers" 3menu "IO Schedulers"
3 4
@@ -67,3 +68,5 @@ config DEFAULT_IOSCHED
67 default "noop" if DEFAULT_NOOP 68 default "noop" if DEFAULT_NOOP
68 69
69endmenu 70endmenu
71
72endif
diff --git a/block/Makefile b/block/Makefile
index c05de0e0037f..4b84d0d5947b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the kernel block layer 2# Makefile for the kernel block layer
3# 3#
4 4
5obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o 5obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
6 6
7obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 7obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
8obj-$(CONFIG_IOSCHED_AS) += as-iosched.o 8obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 5da56d48fbd3..165509e8659e 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Anticipatory & deadline i/o scheduler. 2 * Anticipatory & deadline i/o scheduler.
3 * 3 *
4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
5 * Nick Piggin <nickpiggin@yahoo.com.au> 5 * Nick Piggin <nickpiggin@yahoo.com.au>
6 * 6 *
7 */ 7 */
@@ -14,7 +14,6 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <linux/hash.h>
18#include <linux/rbtree.h> 17#include <linux/rbtree.h>
19#include <linux/interrupt.h> 18#include <linux/interrupt.h>
20 19
@@ -93,9 +92,8 @@ struct as_data {
93 struct rb_root sort_list[2]; 92 struct rb_root sort_list[2];
94 struct list_head fifo_list[2]; 93 struct list_head fifo_list[2];
95 94
96 struct as_rq *next_arq[2]; /* next in sort order */ 95 struct request *next_rq[2]; /* next in sort order */
97 sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ 96 sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
98 struct hlist_head *hash; /* request hash */
99 97
100 unsigned long exit_prob; /* probability a task will exit while 98 unsigned long exit_prob; /* probability a task will exit while
101 being waited on */ 99 being waited on */
@@ -115,7 +113,6 @@ struct as_data {
115 int write_batch_count; /* max # of reqs in a write batch */ 113 int write_batch_count; /* max # of reqs in a write batch */
116 int current_write_count; /* how many requests left this batch */ 114 int current_write_count; /* how many requests left this batch */
117 int write_batch_idled; /* has the write batch gone idle? */ 115 int write_batch_idled; /* has the write batch gone idle? */
118 mempool_t *arq_pool;
119 116
120 enum anticipation_status antic_status; 117 enum anticipation_status antic_status;
121 unsigned long antic_start; /* jiffies: when it started */ 118 unsigned long antic_start; /* jiffies: when it started */
@@ -133,8 +130,6 @@ struct as_data {
133 unsigned long antic_expire; 130 unsigned long antic_expire;
134}; 131};
135 132
136#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo)
137
138/* 133/*
139 * per-request data. 134 * per-request data.
140 */ 135 */
@@ -150,40 +145,14 @@ enum arq_state {
150 AS_RQ_POSTSCHED, /* when they shouldn't be */ 145 AS_RQ_POSTSCHED, /* when they shouldn't be */
151}; 146};
152 147
153struct as_rq { 148#define RQ_IOC(rq) ((struct io_context *) (rq)->elevator_private)
154 /* 149#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2)
155 * rbtree index, key is the starting offset 150#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state)
156 */
157 struct rb_node rb_node;
158 sector_t rb_key;
159
160 struct request *request;
161
162 struct io_context *io_context; /* The submitting task */
163
164 /*
165 * request hash, key is the ending offset (for back merge lookup)
166 */
167 struct hlist_node hash;
168
169 /*
170 * expire fifo
171 */
172 struct list_head fifo;
173 unsigned long expires;
174 151
175 unsigned int is_sync; 152static DEFINE_PER_CPU(unsigned long, ioc_count);
176 enum arq_state state;
177};
178
179#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private)
180
181static kmem_cache_t *arq_pool;
182
183static atomic_t ioc_count = ATOMIC_INIT(0);
184static struct completion *ioc_gone; 153static struct completion *ioc_gone;
185 154
186static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); 155static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
187static void as_antic_stop(struct as_data *ad); 156static void as_antic_stop(struct as_data *ad);
188 157
189/* 158/*
@@ -194,7 +163,8 @@ static void as_antic_stop(struct as_data *ad);
194static void free_as_io_context(struct as_io_context *aic) 163static void free_as_io_context(struct as_io_context *aic)
195{ 164{
196 kfree(aic); 165 kfree(aic);
197 if (atomic_dec_and_test(&ioc_count) && ioc_gone) 166 elv_ioc_count_dec(ioc_count);
167 if (ioc_gone && !elv_ioc_count_read(ioc_count))
198 complete(ioc_gone); 168 complete(ioc_gone);
199} 169}
200 170
@@ -230,7 +200,7 @@ static struct as_io_context *alloc_as_io_context(void)
230 ret->seek_total = 0; 200 ret->seek_total = 0;
231 ret->seek_samples = 0; 201 ret->seek_samples = 0;
232 ret->seek_mean = 0; 202 ret->seek_mean = 0;
233 atomic_inc(&ioc_count); 203 elv_ioc_count_inc(ioc_count);
234 } 204 }
235 205
236 return ret; 206 return ret;
@@ -240,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void)
240 * If the current task has no AS IO context then create one and initialise it. 210 * If the current task has no AS IO context then create one and initialise it.
241 * Then take a ref on the task's io context and return it. 211 * Then take a ref on the task's io context and return it.
242 */ 212 */
243static struct io_context *as_get_io_context(void) 213static struct io_context *as_get_io_context(int node)
244{ 214{
245 struct io_context *ioc = get_io_context(GFP_ATOMIC); 215 struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
246 if (ioc && !ioc->aic) { 216 if (ioc && !ioc->aic) {
247 ioc->aic = alloc_as_io_context(); 217 ioc->aic = alloc_as_io_context();
248 if (!ioc->aic) { 218 if (!ioc->aic) {
@@ -253,194 +223,43 @@ static struct io_context *as_get_io_context(void)
253 return ioc; 223 return ioc;
254} 224}
255 225
256static void as_put_io_context(struct as_rq *arq) 226static void as_put_io_context(struct request *rq)
257{ 227{
258 struct as_io_context *aic; 228 struct as_io_context *aic;
259 229
260 if (unlikely(!arq->io_context)) 230 if (unlikely(!RQ_IOC(rq)))
261 return; 231 return;
262 232
263 aic = arq->io_context->aic; 233 aic = RQ_IOC(rq)->aic;
264 234
265 if (arq->is_sync == REQ_SYNC && aic) { 235 if (rq_is_sync(rq) && aic) {
266 spin_lock(&aic->lock); 236 spin_lock(&aic->lock);
267 set_bit(AS_TASK_IORUNNING, &aic->state); 237 set_bit(AS_TASK_IORUNNING, &aic->state);
268 aic->last_end_request = jiffies; 238 aic->last_end_request = jiffies;
269 spin_unlock(&aic->lock); 239 spin_unlock(&aic->lock);
270 } 240 }
271 241
272 put_io_context(arq->io_context); 242 put_io_context(RQ_IOC(rq));
273}
274
275/*
276 * the back merge hash support functions
277 */
278static const int as_hash_shift = 6;
279#define AS_HASH_BLOCK(sec) ((sec) >> 3)
280#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
281#define AS_HASH_ENTRIES (1 << as_hash_shift)
282#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
283
284static inline void __as_del_arq_hash(struct as_rq *arq)
285{
286 hlist_del_init(&arq->hash);
287}
288
289static inline void as_del_arq_hash(struct as_rq *arq)
290{
291 if (!hlist_unhashed(&arq->hash))
292 __as_del_arq_hash(arq);
293}
294
295static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
296{
297 struct request *rq = arq->request;
298
299 BUG_ON(!hlist_unhashed(&arq->hash));
300
301 hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
302}
303
304/*
305 * move hot entry to front of chain
306 */
307static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
308{
309 struct request *rq = arq->request;
310 struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
311
312 if (hlist_unhashed(&arq->hash)) {
313 WARN_ON(1);
314 return;
315 }
316
317 if (&arq->hash != head->first) {
318 hlist_del(&arq->hash);
319 hlist_add_head(&arq->hash, head);
320 }
321}
322
323static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
324{
325 struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
326 struct hlist_node *entry, *next;
327 struct as_rq *arq;
328
329 hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) {
330 struct request *__rq = arq->request;
331
332 BUG_ON(hlist_unhashed(&arq->hash));
333
334 if (!rq_mergeable(__rq)) {
335 as_del_arq_hash(arq);
336 continue;
337 }
338
339 if (rq_hash_key(__rq) == offset)
340 return __rq;
341 }
342
343 return NULL;
344} 243}
345 244
346/* 245/*
347 * rb tree support functions 246 * rb tree support functions
348 */ 247 */
349#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node) 248#define RQ_RB_ROOT(ad, rq) (&(ad)->sort_list[rq_is_sync((rq))])
350#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync])
351#define rq_rb_key(rq) (rq)->sector
352
353/*
354 * as_find_first_arq finds the first (lowest sector numbered) request
355 * for the specified data_dir. Used to sweep back to the start of the disk
356 * (1-way elevator) after we process the last (highest sector) request.
357 */
358static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
359{
360 struct rb_node *n = ad->sort_list[data_dir].rb_node;
361
362 if (n == NULL)
363 return NULL;
364
365 for (;;) {
366 if (n->rb_left == NULL)
367 return rb_entry_arq(n);
368
369 n = n->rb_left;
370 }
371}
372
373/*
374 * Add the request to the rb tree if it is unique. If there is an alias (an
375 * existing request against the same sector), which can happen when using
376 * direct IO, then return the alias.
377 */
378static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
379{
380 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
381 struct rb_node *parent = NULL;
382 struct as_rq *__arq;
383 struct request *rq = arq->request;
384
385 arq->rb_key = rq_rb_key(rq);
386
387 while (*p) {
388 parent = *p;
389 __arq = rb_entry_arq(parent);
390
391 if (arq->rb_key < __arq->rb_key)
392 p = &(*p)->rb_left;
393 else if (arq->rb_key > __arq->rb_key)
394 p = &(*p)->rb_right;
395 else
396 return __arq;
397 }
398
399 rb_link_node(&arq->rb_node, parent, p);
400 rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
401
402 return NULL;
403}
404 249
405static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) 250static void as_add_rq_rb(struct as_data *ad, struct request *rq)
406{ 251{
407 struct as_rq *alias; 252 struct request *alias;
408 253
409 while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { 254 while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) {
410 as_move_to_dispatch(ad, alias); 255 as_move_to_dispatch(ad, alias);
411 as_antic_stop(ad); 256 as_antic_stop(ad);
412 } 257 }
413} 258}
414 259
415static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) 260static inline void as_del_rq_rb(struct as_data *ad, struct request *rq)
416{
417 if (!RB_EMPTY_NODE(&arq->rb_node)) {
418 WARN_ON(1);
419 return;
420 }
421
422 rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
423 RB_CLEAR_NODE(&arq->rb_node);
424}
425
426static struct request *
427as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
428{ 261{
429 struct rb_node *n = ad->sort_list[data_dir].rb_node; 262 elv_rb_del(RQ_RB_ROOT(ad, rq), rq);
430 struct as_rq *arq;
431
432 while (n) {
433 arq = rb_entry_arq(n);
434
435 if (sector < arq->rb_key)
436 n = n->rb_left;
437 else if (sector > arq->rb_key)
438 n = n->rb_right;
439 else
440 return arq->request;
441 }
442
443 return NULL;
444} 263}
445 264
446/* 265/*
@@ -458,26 +277,26 @@ as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
458 * as_choose_req selects the preferred one of two requests of the same data_dir 277 * as_choose_req selects the preferred one of two requests of the same data_dir
459 * ignoring time - eg. timeouts, which is the job of as_dispatch_request 278 * ignoring time - eg. timeouts, which is the job of as_dispatch_request
460 */ 279 */
461static struct as_rq * 280static struct request *
462as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) 281as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
463{ 282{
464 int data_dir; 283 int data_dir;
465 sector_t last, s1, s2, d1, d2; 284 sector_t last, s1, s2, d1, d2;
466 int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */ 285 int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */
467 const sector_t maxback = MAXBACK; 286 const sector_t maxback = MAXBACK;
468 287
469 if (arq1 == NULL || arq1 == arq2) 288 if (rq1 == NULL || rq1 == rq2)
470 return arq2; 289 return rq2;
471 if (arq2 == NULL) 290 if (rq2 == NULL)
472 return arq1; 291 return rq1;
473 292
474 data_dir = arq1->is_sync; 293 data_dir = rq_is_sync(rq1);
475 294
476 last = ad->last_sector[data_dir]; 295 last = ad->last_sector[data_dir];
477 s1 = arq1->request->sector; 296 s1 = rq1->sector;
478 s2 = arq2->request->sector; 297 s2 = rq2->sector;
479 298
480 BUG_ON(data_dir != arq2->is_sync); 299 BUG_ON(data_dir != rq_is_sync(rq2));
481 300
482 /* 301 /*
483 * Strict one way elevator _except_ in the case where we allow 302 * Strict one way elevator _except_ in the case where we allow
@@ -504,61 +323,58 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
504 323
505 /* Found required data */ 324 /* Found required data */
506 if (!r1_wrap && r2_wrap) 325 if (!r1_wrap && r2_wrap)
507 return arq1; 326 return rq1;
508 else if (!r2_wrap && r1_wrap) 327 else if (!r2_wrap && r1_wrap)
509 return arq2; 328 return rq2;
510 else if (r1_wrap && r2_wrap) { 329 else if (r1_wrap && r2_wrap) {
511 /* both behind the head */ 330 /* both behind the head */
512 if (s1 <= s2) 331 if (s1 <= s2)
513 return arq1; 332 return rq1;
514 else 333 else
515 return arq2; 334 return rq2;
516 } 335 }
517 336
518 /* Both requests in front of the head */ 337 /* Both requests in front of the head */
519 if (d1 < d2) 338 if (d1 < d2)
520 return arq1; 339 return rq1;
521 else if (d2 < d1) 340 else if (d2 < d1)
522 return arq2; 341 return rq2;
523 else { 342 else {
524 if (s1 >= s2) 343 if (s1 >= s2)
525 return arq1; 344 return rq1;
526 else 345 else
527 return arq2; 346 return rq2;
528 } 347 }
529} 348}
530 349
531/* 350/*
532 * as_find_next_arq finds the next request after @prev in elevator order. 351 * as_find_next_rq finds the next request after @prev in elevator order.
533 * this with as_choose_req form the basis for how the scheduler chooses 352 * this with as_choose_req form the basis for how the scheduler chooses
534 * what request to process next. Anticipation works on top of this. 353 * what request to process next. Anticipation works on top of this.
535 */ 354 */
536static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last) 355static struct request *
356as_find_next_rq(struct as_data *ad, struct request *last)
537{ 357{
538 const int data_dir = last->is_sync;
539 struct as_rq *ret;
540 struct rb_node *rbnext = rb_next(&last->rb_node); 358 struct rb_node *rbnext = rb_next(&last->rb_node);
541 struct rb_node *rbprev = rb_prev(&last->rb_node); 359 struct rb_node *rbprev = rb_prev(&last->rb_node);
542 struct as_rq *arq_next, *arq_prev; 360 struct request *next = NULL, *prev = NULL;
543 361
544 BUG_ON(!RB_EMPTY_NODE(&last->rb_node)); 362 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
545 363
546 if (rbprev) 364 if (rbprev)
547 arq_prev = rb_entry_arq(rbprev); 365 prev = rb_entry_rq(rbprev);
548 else
549 arq_prev = NULL;
550 366
551 if (rbnext) 367 if (rbnext)
552 arq_next = rb_entry_arq(rbnext); 368 next = rb_entry_rq(rbnext);
553 else { 369 else {
554 arq_next = as_find_first_arq(ad, data_dir); 370 const int data_dir = rq_is_sync(last);
555 if (arq_next == last)
556 arq_next = NULL;
557 }
558 371
559 ret = as_choose_req(ad, arq_next, arq_prev); 372 rbnext = rb_first(&ad->sort_list[data_dir]);
373 if (rbnext && rbnext != &last->rb_node)
374 next = rb_entry_rq(rbnext);
375 }
560 376
561 return ret; 377 return as_choose_req(ad, next, prev);
562} 378}
563 379
564/* 380/*
@@ -712,8 +528,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
712static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, 528static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
713 struct request *rq) 529 struct request *rq)
714{ 530{
715 struct as_rq *arq = RQ_DATA(rq); 531 int data_dir = rq_is_sync(rq);
716 int data_dir = arq->is_sync;
717 unsigned long thinktime = 0; 532 unsigned long thinktime = 0;
718 sector_t seek_dist; 533 sector_t seek_dist;
719 534
@@ -752,11 +567,11 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
752 * previous one issued. 567 * previous one issued.
753 */ 568 */
754static int as_close_req(struct as_data *ad, struct as_io_context *aic, 569static int as_close_req(struct as_data *ad, struct as_io_context *aic,
755 struct as_rq *arq) 570 struct request *rq)
756{ 571{
757 unsigned long delay; /* milliseconds */ 572 unsigned long delay; /* milliseconds */
758 sector_t last = ad->last_sector[ad->batch_data_dir]; 573 sector_t last = ad->last_sector[ad->batch_data_dir];
759 sector_t next = arq->request->sector; 574 sector_t next = rq->sector;
760 sector_t delta; /* acceptable close offset (in sectors) */ 575 sector_t delta; /* acceptable close offset (in sectors) */
761 sector_t s; 576 sector_t s;
762 577
@@ -813,7 +628,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic,
813 * 628 *
814 * If this task has queued some other IO, do not enter enticipation. 629 * If this task has queued some other IO, do not enter enticipation.
815 */ 630 */
816static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) 631static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
817{ 632{
818 struct io_context *ioc; 633 struct io_context *ioc;
819 struct as_io_context *aic; 634 struct as_io_context *aic;
@@ -821,7 +636,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
821 ioc = ad->io_context; 636 ioc = ad->io_context;
822 BUG_ON(!ioc); 637 BUG_ON(!ioc);
823 638
824 if (arq && ioc == arq->io_context) { 639 if (rq && ioc == RQ_IOC(rq)) {
825 /* request from same process */ 640 /* request from same process */
826 return 1; 641 return 1;
827 } 642 }
@@ -848,7 +663,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
848 return 1; 663 return 1;
849 } 664 }
850 665
851 if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) { 666 if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) {
852 /* 667 /*
853 * Found a close request that is not one of ours. 668 * Found a close request that is not one of ours.
854 * 669 *
@@ -864,7 +679,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
864 ad->exit_no_coop = (7*ad->exit_no_coop)/8; 679 ad->exit_no_coop = (7*ad->exit_no_coop)/8;
865 } 680 }
866 681
867 as_update_iohist(ad, aic, arq->request); 682 as_update_iohist(ad, aic, rq);
868 return 1; 683 return 1;
869 } 684 }
870 685
@@ -891,10 +706,10 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
891} 706}
892 707
893/* 708/*
894 * as_can_anticipate indicates whether we should either run arq 709 * as_can_anticipate indicates whether we should either run rq
895 * or keep anticipating a better request. 710 * or keep anticipating a better request.
896 */ 711 */
897static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) 712static int as_can_anticipate(struct as_data *ad, struct request *rq)
898{ 713{
899 if (!ad->io_context) 714 if (!ad->io_context)
900 /* 715 /*
@@ -908,7 +723,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
908 */ 723 */
909 return 0; 724 return 0;
910 725
911 if (as_can_break_anticipation(ad, arq)) 726 if (as_can_break_anticipation(ad, rq))
912 /* 727 /*
913 * This request is a good candidate. Don't keep anticipating, 728 * This request is a good candidate. Don't keep anticipating,
914 * run it. 729 * run it.
@@ -926,16 +741,16 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
926} 741}
927 742
928/* 743/*
929 * as_update_arq must be called whenever a request (arq) is added to 744 * as_update_rq must be called whenever a request (rq) is added to
930 * the sort_list. This function keeps caches up to date, and checks if the 745 * the sort_list. This function keeps caches up to date, and checks if the
931 * request might be one we are "anticipating" 746 * request might be one we are "anticipating"
932 */ 747 */
933static void as_update_arq(struct as_data *ad, struct as_rq *arq) 748static void as_update_rq(struct as_data *ad, struct request *rq)
934{ 749{
935 const int data_dir = arq->is_sync; 750 const int data_dir = rq_is_sync(rq);
936 751
937 /* keep the next_arq cache up to date */ 752 /* keep the next_rq cache up to date */
938 ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]); 753 ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]);
939 754
940 /* 755 /*
941 * have we been anticipating this request? 756 * have we been anticipating this request?
@@ -944,7 +759,7 @@ static void as_update_arq(struct as_data *ad, struct as_rq *arq)
944 */ 759 */
945 if (ad->antic_status == ANTIC_WAIT_REQ 760 if (ad->antic_status == ANTIC_WAIT_REQ
946 || ad->antic_status == ANTIC_WAIT_NEXT) { 761 || ad->antic_status == ANTIC_WAIT_NEXT) {
947 if (as_can_break_anticipation(ad, arq)) 762 if (as_can_break_anticipation(ad, rq))
948 as_antic_stop(ad); 763 as_antic_stop(ad);
949 } 764 }
950} 765}
@@ -984,12 +799,11 @@ static void update_write_batch(struct as_data *ad)
984static void as_completed_request(request_queue_t *q, struct request *rq) 799static void as_completed_request(request_queue_t *q, struct request *rq)
985{ 800{
986 struct as_data *ad = q->elevator->elevator_data; 801 struct as_data *ad = q->elevator->elevator_data;
987 struct as_rq *arq = RQ_DATA(rq);
988 802
989 WARN_ON(!list_empty(&rq->queuelist)); 803 WARN_ON(!list_empty(&rq->queuelist));
990 804
991 if (arq->state != AS_RQ_REMOVED) { 805 if (RQ_STATE(rq) != AS_RQ_REMOVED) {
992 printk("arq->state %d\n", arq->state); 806 printk("rq->state %d\n", RQ_STATE(rq));
993 WARN_ON(1); 807 WARN_ON(1);
994 goto out; 808 goto out;
995 } 809 }
@@ -1009,14 +823,14 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
1009 * actually serviced. This should help devices with big TCQ windows 823 * actually serviced. This should help devices with big TCQ windows
1010 * and writeback caches 824 * and writeback caches
1011 */ 825 */
1012 if (ad->new_batch && ad->batch_data_dir == arq->is_sync) { 826 if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
1013 update_write_batch(ad); 827 update_write_batch(ad);
1014 ad->current_batch_expires = jiffies + 828 ad->current_batch_expires = jiffies +
1015 ad->batch_expire[REQ_SYNC]; 829 ad->batch_expire[REQ_SYNC];
1016 ad->new_batch = 0; 830 ad->new_batch = 0;
1017 } 831 }
1018 832
1019 if (ad->io_context == arq->io_context && ad->io_context) { 833 if (ad->io_context == RQ_IOC(rq) && ad->io_context) {
1020 ad->antic_start = jiffies; 834 ad->antic_start = jiffies;
1021 ad->ioc_finished = 1; 835 ad->ioc_finished = 1;
1022 if (ad->antic_status == ANTIC_WAIT_REQ) { 836 if (ad->antic_status == ANTIC_WAIT_REQ) {
@@ -1028,9 +842,9 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
1028 } 842 }
1029 } 843 }
1030 844
1031 as_put_io_context(arq); 845 as_put_io_context(rq);
1032out: 846out:
1033 arq->state = AS_RQ_POSTSCHED; 847 RQ_SET_STATE(rq, AS_RQ_POSTSCHED);
1034} 848}
1035 849
1036/* 850/*
@@ -1041,27 +855,27 @@ out:
1041 */ 855 */
1042static void as_remove_queued_request(request_queue_t *q, struct request *rq) 856static void as_remove_queued_request(request_queue_t *q, struct request *rq)
1043{ 857{
1044 struct as_rq *arq = RQ_DATA(rq); 858 const int data_dir = rq_is_sync(rq);
1045 const int data_dir = arq->is_sync;
1046 struct as_data *ad = q->elevator->elevator_data; 859 struct as_data *ad = q->elevator->elevator_data;
860 struct io_context *ioc;
1047 861
1048 WARN_ON(arq->state != AS_RQ_QUEUED); 862 WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
1049 863
1050 if (arq->io_context && arq->io_context->aic) { 864 ioc = RQ_IOC(rq);
1051 BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); 865 if (ioc && ioc->aic) {
1052 atomic_dec(&arq->io_context->aic->nr_queued); 866 BUG_ON(!atomic_read(&ioc->aic->nr_queued));
867 atomic_dec(&ioc->aic->nr_queued);
1053 } 868 }
1054 869
1055 /* 870 /*
1056 * Update the "next_arq" cache if we are about to remove its 871 * Update the "next_rq" cache if we are about to remove its
1057 * entry 872 * entry
1058 */ 873 */
1059 if (ad->next_arq[data_dir] == arq) 874 if (ad->next_rq[data_dir] == rq)
1060 ad->next_arq[data_dir] = as_find_next_arq(ad, arq); 875 ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
1061 876
1062 list_del_init(&arq->fifo); 877 rq_fifo_clear(rq);
1063 as_del_arq_hash(arq); 878 as_del_rq_rb(ad, rq);
1064 as_del_arq_rb(ad, arq);
1065} 879}
1066 880
1067/* 881/*
@@ -1074,7 +888,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
1074 */ 888 */
1075static int as_fifo_expired(struct as_data *ad, int adir) 889static int as_fifo_expired(struct as_data *ad, int adir)
1076{ 890{
1077 struct as_rq *arq; 891 struct request *rq;
1078 long delta_jif; 892 long delta_jif;
1079 893
1080 delta_jif = jiffies - ad->last_check_fifo[adir]; 894 delta_jif = jiffies - ad->last_check_fifo[adir];
@@ -1088,9 +902,9 @@ static int as_fifo_expired(struct as_data *ad, int adir)
1088 if (list_empty(&ad->fifo_list[adir])) 902 if (list_empty(&ad->fifo_list[adir]))
1089 return 0; 903 return 0;
1090 904
1091 arq = list_entry_fifo(ad->fifo_list[adir].next); 905 rq = rq_entry_fifo(ad->fifo_list[adir].next);
1092 906
1093 return time_after(jiffies, arq->expires); 907 return time_after(jiffies, rq_fifo_time(rq));
1094} 908}
1095 909
1096/* 910/*
@@ -1113,25 +927,25 @@ static inline int as_batch_expired(struct as_data *ad)
1113/* 927/*
1114 * move an entry to dispatch queue 928 * move an entry to dispatch queue
1115 */ 929 */
1116static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) 930static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
1117{ 931{
1118 struct request *rq = arq->request; 932 const int data_dir = rq_is_sync(rq);
1119 const int data_dir = arq->is_sync;
1120 933
1121 BUG_ON(!RB_EMPTY_NODE(&arq->rb_node)); 934 BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
1122 935
1123 as_antic_stop(ad); 936 as_antic_stop(ad);
1124 ad->antic_status = ANTIC_OFF; 937 ad->antic_status = ANTIC_OFF;
1125 938
1126 /* 939 /*
1127 * This has to be set in order to be correctly updated by 940 * This has to be set in order to be correctly updated by
1128 * as_find_next_arq 941 * as_find_next_rq
1129 */ 942 */
1130 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; 943 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
1131 944
1132 if (data_dir == REQ_SYNC) { 945 if (data_dir == REQ_SYNC) {
946 struct io_context *ioc = RQ_IOC(rq);
1133 /* In case we have to anticipate after this */ 947 /* In case we have to anticipate after this */
1134 copy_io_context(&ad->io_context, &arq->io_context); 948 copy_io_context(&ad->io_context, &ioc);
1135 } else { 949 } else {
1136 if (ad->io_context) { 950 if (ad->io_context) {
1137 put_io_context(ad->io_context); 951 put_io_context(ad->io_context);
@@ -1143,19 +957,19 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1143 } 957 }
1144 ad->ioc_finished = 0; 958 ad->ioc_finished = 0;
1145 959
1146 ad->next_arq[data_dir] = as_find_next_arq(ad, arq); 960 ad->next_rq[data_dir] = as_find_next_rq(ad, rq);
1147 961
1148 /* 962 /*
1149 * take it off the sort and fifo list, add to dispatch queue 963 * take it off the sort and fifo list, add to dispatch queue
1150 */ 964 */
1151 as_remove_queued_request(ad->q, rq); 965 as_remove_queued_request(ad->q, rq);
1152 WARN_ON(arq->state != AS_RQ_QUEUED); 966 WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED);
1153 967
1154 elv_dispatch_sort(ad->q, rq); 968 elv_dispatch_sort(ad->q, rq);
1155 969
1156 arq->state = AS_RQ_DISPATCHED; 970 RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
1157 if (arq->io_context && arq->io_context->aic) 971 if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
1158 atomic_inc(&arq->io_context->aic->nr_dispatched); 972 atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
1159 ad->nr_dispatched++; 973 ad->nr_dispatched++;
1160} 974}
1161 975
@@ -1167,9 +981,9 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1167static int as_dispatch_request(request_queue_t *q, int force) 981static int as_dispatch_request(request_queue_t *q, int force)
1168{ 982{
1169 struct as_data *ad = q->elevator->elevator_data; 983 struct as_data *ad = q->elevator->elevator_data;
1170 struct as_rq *arq;
1171 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); 984 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
1172 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); 985 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
986 struct request *rq;
1173 987
1174 if (unlikely(force)) { 988 if (unlikely(force)) {
1175 /* 989 /*
@@ -1185,14 +999,14 @@ static int as_dispatch_request(request_queue_t *q, int force)
1185 ad->changed_batch = 0; 999 ad->changed_batch = 0;
1186 ad->new_batch = 0; 1000 ad->new_batch = 0;
1187 1001
1188 while (ad->next_arq[REQ_SYNC]) { 1002 while (ad->next_rq[REQ_SYNC]) {
1189 as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); 1003 as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
1190 dispatched++; 1004 dispatched++;
1191 } 1005 }
1192 ad->last_check_fifo[REQ_SYNC] = jiffies; 1006 ad->last_check_fifo[REQ_SYNC] = jiffies;
1193 1007
1194 while (ad->next_arq[REQ_ASYNC]) { 1008 while (ad->next_rq[REQ_ASYNC]) {
1195 as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); 1009 as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
1196 dispatched++; 1010 dispatched++;
1197 } 1011 }
1198 ad->last_check_fifo[REQ_ASYNC] = jiffies; 1012 ad->last_check_fifo[REQ_ASYNC] = jiffies;
@@ -1216,19 +1030,19 @@ static int as_dispatch_request(request_queue_t *q, int force)
1216 /* 1030 /*
1217 * batch is still running or no reads or no writes 1031 * batch is still running or no reads or no writes
1218 */ 1032 */
1219 arq = ad->next_arq[ad->batch_data_dir]; 1033 rq = ad->next_rq[ad->batch_data_dir];
1220 1034
1221 if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { 1035 if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
1222 if (as_fifo_expired(ad, REQ_SYNC)) 1036 if (as_fifo_expired(ad, REQ_SYNC))
1223 goto fifo_expired; 1037 goto fifo_expired;
1224 1038
1225 if (as_can_anticipate(ad, arq)) { 1039 if (as_can_anticipate(ad, rq)) {
1226 as_antic_waitreq(ad); 1040 as_antic_waitreq(ad);
1227 return 0; 1041 return 0;
1228 } 1042 }
1229 } 1043 }
1230 1044
1231 if (arq) { 1045 if (rq) {
1232 /* we have a "next request" */ 1046 /* we have a "next request" */
1233 if (reads && !writes) 1047 if (reads && !writes)
1234 ad->current_batch_expires = 1048 ad->current_batch_expires =
@@ -1256,7 +1070,7 @@ static int as_dispatch_request(request_queue_t *q, int force)
1256 ad->changed_batch = 1; 1070 ad->changed_batch = 1;
1257 } 1071 }
1258 ad->batch_data_dir = REQ_SYNC; 1072 ad->batch_data_dir = REQ_SYNC;
1259 arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); 1073 rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
1260 ad->last_check_fifo[ad->batch_data_dir] = jiffies; 1074 ad->last_check_fifo[ad->batch_data_dir] = jiffies;
1261 goto dispatch_request; 1075 goto dispatch_request;
1262 } 1076 }
@@ -1282,7 +1096,7 @@ dispatch_writes:
1282 ad->batch_data_dir = REQ_ASYNC; 1096 ad->batch_data_dir = REQ_ASYNC;
1283 ad->current_write_count = ad->write_batch_count; 1097 ad->current_write_count = ad->write_batch_count;
1284 ad->write_batch_idled = 0; 1098 ad->write_batch_idled = 0;
1285 arq = ad->next_arq[ad->batch_data_dir]; 1099 rq = ad->next_rq[ad->batch_data_dir];
1286 goto dispatch_request; 1100 goto dispatch_request;
1287 } 1101 }
1288 1102
@@ -1296,8 +1110,7 @@ dispatch_request:
1296 1110
1297 if (as_fifo_expired(ad, ad->batch_data_dir)) { 1111 if (as_fifo_expired(ad, ad->batch_data_dir)) {
1298fifo_expired: 1112fifo_expired:
1299 arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); 1113 rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next);
1300 BUG_ON(arq == NULL);
1301 } 1114 }
1302 1115
1303 if (ad->changed_batch) { 1116 if (ad->changed_batch) {
@@ -1316,70 +1129,58 @@ fifo_expired:
1316 } 1129 }
1317 1130
1318 /* 1131 /*
1319 * arq is the selected appropriate request. 1132 * rq is the selected appropriate request.
1320 */ 1133 */
1321 as_move_to_dispatch(ad, arq); 1134 as_move_to_dispatch(ad, rq);
1322 1135
1323 return 1; 1136 return 1;
1324} 1137}
1325 1138
1326/* 1139/*
1327 * add arq to rbtree and fifo 1140 * add rq to rbtree and fifo
1328 */ 1141 */
1329static void as_add_request(request_queue_t *q, struct request *rq) 1142static void as_add_request(request_queue_t *q, struct request *rq)
1330{ 1143{
1331 struct as_data *ad = q->elevator->elevator_data; 1144 struct as_data *ad = q->elevator->elevator_data;
1332 struct as_rq *arq = RQ_DATA(rq);
1333 int data_dir; 1145 int data_dir;
1334 1146
1335 arq->state = AS_RQ_NEW; 1147 RQ_SET_STATE(rq, AS_RQ_NEW);
1336 1148
1337 if (rq_data_dir(arq->request) == READ 1149 data_dir = rq_is_sync(rq);
1338 || (arq->request->flags & REQ_RW_SYNC))
1339 arq->is_sync = 1;
1340 else
1341 arq->is_sync = 0;
1342 data_dir = arq->is_sync;
1343 1150
1344 arq->io_context = as_get_io_context(); 1151 rq->elevator_private = as_get_io_context(q->node);
1345 1152
1346 if (arq->io_context) { 1153 if (RQ_IOC(rq)) {
1347 as_update_iohist(ad, arq->io_context->aic, arq->request); 1154 as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
1348 atomic_inc(&arq->io_context->aic->nr_queued); 1155 atomic_inc(&RQ_IOC(rq)->aic->nr_queued);
1349 } 1156 }
1350 1157
1351 as_add_arq_rb(ad, arq); 1158 as_add_rq_rb(ad, rq);
1352 if (rq_mergeable(arq->request))
1353 as_add_arq_hash(ad, arq);
1354 1159
1355 /* 1160 /*
1356 * set expire time (only used for reads) and add to fifo list 1161 * set expire time (only used for reads) and add to fifo list
1357 */ 1162 */
1358 arq->expires = jiffies + ad->fifo_expire[data_dir]; 1163 rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
1359 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); 1164 list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);
1360 1165
1361 as_update_arq(ad, arq); /* keep state machine up to date */ 1166 as_update_rq(ad, rq); /* keep state machine up to date */
1362 arq->state = AS_RQ_QUEUED; 1167 RQ_SET_STATE(rq, AS_RQ_QUEUED);
1363} 1168}
1364 1169
1365static void as_activate_request(request_queue_t *q, struct request *rq) 1170static void as_activate_request(request_queue_t *q, struct request *rq)
1366{ 1171{
1367 struct as_rq *arq = RQ_DATA(rq); 1172 WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED);
1368 1173 RQ_SET_STATE(rq, AS_RQ_REMOVED);
1369 WARN_ON(arq->state != AS_RQ_DISPATCHED); 1174 if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
1370 arq->state = AS_RQ_REMOVED; 1175 atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched);
1371 if (arq->io_context && arq->io_context->aic)
1372 atomic_dec(&arq->io_context->aic->nr_dispatched);
1373} 1176}
1374 1177
1375static void as_deactivate_request(request_queue_t *q, struct request *rq) 1178static void as_deactivate_request(request_queue_t *q, struct request *rq)
1376{ 1179{
1377 struct as_rq *arq = RQ_DATA(rq); 1180 WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED);
1378 1181 RQ_SET_STATE(rq, AS_RQ_DISPATCHED);
1379 WARN_ON(arq->state != AS_RQ_REMOVED); 1182 if (RQ_IOC(rq) && RQ_IOC(rq)->aic)
1380 arq->state = AS_RQ_DISPATCHED; 1183 atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched);
1381 if (arq->io_context && arq->io_context->aic)
1382 atomic_inc(&arq->io_context->aic->nr_dispatched);
1383} 1184}
1384 1185
1385/* 1186/*
@@ -1396,93 +1197,35 @@ static int as_queue_empty(request_queue_t *q)
1396 && list_empty(&ad->fifo_list[REQ_SYNC]); 1197 && list_empty(&ad->fifo_list[REQ_SYNC]);
1397} 1198}
1398 1199
1399static struct request *as_former_request(request_queue_t *q,
1400 struct request *rq)
1401{
1402 struct as_rq *arq = RQ_DATA(rq);
1403 struct rb_node *rbprev = rb_prev(&arq->rb_node);
1404 struct request *ret = NULL;
1405
1406 if (rbprev)
1407 ret = rb_entry_arq(rbprev)->request;
1408
1409 return ret;
1410}
1411
1412static struct request *as_latter_request(request_queue_t *q,
1413 struct request *rq)
1414{
1415 struct as_rq *arq = RQ_DATA(rq);
1416 struct rb_node *rbnext = rb_next(&arq->rb_node);
1417 struct request *ret = NULL;
1418
1419 if (rbnext)
1420 ret = rb_entry_arq(rbnext)->request;
1421
1422 return ret;
1423}
1424
1425static int 1200static int
1426as_merge(request_queue_t *q, struct request **req, struct bio *bio) 1201as_merge(request_queue_t *q, struct request **req, struct bio *bio)
1427{ 1202{
1428 struct as_data *ad = q->elevator->elevator_data; 1203 struct as_data *ad = q->elevator->elevator_data;
1429 sector_t rb_key = bio->bi_sector + bio_sectors(bio); 1204 sector_t rb_key = bio->bi_sector + bio_sectors(bio);
1430 struct request *__rq; 1205 struct request *__rq;
1431 int ret;
1432
1433 /*
1434 * see if the merge hash can satisfy a back merge
1435 */
1436 __rq = as_find_arq_hash(ad, bio->bi_sector);
1437 if (__rq) {
1438 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
1439
1440 if (elv_rq_merge_ok(__rq, bio)) {
1441 ret = ELEVATOR_BACK_MERGE;
1442 goto out;
1443 }
1444 }
1445 1206
1446 /* 1207 /*
1447 * check for front merge 1208 * check for front merge
1448 */ 1209 */
1449 __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio)); 1210 __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key);
1450 if (__rq) { 1211 if (__rq && elv_rq_merge_ok(__rq, bio)) {
1451 BUG_ON(rb_key != rq_rb_key(__rq)); 1212 *req = __rq;
1452 1213 return ELEVATOR_FRONT_MERGE;
1453 if (elv_rq_merge_ok(__rq, bio)) {
1454 ret = ELEVATOR_FRONT_MERGE;
1455 goto out;
1456 }
1457 } 1214 }
1458 1215
1459 return ELEVATOR_NO_MERGE; 1216 return ELEVATOR_NO_MERGE;
1460out:
1461 if (ret) {
1462 if (rq_mergeable(__rq))
1463 as_hot_arq_hash(ad, RQ_DATA(__rq));
1464 }
1465 *req = __rq;
1466 return ret;
1467} 1217}
1468 1218
1469static void as_merged_request(request_queue_t *q, struct request *req) 1219static void as_merged_request(request_queue_t *q, struct request *req, int type)
1470{ 1220{
1471 struct as_data *ad = q->elevator->elevator_data; 1221 struct as_data *ad = q->elevator->elevator_data;
1472 struct as_rq *arq = RQ_DATA(req);
1473
1474 /*
1475 * hash always needs to be repositioned, key is end sector
1476 */
1477 as_del_arq_hash(arq);
1478 as_add_arq_hash(ad, arq);
1479 1222
1480 /* 1223 /*
1481 * if the merge was a front merge, we need to reposition request 1224 * if the merge was a front merge, we need to reposition request
1482 */ 1225 */
1483 if (rq_rb_key(req) != arq->rb_key) { 1226 if (type == ELEVATOR_FRONT_MERGE) {
1484 as_del_arq_rb(ad, arq); 1227 as_del_rq_rb(ad, req);
1485 as_add_arq_rb(ad, arq); 1228 as_add_rq_rb(ad, req);
1486 /* 1229 /*
1487 * Note! At this stage of this and the next function, our next 1230 * Note! At this stage of this and the next function, our next
1488 * request may not be optimal - eg the request may have "grown" 1231 * request may not be optimal - eg the request may have "grown"
@@ -1494,38 +1237,22 @@ static void as_merged_request(request_queue_t *q, struct request *req)
1494static void as_merged_requests(request_queue_t *q, struct request *req, 1237static void as_merged_requests(request_queue_t *q, struct request *req,
1495 struct request *next) 1238 struct request *next)
1496{ 1239{
1497 struct as_data *ad = q->elevator->elevator_data;
1498 struct as_rq *arq = RQ_DATA(req);
1499 struct as_rq *anext = RQ_DATA(next);
1500
1501 BUG_ON(!arq);
1502 BUG_ON(!anext);
1503
1504 /* 1240 /*
1505 * reposition arq (this is the merged request) in hash, and in rbtree 1241 * if next expires before rq, assign its expire time to arq
1506 * in case of a front merge 1242 * and move into next position (next will be deleted) in fifo
1507 */ 1243 */
1508 as_del_arq_hash(arq); 1244 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
1509 as_add_arq_hash(ad, arq); 1245 if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
1510 1246 struct io_context *rioc = RQ_IOC(req);
1511 if (rq_rb_key(req) != arq->rb_key) { 1247 struct io_context *nioc = RQ_IOC(next);
1512 as_del_arq_rb(ad, arq);
1513 as_add_arq_rb(ad, arq);
1514 }
1515 1248
1516 /* 1249 list_move(&req->queuelist, &next->queuelist);
1517 * if anext expires before arq, assign its expire time to arq 1250 rq_set_fifo_time(req, rq_fifo_time(next));
1518 * and move into anext position (anext will be deleted) in fifo
1519 */
1520 if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) {
1521 if (time_before(anext->expires, arq->expires)) {
1522 list_move(&arq->fifo, &anext->fifo);
1523 arq->expires = anext->expires;
1524 /* 1251 /*
1525 * Don't copy here but swap, because when anext is 1252 * Don't copy here but swap, because when anext is
1526 * removed below, it must contain the unused context 1253 * removed below, it must contain the unused context
1527 */ 1254 */
1528 swap_io_context(&arq->io_context, &anext->io_context); 1255 swap_io_context(&rioc, &nioc);
1529 } 1256 }
1530 } 1257 }
1531 1258
@@ -1533,9 +1260,9 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
1533 * kill knowledge of next, this one is a goner 1260 * kill knowledge of next, this one is a goner
1534 */ 1261 */
1535 as_remove_queued_request(q, next); 1262 as_remove_queued_request(q, next);
1536 as_put_io_context(anext); 1263 as_put_io_context(next);
1537 1264
1538 anext->state = AS_RQ_MERGED; 1265 RQ_SET_STATE(next, AS_RQ_MERGED);
1539} 1266}
1540 1267
1541/* 1268/*
@@ -1553,61 +1280,18 @@ static void as_work_handler(void *data)
1553 unsigned long flags; 1280 unsigned long flags;
1554 1281
1555 spin_lock_irqsave(q->queue_lock, flags); 1282 spin_lock_irqsave(q->queue_lock, flags);
1556 if (!as_queue_empty(q)) 1283 blk_start_queueing(q);
1557 q->request_fn(q);
1558 spin_unlock_irqrestore(q->queue_lock, flags); 1284 spin_unlock_irqrestore(q->queue_lock, flags);
1559} 1285}
1560 1286
1561static void as_put_request(request_queue_t *q, struct request *rq) 1287static int as_may_queue(request_queue_t *q, int rw)
1562{
1563 struct as_data *ad = q->elevator->elevator_data;
1564 struct as_rq *arq = RQ_DATA(rq);
1565
1566 if (!arq) {
1567 WARN_ON(1);
1568 return;
1569 }
1570
1571 if (unlikely(arq->state != AS_RQ_POSTSCHED &&
1572 arq->state != AS_RQ_PRESCHED &&
1573 arq->state != AS_RQ_MERGED)) {
1574 printk("arq->state %d\n", arq->state);
1575 WARN_ON(1);
1576 }
1577
1578 mempool_free(arq, ad->arq_pool);
1579 rq->elevator_private = NULL;
1580}
1581
1582static int as_set_request(request_queue_t *q, struct request *rq,
1583 struct bio *bio, gfp_t gfp_mask)
1584{
1585 struct as_data *ad = q->elevator->elevator_data;
1586 struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
1587
1588 if (arq) {
1589 memset(arq, 0, sizeof(*arq));
1590 RB_CLEAR_NODE(&arq->rb_node);
1591 arq->request = rq;
1592 arq->state = AS_RQ_PRESCHED;
1593 arq->io_context = NULL;
1594 INIT_HLIST_NODE(&arq->hash);
1595 INIT_LIST_HEAD(&arq->fifo);
1596 rq->elevator_private = arq;
1597 return 0;
1598 }
1599
1600 return 1;
1601}
1602
1603static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
1604{ 1288{
1605 int ret = ELV_MQUEUE_MAY; 1289 int ret = ELV_MQUEUE_MAY;
1606 struct as_data *ad = q->elevator->elevator_data; 1290 struct as_data *ad = q->elevator->elevator_data;
1607 struct io_context *ioc; 1291 struct io_context *ioc;
1608 if (ad->antic_status == ANTIC_WAIT_REQ || 1292 if (ad->antic_status == ANTIC_WAIT_REQ ||
1609 ad->antic_status == ANTIC_WAIT_NEXT) { 1293 ad->antic_status == ANTIC_WAIT_NEXT) {
1610 ioc = as_get_io_context(); 1294 ioc = as_get_io_context(q->node);
1611 if (ad->io_context == ioc) 1295 if (ad->io_context == ioc)
1612 ret = ELV_MQUEUE_MUST; 1296 ret = ELV_MQUEUE_MUST;
1613 put_io_context(ioc); 1297 put_io_context(ioc);
@@ -1626,23 +1310,16 @@ static void as_exit_queue(elevator_t *e)
1626 BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); 1310 BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
1627 BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); 1311 BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
1628 1312
1629 mempool_destroy(ad->arq_pool);
1630 put_io_context(ad->io_context); 1313 put_io_context(ad->io_context);
1631 kfree(ad->hash);
1632 kfree(ad); 1314 kfree(ad);
1633} 1315}
1634 1316
1635/* 1317/*
1636 * initialize elevator private data (as_data), and alloc a arq for 1318 * initialize elevator private data (as_data).
1637 * each request on the free lists
1638 */ 1319 */
1639static void *as_init_queue(request_queue_t *q, elevator_t *e) 1320static void *as_init_queue(request_queue_t *q, elevator_t *e)
1640{ 1321{
1641 struct as_data *ad; 1322 struct as_data *ad;
1642 int i;
1643
1644 if (!arq_pool)
1645 return NULL;
1646 1323
1647 ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); 1324 ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
1648 if (!ad) 1325 if (!ad)
@@ -1651,30 +1328,12 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
1651 1328
1652 ad->q = q; /* Identify what queue the data belongs to */ 1329 ad->q = q; /* Identify what queue the data belongs to */
1653 1330
1654 ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES,
1655 GFP_KERNEL, q->node);
1656 if (!ad->hash) {
1657 kfree(ad);
1658 return NULL;
1659 }
1660
1661 ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1662 mempool_free_slab, arq_pool, q->node);
1663 if (!ad->arq_pool) {
1664 kfree(ad->hash);
1665 kfree(ad);
1666 return NULL;
1667 }
1668
1669 /* anticipatory scheduling helpers */ 1331 /* anticipatory scheduling helpers */
1670 ad->antic_timer.function = as_antic_timeout; 1332 ad->antic_timer.function = as_antic_timeout;
1671 ad->antic_timer.data = (unsigned long)q; 1333 ad->antic_timer.data = (unsigned long)q;
1672 init_timer(&ad->antic_timer); 1334 init_timer(&ad->antic_timer);
1673 INIT_WORK(&ad->antic_work, as_work_handler, q); 1335 INIT_WORK(&ad->antic_work, as_work_handler, q);
1674 1336
1675 for (i = 0; i < AS_HASH_ENTRIES; i++)
1676 INIT_HLIST_HEAD(&ad->hash[i]);
1677
1678 INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); 1337 INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
1679 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); 1338 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
1680 ad->sort_list[REQ_SYNC] = RB_ROOT; 1339 ad->sort_list[REQ_SYNC] = RB_ROOT;
@@ -1787,10 +1446,8 @@ static struct elevator_type iosched_as = {
1787 .elevator_deactivate_req_fn = as_deactivate_request, 1446 .elevator_deactivate_req_fn = as_deactivate_request,
1788 .elevator_queue_empty_fn = as_queue_empty, 1447 .elevator_queue_empty_fn = as_queue_empty,
1789 .elevator_completed_req_fn = as_completed_request, 1448 .elevator_completed_req_fn = as_completed_request,
1790 .elevator_former_req_fn = as_former_request, 1449 .elevator_former_req_fn = elv_rb_former_request,
1791 .elevator_latter_req_fn = as_latter_request, 1450 .elevator_latter_req_fn = elv_rb_latter_request,
1792 .elevator_set_req_fn = as_set_request,
1793 .elevator_put_req_fn = as_put_request,
1794 .elevator_may_queue_fn = as_may_queue, 1451 .elevator_may_queue_fn = as_may_queue,
1795 .elevator_init_fn = as_init_queue, 1452 .elevator_init_fn = as_init_queue,
1796 .elevator_exit_fn = as_exit_queue, 1453 .elevator_exit_fn = as_exit_queue,
@@ -1806,11 +1463,6 @@ static int __init as_init(void)
1806{ 1463{
1807 int ret; 1464 int ret;
1808 1465
1809 arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
1810 0, 0, NULL, NULL);
1811 if (!arq_pool)
1812 return -ENOMEM;
1813
1814 ret = elv_register(&iosched_as); 1466 ret = elv_register(&iosched_as);
1815 if (!ret) { 1467 if (!ret) {
1816 /* 1468 /*
@@ -1822,7 +1474,6 @@ static int __init as_init(void)
1822 return 0; 1474 return 0;
1823 } 1475 }
1824 1476
1825 kmem_cache_destroy(arq_pool);
1826 return ret; 1477 return ret;
1827} 1478}
1828 1479
@@ -1833,10 +1484,9 @@ static void __exit as_exit(void)
1833 ioc_gone = &all_gone; 1484 ioc_gone = &all_gone;
1834 /* ioc_gone's update must be visible before reading ioc_count */ 1485 /* ioc_gone's update must be visible before reading ioc_count */
1835 smp_wmb(); 1486 smp_wmb();
1836 if (atomic_read(&ioc_count)) 1487 if (elv_ioc_count_read(ioc_count))
1837 wait_for_completion(ioc_gone); 1488 wait_for_completion(ioc_gone);
1838 synchronize_rcu(); 1489 synchronize_rcu();
1839 kmem_cache_destroy(arq_pool);
1840} 1490}
1841 1491
1842module_init(as_init); 1492module_init(as_init);
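Across this series the I/O schedulers stop calling q->request_fn() by hand (see the as_work_handler() hunk above) and use the new blk_start_queueing() helper instead. Its ll_rw_blk.c body is not shown here, but the cfq_start_queueing() removed further down in this diff, whose comment asked for exactly this helper, suggests it is roughly the following sketch:

void blk_start_queueing(request_queue_t *q)
{
        if (!blk_queue_plugged(q))
                q->request_fn(q);               /* queue not plugged: run it now */
        else
                __generic_unplug_device(q);     /* otherwise force the pending unplug */
}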
diff --git a/block/blktrace.c b/block/blktrace.c
index 8ff33441d8a2..135593c8e45b 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006 Jens Axboe <axboe@suse.de> 2 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -69,7 +69,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
69/* 69/*
70 * Bio action bits of interest 70 * Bio action bits of interest
71 */ 71 */
72static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) }; 72static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
73 73
74/* 74/*
75 * More could be added as needed, taking care to increment the decrementer 75 * More could be added as needed, taking care to increment the decrementer
@@ -81,6 +81,8 @@ static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_AC
81 (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) 81 (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
82#define trace_ahead_bit(rw) \ 82#define trace_ahead_bit(rw) \
83 (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) 83 (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
84#define trace_meta_bit(rw) \
85 (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
84 86
85/* 87/*
86 * The worker for the various blk_add_trace*() types. Fills out a 88 * The worker for the various blk_add_trace*() types. Fills out a
@@ -103,6 +105,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
103 what |= bio_act[trace_barrier_bit(rw)]; 105 what |= bio_act[trace_barrier_bit(rw)];
104 what |= bio_act[trace_sync_bit(rw)]; 106 what |= bio_act[trace_sync_bit(rw)];
105 what |= bio_act[trace_ahead_bit(rw)]; 107 what |= bio_act[trace_ahead_bit(rw)];
108 what |= bio_act[trace_meta_bit(rw)];
106 109
107 pid = tsk->pid; 110 pid = tsk->pid;
108 if (unlikely(act_log_check(bt, what, sector, pid))) 111 if (unlikely(act_log_check(bt, what, sector, pid)))
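The bio_act[] table grows from 5 to 9 entries because the new trace_meta_bit() macro maps the BIO_RW_META bit to index 8, next to sync at index 2 and readahead at index 4. A small userspace check of the index arithmetic; the bit positions (AHEAD=1, SYNC=4, META=5) are assumptions here, but they are the only ones consistent with the shift amounts in the macros above:

#include <stdio.h>

#define BIO_RW_AHEAD    1       /* assumed bit positions, implied by the */
#define BIO_RW_SYNC     4       /* shift amounts used in the macros      */
#define BIO_RW_META     5

#define trace_sync_bit(rw)      (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
#define trace_ahead_bit(rw)     (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
#define trace_meta_bit(rw)      (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))

int main(void)
{
        unsigned int rw = (1 << BIO_RW_META) | (1 << BIO_RW_SYNC);

        /* prints "meta -> 8, sync -> 2, ahead -> 0": META selects slot 8,
         * which is why the table now needs 9 entries */
        printf("meta -> %u, sync -> %u, ahead -> %u\n",
               trace_meta_bit(rw), trace_sync_bit(rw), trace_ahead_bit(rw));
        return 0;
}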
@@ -473,6 +476,9 @@ static void blk_check_time(unsigned long long *t)
473 *t -= (a + b) / 2; 476 *t -= (a + b) / 2;
474} 477}
475 478
479/*
480 * calibrate our inter-CPU timings
481 */
476static void blk_trace_check_cpu_time(void *data) 482static void blk_trace_check_cpu_time(void *data)
477{ 483{
478 unsigned long long *t; 484 unsigned long long *t;
@@ -490,20 +496,6 @@ static void blk_trace_check_cpu_time(void *data)
490 put_cpu(); 496 put_cpu();
491} 497}
492 498
493/*
494 * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
495 * timings
496 */
497static void blk_trace_calibrate_offsets(void)
498{
499 unsigned long flags;
500
501 smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
502 local_irq_save(flags);
503 blk_trace_check_cpu_time(NULL);
504 local_irq_restore(flags);
505}
506
507static void blk_trace_set_ht_offsets(void) 499static void blk_trace_set_ht_offsets(void)
508{ 500{
509#if defined(CONFIG_SCHED_SMT) 501#if defined(CONFIG_SCHED_SMT)
@@ -532,7 +524,7 @@ static void blk_trace_set_ht_offsets(void)
532static __init int blk_trace_init(void) 524static __init int blk_trace_init(void)
533{ 525{
534 mutex_init(&blk_tree_mutex); 526 mutex_init(&blk_tree_mutex);
535 blk_trace_calibrate_offsets(); 527 on_each_cpu(blk_trace_check_cpu_time, NULL, 1, 1);
536 blk_trace_set_ht_offsets(); 528 blk_trace_set_ht_offsets();
537 529
538 return 0; 530 return 0;
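blk_trace_calibrate_offsets() can go because on_each_cpu() already does both halves of its job: call the function on every other CPU via smp_call_function(), then call it locally with interrupts off. A sketch of the generic helper being relied on; the real kernel implementation may differ in detail:

int on_each_cpu(void (*func)(void *info), void *info, int retry, int wait)
{
        int ret;

        preempt_disable();
        ret = smp_call_function(func, info, retry, wait);       /* other CPUs */
        local_irq_disable();
        func(info);                                             /* this CPU */
        local_irq_enable();
        preempt_enable();
        return ret;
}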
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3a3aee08ec5f..99116e2a310a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -4,7 +4,7 @@
4 * Based on ideas from a previously unfinished io 4 * Based on ideas from a previously unfinished io
5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
6 * 6 *
7 * Copyright (C) 2003 Jens Axboe <axboe@suse.de> 7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 */ 8 */
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
@@ -17,7 +17,6 @@
17 * tunables 17 * tunables
18 */ 18 */
19static const int cfq_quantum = 4; /* max queue in one round of service */ 19static const int cfq_quantum = 4; /* max queue in one round of service */
20static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
21static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 20static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
22static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 21static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
23static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ 22static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
@@ -32,8 +31,6 @@ static int cfq_slice_idle = HZ / 125;
32 31
33#define CFQ_KEY_ASYNC (0) 32#define CFQ_KEY_ASYNC (0)
34 33
35static DEFINE_SPINLOCK(cfq_exit_lock);
36
37/* 34/*
38 * for the hash of cfqq inside the cfqd 35 * for the hash of cfqq inside the cfqd
39 */ 36 */
@@ -41,37 +38,19 @@ static DEFINE_SPINLOCK(cfq_exit_lock);
41#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) 38#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
42#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) 39#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
43 40
44/*
45 * for the hash of crq inside the cfqq
46 */
47#define CFQ_MHASH_SHIFT 6
48#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
49#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
50#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
51#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
52#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
53
54#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) 41#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
55#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
56 42
57#define RQ_DATA(rq) (rq)->elevator_private 43#define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private)
44#define RQ_CFQQ(rq) ((rq)->elevator_private2)
58 45
59/*
60 * rb-tree defines
61 */
62#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
63#define rq_rb_key(rq) (rq)->sector
64
65static kmem_cache_t *crq_pool;
66static kmem_cache_t *cfq_pool; 46static kmem_cache_t *cfq_pool;
67static kmem_cache_t *cfq_ioc_pool; 47static kmem_cache_t *cfq_ioc_pool;
68 48
69static atomic_t ioc_count = ATOMIC_INIT(0); 49static DEFINE_PER_CPU(unsigned long, ioc_count);
70static struct completion *ioc_gone; 50static struct completion *ioc_gone;
71 51
72#define CFQ_PRIO_LISTS IOPRIO_BE_NR 52#define CFQ_PRIO_LISTS IOPRIO_BE_NR
73#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 53#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
74#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
75#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 54#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
76 55
77#define ASYNC (0) 56#define ASYNC (0)
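ioc_count changes from a global atomic_t into a per-CPU counter, driven through the elv_ioc_count_*() helpers used later in this diff (inc/dec on cic allocation and free, read in the exit paths). Those helpers are defined elsewhere in the series; the sketch below only shows the assumed semantics: updates touch the local CPU's counter, reads sum across all CPUs, so the hot paths stay cheap and the module-exit path pairs the read with an smp_wmb().

/* assumed semantics of the elevator per-cpu counter helpers (sketch) */
#define elv_ioc_count_mod(name, __val)                  \
        do {                                            \
                local_irq_disable();                    \
                __get_cpu_var(name) += (__val);         \
                local_irq_enable();                     \
        } while (0)

#define elv_ioc_count_inc(name) elv_ioc_count_mod(name, 1)
#define elv_ioc_count_dec(name) elv_ioc_count_mod(name, -1)

#define elv_ioc_count_read(name)                        \
({                                                      \
        unsigned long __total = 0;                      \
        int __cpu;                                      \
        for_each_possible_cpu(__cpu)                    \
                __total += per_cpu(name, __cpu);        \
        __total;                                        \
})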
@@ -103,29 +82,14 @@ struct cfq_data {
103 unsigned int busy_queues; 82 unsigned int busy_queues;
104 83
105 /* 84 /*
106 * non-ordered list of empty cfqq's
107 */
108 struct list_head empty_list;
109
110 /*
111 * cfqq lookup hash 85 * cfqq lookup hash
112 */ 86 */
113 struct hlist_head *cfq_hash; 87 struct hlist_head *cfq_hash;
114 88
115 /*
116 * global crq hash for all queues
117 */
118 struct hlist_head *crq_hash;
119
120 mempool_t *crq_pool;
121
122 int rq_in_driver; 89 int rq_in_driver;
123 int hw_tag; 90 int hw_tag;
124 91
125 /* 92 /*
126 * schedule slice state info
127 */
128 /*
129 * idle window management 93 * idle window management
130 */ 94 */
131 struct timer_list idle_slice_timer; 95 struct timer_list idle_slice_timer;
@@ -141,13 +105,10 @@ struct cfq_data {
141 sector_t last_sector; 105 sector_t last_sector;
142 unsigned long last_end_request; 106 unsigned long last_end_request;
143 107
144 unsigned int rq_starved;
145
146 /* 108 /*
147 * tunables, see top of file 109 * tunables, see top of file
148 */ 110 */
149 unsigned int cfq_quantum; 111 unsigned int cfq_quantum;
150 unsigned int cfq_queued;
151 unsigned int cfq_fifo_expire[2]; 112 unsigned int cfq_fifo_expire[2];
152 unsigned int cfq_back_penalty; 113 unsigned int cfq_back_penalty;
153 unsigned int cfq_back_max; 114 unsigned int cfq_back_max;
@@ -170,23 +131,24 @@ struct cfq_queue {
170 struct hlist_node cfq_hash; 131 struct hlist_node cfq_hash;
171 /* hash key */ 132 /* hash key */
172 unsigned int key; 133 unsigned int key;
173 /* on either rr or empty list of cfqd */ 134 /* member of the rr/busy/cur/idle cfqd list */
174 struct list_head cfq_list; 135 struct list_head cfq_list;
175 /* sorted list of pending requests */ 136 /* sorted list of pending requests */
176 struct rb_root sort_list; 137 struct rb_root sort_list;
177 /* if fifo isn't expired, next request to serve */ 138 /* if fifo isn't expired, next request to serve */
178 struct cfq_rq *next_crq; 139 struct request *next_rq;
179 /* requests queued in sort_list */ 140 /* requests queued in sort_list */
180 int queued[2]; 141 int queued[2];
181 /* currently allocated requests */ 142 /* currently allocated requests */
182 int allocated[2]; 143 int allocated[2];
144 /* pending metadata requests */
145 int meta_pending;
183 /* fifo list of requests in sort_list */ 146 /* fifo list of requests in sort_list */
184 struct list_head fifo; 147 struct list_head fifo;
185 148
186 unsigned long slice_start; 149 unsigned long slice_start;
187 unsigned long slice_end; 150 unsigned long slice_end;
188 unsigned long slice_left; 151 unsigned long slice_left;
189 unsigned long service_last;
190 152
191 /* number of requests that are on the dispatch list */ 153 /* number of requests that are on the dispatch list */
192 int on_dispatch[2]; 154 int on_dispatch[2];
@@ -199,18 +161,6 @@ struct cfq_queue {
199 unsigned int flags; 161 unsigned int flags;
200}; 162};
201 163
202struct cfq_rq {
203 struct rb_node rb_node;
204 sector_t rb_key;
205 struct request *request;
206 struct hlist_node hash;
207
208 struct cfq_queue *cfq_queue;
209 struct cfq_io_context *io_context;
210
211 unsigned int crq_flags;
212};
213
214enum cfqq_state_flags { 164enum cfqq_state_flags {
215 CFQ_CFQQ_FLAG_on_rr = 0, 165 CFQ_CFQQ_FLAG_on_rr = 0,
216 CFQ_CFQQ_FLAG_wait_request, 166 CFQ_CFQQ_FLAG_wait_request,
@@ -220,6 +170,7 @@ enum cfqq_state_flags {
220 CFQ_CFQQ_FLAG_fifo_expire, 170 CFQ_CFQQ_FLAG_fifo_expire,
221 CFQ_CFQQ_FLAG_idle_window, 171 CFQ_CFQQ_FLAG_idle_window,
222 CFQ_CFQQ_FLAG_prio_changed, 172 CFQ_CFQQ_FLAG_prio_changed,
173 CFQ_CFQQ_FLAG_queue_new,
223}; 174};
224 175
225#define CFQ_CFQQ_FNS(name) \ 176#define CFQ_CFQQ_FNS(name) \
@@ -244,70 +195,14 @@ CFQ_CFQQ_FNS(must_dispatch);
244CFQ_CFQQ_FNS(fifo_expire); 195CFQ_CFQQ_FNS(fifo_expire);
245CFQ_CFQQ_FNS(idle_window); 196CFQ_CFQQ_FNS(idle_window);
246CFQ_CFQQ_FNS(prio_changed); 197CFQ_CFQQ_FNS(prio_changed);
198CFQ_CFQQ_FNS(queue_new);
247#undef CFQ_CFQQ_FNS 199#undef CFQ_CFQQ_FNS
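CFQ_CFQQ_FNS(queue_new) generates the mark/clear/test helpers for the new flag. Going by the CFQ_CFQQ_FNS macro referenced above (and the removed CFQ_CRQ_FNS variant just below, which has the same shape), the expansion is roughly:

static inline void cfq_mark_cfqq_queue_new(struct cfq_queue *cfqq)
{
        cfqq->flags |= (1 << CFQ_CFQQ_FLAG_queue_new);
}
static inline void cfq_clear_cfqq_queue_new(struct cfq_queue *cfqq)
{
        cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_queue_new);
}
static inline int cfq_cfqq_queue_new(const struct cfq_queue *cfqq)
{
        return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_queue_new)) != 0;
}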
248 200
249enum cfq_rq_state_flags {
250 CFQ_CRQ_FLAG_is_sync = 0,
251};
252
253#define CFQ_CRQ_FNS(name) \
254static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \
255{ \
256 crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \
257} \
258static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \
259{ \
260 crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \
261} \
262static inline int cfq_crq_##name(const struct cfq_rq *crq) \
263{ \
264 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \
265}
266
267CFQ_CRQ_FNS(is_sync);
268#undef CFQ_CRQ_FNS
269
270static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); 201static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
271static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); 202static void cfq_dispatch_insert(request_queue_t *, struct request *);
272static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); 203static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
273 204
274/* 205/*
275 * lots of deadline iosched dupes, can be abstracted later...
276 */
277static inline void cfq_del_crq_hash(struct cfq_rq *crq)
278{
279 hlist_del_init(&crq->hash);
280}
281
282static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
283{
284 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
285
286 hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
287}
288
289static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
290{
291 struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
292 struct hlist_node *entry, *next;
293
294 hlist_for_each_safe(entry, next, hash_list) {
295 struct cfq_rq *crq = list_entry_hash(entry);
296 struct request *__rq = crq->request;
297
298 if (!rq_mergeable(__rq)) {
299 cfq_del_crq_hash(crq);
300 continue;
301 }
302
303 if (rq_hash_key(__rq) == offset)
304 return __rq;
305 }
306
307 return NULL;
308}
309
310/*
311 * scheduler run of queue, if there are requests pending and no one in the 206 * scheduler run of queue, if there are requests pending and no one in the
312 * driver that will restart queueing 207 * driver that will restart queueing
313 */ 208 */
@@ -333,12 +228,12 @@ static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
333} 228}
334 229
335/* 230/*
336 * Lifted from AS - choose which of crq1 and crq2 that is best served now. 231 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
337 * We choose the request that is closest to the head right now. Distance 232 * We choose the request that is closest to the head right now. Distance
338 * behind the head is penalized and only allowed to a certain extent. 233 * behind the head is penalized and only allowed to a certain extent.
339 */ 234 */
340static struct cfq_rq * 235static struct request *
341cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) 236cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
342{ 237{
343 sector_t last, s1, s2, d1 = 0, d2 = 0; 238 sector_t last, s1, s2, d1 = 0, d2 = 0;
344 unsigned long back_max; 239 unsigned long back_max;
@@ -346,18 +241,22 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
346#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ 241#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
347 unsigned wrap = 0; /* bit mask: requests behind the disk head? */ 242 unsigned wrap = 0; /* bit mask: requests behind the disk head? */
348 243
349 if (crq1 == NULL || crq1 == crq2) 244 if (rq1 == NULL || rq1 == rq2)
350 return crq2; 245 return rq2;
351 if (crq2 == NULL) 246 if (rq2 == NULL)
352 return crq1; 247 return rq1;
353 248
354 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) 249 if (rq_is_sync(rq1) && !rq_is_sync(rq2))
355 return crq1; 250 return rq1;
356 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) 251 else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
357 return crq2; 252 return rq2;
253 if (rq_is_meta(rq1) && !rq_is_meta(rq2))
254 return rq1;
255 else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
256 return rq2;
358 257
359 s1 = crq1->request->sector; 258 s1 = rq1->sector;
360 s2 = crq2->request->sector; 259 s2 = rq2->sector;
361 260
362 last = cfqd->last_sector; 261 last = cfqd->last_sector;
363 262
@@ -392,23 +291,23 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
392 * check two variables for all permutations: --> faster! 291 * check two variables for all permutations: --> faster!
393 */ 292 */
394 switch (wrap) { 293 switch (wrap) {
395 case 0: /* common case for CFQ: crq1 and crq2 not wrapped */ 294 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
396 if (d1 < d2) 295 if (d1 < d2)
397 return crq1; 296 return rq1;
398 else if (d2 < d1) 297 else if (d2 < d1)
399 return crq2; 298 return rq2;
400 else { 299 else {
401 if (s1 >= s2) 300 if (s1 >= s2)
402 return crq1; 301 return rq1;
403 else 302 else
404 return crq2; 303 return rq2;
405 } 304 }
406 305
407 case CFQ_RQ2_WRAP: 306 case CFQ_RQ2_WRAP:
408 return crq1; 307 return rq1;
409 case CFQ_RQ1_WRAP: 308 case CFQ_RQ1_WRAP:
410 return crq2; 309 return rq2;
411 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */ 310 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
412 default: 311 default:
413 /* 312 /*
414 * Since both rqs are wrapped, 313 * Since both rqs are wrapped,
@@ -417,50 +316,43 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
417 * since back seek takes more time than forward. 316 * since back seek takes more time than forward.
418 */ 317 */
419 if (s1 <= s2) 318 if (s1 <= s2)
420 return crq1; 319 return rq1;
421 else 320 else
422 return crq2; 321 return rq2;
423 } 322 }
424} 323}
425 324
426/* 325/*
427 * would be nice to take fifo expire time into account as well 326 * would be nice to take fifo expire time into account as well
428 */ 327 */
429static struct cfq_rq * 328static struct request *
430cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 329cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
431 struct cfq_rq *last) 330 struct request *last)
432{ 331{
433 struct cfq_rq *crq_next = NULL, *crq_prev = NULL; 332 struct rb_node *rbnext = rb_next(&last->rb_node);
434 struct rb_node *rbnext, *rbprev; 333 struct rb_node *rbprev = rb_prev(&last->rb_node);
435 334 struct request *next = NULL, *prev = NULL;
436 if (!(rbnext = rb_next(&last->rb_node))) {
437 rbnext = rb_first(&cfqq->sort_list);
438 if (rbnext == &last->rb_node)
439 rbnext = NULL;
440 }
441 335
442 rbprev = rb_prev(&last->rb_node); 336 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
443 337
444 if (rbprev) 338 if (rbprev)
445 crq_prev = rb_entry_crq(rbprev); 339 prev = rb_entry_rq(rbprev);
446 if (rbnext)
447 crq_next = rb_entry_crq(rbnext);
448
449 return cfq_choose_req(cfqd, crq_next, crq_prev);
450}
451 340
452static void cfq_update_next_crq(struct cfq_rq *crq) 341 if (rbnext)
453{ 342 next = rb_entry_rq(rbnext);
454 struct cfq_queue *cfqq = crq->cfq_queue; 343 else {
344 rbnext = rb_first(&cfqq->sort_list);
345 if (rbnext && rbnext != &last->rb_node)
346 next = rb_entry_rq(rbnext);
347 }
455 348
456 if (cfqq->next_crq == crq) 349 return cfq_choose_req(cfqd, next, prev);
457 cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
458} 350}
459 351
460static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) 352static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
461{ 353{
462 struct cfq_data *cfqd = cfqq->cfqd; 354 struct cfq_data *cfqd = cfqq->cfqd;
463 struct list_head *list, *entry; 355 struct list_head *list;
464 356
465 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 357 BUG_ON(!cfq_cfqq_on_rr(cfqq));
466 358
@@ -485,31 +377,26 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
485 } 377 }
486 378
487 /* 379 /*
488 * if queue was preempted, just add to front to be fair. busy_rr 380 * If this queue was preempted or is new (never been serviced), let
489 * isn't sorted, but insert at the back for fairness. 381 * it be added first for fairness but beind other new queues.
382 * Otherwise, just add to the back of the list.
490 */ 383 */
491 if (preempted || list == &cfqd->busy_rr) { 384 if (preempted || cfq_cfqq_queue_new(cfqq)) {
492 if (preempted) 385 struct list_head *n = list;
493 list = list->prev; 386 struct cfq_queue *__cfqq;
494 387
495 list_add_tail(&cfqq->cfq_list, list); 388 while (n->next != list) {
496 return; 389 __cfqq = list_entry_cfqq(n->next);
497 } 390 if (!cfq_cfqq_queue_new(__cfqq))
391 break;
498 392
499 /* 393 n = n->next;
500 * sort by when queue was last serviced 394 }
501 */
502 entry = list;
503 while ((entry = entry->prev) != list) {
504 struct cfq_queue *__cfqq = list_entry_cfqq(entry);
505 395
506 if (!__cfqq->service_last) 396 list = n;
507 break;
508 if (time_before(__cfqq->service_last, cfqq->service_last))
509 break;
510 } 397 }
511 398
512 list_add(&cfqq->cfq_list, entry); 399 list_add_tail(&cfqq->cfq_list, list);
513} 400}
514 401
515/* 402/*
@@ -531,7 +418,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
531{ 418{
532 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 419 BUG_ON(!cfq_cfqq_on_rr(cfqq));
533 cfq_clear_cfqq_on_rr(cfqq); 420 cfq_clear_cfqq_on_rr(cfqq);
534 list_move(&cfqq->cfq_list, &cfqd->empty_list); 421 list_del_init(&cfqq->cfq_list);
535 422
536 BUG_ON(!cfqd->busy_queues); 423 BUG_ON(!cfqd->busy_queues);
537 cfqd->busy_queues--; 424 cfqd->busy_queues--;
@@ -540,81 +427,43 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
540/* 427/*
541 * rb tree support functions 428 * rb tree support functions
542 */ 429 */
543static inline void cfq_del_crq_rb(struct cfq_rq *crq) 430static inline void cfq_del_rq_rb(struct request *rq)
544{ 431{
545 struct cfq_queue *cfqq = crq->cfq_queue; 432 struct cfq_queue *cfqq = RQ_CFQQ(rq);
546 struct cfq_data *cfqd = cfqq->cfqd; 433 struct cfq_data *cfqd = cfqq->cfqd;
547 const int sync = cfq_crq_is_sync(crq); 434 const int sync = rq_is_sync(rq);
548 435
549 BUG_ON(!cfqq->queued[sync]); 436 BUG_ON(!cfqq->queued[sync]);
550 cfqq->queued[sync]--; 437 cfqq->queued[sync]--;
551 438
552 cfq_update_next_crq(crq); 439 elv_rb_del(&cfqq->sort_list, rq);
553
554 rb_erase(&crq->rb_node, &cfqq->sort_list);
555 440
556 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) 441 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
557 cfq_del_cfqq_rr(cfqd, cfqq); 442 cfq_del_cfqq_rr(cfqd, cfqq);
558} 443}
559 444
560static struct cfq_rq * 445static void cfq_add_rq_rb(struct request *rq)
561__cfq_add_crq_rb(struct cfq_rq *crq)
562{ 446{
563 struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; 447 struct cfq_queue *cfqq = RQ_CFQQ(rq);
564 struct rb_node *parent = NULL;
565 struct cfq_rq *__crq;
566
567 while (*p) {
568 parent = *p;
569 __crq = rb_entry_crq(parent);
570
571 if (crq->rb_key < __crq->rb_key)
572 p = &(*p)->rb_left;
573 else if (crq->rb_key > __crq->rb_key)
574 p = &(*p)->rb_right;
575 else
576 return __crq;
577 }
578
579 rb_link_node(&crq->rb_node, parent, p);
580 return NULL;
581}
582
583static void cfq_add_crq_rb(struct cfq_rq *crq)
584{
585 struct cfq_queue *cfqq = crq->cfq_queue;
586 struct cfq_data *cfqd = cfqq->cfqd; 448 struct cfq_data *cfqd = cfqq->cfqd;
587 struct request *rq = crq->request; 449 struct request *__alias;
588 struct cfq_rq *__alias;
589 450
590 crq->rb_key = rq_rb_key(rq); 451 cfqq->queued[rq_is_sync(rq)]++;
591 cfqq->queued[cfq_crq_is_sync(crq)]++;
592 452
593 /* 453 /*
594 * looks a little odd, but the first insert might return an alias. 454 * looks a little odd, but the first insert might return an alias.
595 * if that happens, put the alias on the dispatch list 455 * if that happens, put the alias on the dispatch list
596 */ 456 */
597 while ((__alias = __cfq_add_crq_rb(crq)) != NULL) 457 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
598 cfq_dispatch_insert(cfqd->queue, __alias); 458 cfq_dispatch_insert(cfqd->queue, __alias);
599
600 rb_insert_color(&crq->rb_node, &cfqq->sort_list);
601
602 if (!cfq_cfqq_on_rr(cfqq))
603 cfq_add_cfqq_rr(cfqd, cfqq);
604
605 /*
606 * check if this request is a better next-serve candidate
607 */
608 cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
609} 459}
610 460
611static inline void 461static inline void
612cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) 462cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
613{ 463{
614 rb_erase(&crq->rb_node, &cfqq->sort_list); 464 elv_rb_del(&cfqq->sort_list, rq);
615 cfqq->queued[cfq_crq_is_sync(crq)]--; 465 cfqq->queued[rq_is_sync(rq)]--;
616 466 cfq_add_rq_rb(rq);
617 cfq_add_crq_rb(crq);
618} 467}
619 468
620static struct request * 469static struct request *
@@ -623,27 +472,14 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
623 struct task_struct *tsk = current; 472 struct task_struct *tsk = current;
624 pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio)); 473 pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
625 struct cfq_queue *cfqq; 474 struct cfq_queue *cfqq;
626 struct rb_node *n;
627 sector_t sector;
628 475
629 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); 476 cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
630 if (!cfqq) 477 if (cfqq) {
631 goto out; 478 sector_t sector = bio->bi_sector + bio_sectors(bio);
632
633 sector = bio->bi_sector + bio_sectors(bio);
634 n = cfqq->sort_list.rb_node;
635 while (n) {
636 struct cfq_rq *crq = rb_entry_crq(n);
637 479
638 if (sector < crq->rb_key) 480 return elv_rb_find(&cfqq->sort_list, sector);
639 n = n->rb_left;
640 else if (sector > crq->rb_key)
641 n = n->rb_right;
642 else
643 return crq->request;
644 } 481 }
645 482
646out:
647 return NULL; 483 return NULL;
648} 484}
649 485
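The open-coded rbtree walks are gone: cfq_add_rq_rb() now goes through elv_rb_add() and cfq_find_rq_fmerge() through elv_rb_find(), both keyed on rq->sector exactly as the removed rq_rb_key()/__cfq_add_crq_rb() were. Their elevator.c bodies are not shown here; judging from the code they replace, they look roughly like:

struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct request *__rq;

        while (*p) {
                parent = *p;
                __rq = rb_entry(parent, struct request, rb_node);

                if (rq->sector < __rq->sector)
                        p = &(*p)->rb_left;
                else if (rq->sector > __rq->sector)
                        p = &(*p)->rb_right;
                else
                        return __rq;    /* alias: same start sector */
        }

        rb_link_node(&rq->rb_node, parent, p);
        rb_insert_color(&rq->rb_node, root);
        return NULL;
}

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
        struct rb_node *n = root->rb_node;
        struct request *rq;

        while (n) {
                rq = rb_entry(n, struct request, rb_node);

                if (sector < rq->sector)
                        n = n->rb_left;
                else if (sector > rq->sector)
                        n = n->rb_right;
                else
                        return rq;
        }

        return NULL;
}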
@@ -673,11 +509,18 @@ static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
673 509
674static void cfq_remove_request(struct request *rq) 510static void cfq_remove_request(struct request *rq)
675{ 511{
676 struct cfq_rq *crq = RQ_DATA(rq); 512 struct cfq_queue *cfqq = RQ_CFQQ(rq);
513
514 if (cfqq->next_rq == rq)
515 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
677 516
678 list_del_init(&rq->queuelist); 517 list_del_init(&rq->queuelist);
679 cfq_del_crq_rb(crq); 518 cfq_del_rq_rb(rq);
680 cfq_del_crq_hash(crq); 519
520 if (rq_is_meta(rq)) {
521 WARN_ON(!cfqq->meta_pending);
522 cfqq->meta_pending--;
523 }
681} 524}
682 525
683static int 526static int
@@ -685,39 +528,23 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
685{ 528{
686 struct cfq_data *cfqd = q->elevator->elevator_data; 529 struct cfq_data *cfqd = q->elevator->elevator_data;
687 struct request *__rq; 530 struct request *__rq;
688 int ret;
689
690 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
691 if (__rq && elv_rq_merge_ok(__rq, bio)) {
692 ret = ELEVATOR_BACK_MERGE;
693 goto out;
694 }
695 531
696 __rq = cfq_find_rq_fmerge(cfqd, bio); 532 __rq = cfq_find_rq_fmerge(cfqd, bio);
697 if (__rq && elv_rq_merge_ok(__rq, bio)) { 533 if (__rq && elv_rq_merge_ok(__rq, bio)) {
698 ret = ELEVATOR_FRONT_MERGE; 534 *req = __rq;
699 goto out; 535 return ELEVATOR_FRONT_MERGE;
700 } 536 }
701 537
702 return ELEVATOR_NO_MERGE; 538 return ELEVATOR_NO_MERGE;
703out:
704 *req = __rq;
705 return ret;
706} 539}
707 540
708static void cfq_merged_request(request_queue_t *q, struct request *req) 541static void cfq_merged_request(request_queue_t *q, struct request *req,
542 int type)
709{ 543{
710 struct cfq_data *cfqd = q->elevator->elevator_data; 544 if (type == ELEVATOR_FRONT_MERGE) {
711 struct cfq_rq *crq = RQ_DATA(req); 545 struct cfq_queue *cfqq = RQ_CFQQ(req);
712
713 cfq_del_crq_hash(crq);
714 cfq_add_crq_hash(cfqd, crq);
715
716 if (rq_rb_key(req) != crq->rb_key) {
717 struct cfq_queue *cfqq = crq->cfq_queue;
718 546
719 cfq_update_next_crq(crq); 547 cfq_reposition_rq_rb(cfqq, req);
720 cfq_reposition_crq_rb(cfqq, crq);
721 } 548 }
722} 549}
723 550
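cfq_merged_request() now only needs to act on ELEVATOR_FRONT_MERGE: the per-queue rbtree is keyed on the request's start sector, so a back merge (which only grows nr_sectors) leaves the key and the tree position alone, while a front merge lowers rq->sector and therefore requires cfq_reposition_rq_rb(). A toy userspace illustration of that asymmetry; toy_rq, the helper names and the sector values are made up purely for the demonstration:

#include <assert.h>

struct toy_rq {
        unsigned long long sector;      /* rbtree key: start sector */
        unsigned long long nr_sectors;
};

static void back_merge(struct toy_rq *rq, unsigned long long bio_sectors)
{
        rq->nr_sectors += bio_sectors;          /* key untouched */
}

static void front_merge(struct toy_rq *rq, unsigned long long bio_sector,
                        unsigned long long bio_sectors)
{
        rq->sector = bio_sector;                /* key moves: must reposition */
        rq->nr_sectors += bio_sectors;
}

int main(void)
{
        struct toy_rq rq = { .sector = 100, .nr_sectors = 8 };

        back_merge(&rq, 8);
        assert(rq.sector == 100);       /* no rbtree update needed */

        front_merge(&rq, 92, 8);
        assert(rq.sector == 92);        /* the cfq_reposition_rq_rb() case */
        return 0;
}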
@@ -725,8 +552,6 @@ static void
725cfq_merged_requests(request_queue_t *q, struct request *rq, 552cfq_merged_requests(request_queue_t *q, struct request *rq,
726 struct request *next) 553 struct request *next)
727{ 554{
728 cfq_merged_request(q, rq);
729
730 /* 555 /*
731 * reposition in fifo if next is older than rq 556 * reposition in fifo if next is older than rq
732 */ 557 */
@@ -768,13 +593,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
768 if (cfq_cfqq_wait_request(cfqq)) 593 if (cfq_cfqq_wait_request(cfqq))
769 del_timer(&cfqd->idle_slice_timer); 594 del_timer(&cfqd->idle_slice_timer);
770 595
771 if (!preempted && !cfq_cfqq_dispatched(cfqq)) { 596 if (!preempted && !cfq_cfqq_dispatched(cfqq))
772 cfqq->service_last = now;
773 cfq_schedule_dispatch(cfqd); 597 cfq_schedule_dispatch(cfqd);
774 }
775 598
776 cfq_clear_cfqq_must_dispatch(cfqq); 599 cfq_clear_cfqq_must_dispatch(cfqq);
777 cfq_clear_cfqq_wait_request(cfqq); 600 cfq_clear_cfqq_wait_request(cfqq);
601 cfq_clear_cfqq_queue_new(cfqq);
778 602
779 /* 603 /*
780 * store what was left of this slice, if the queue idled out 604 * store what was left of this slice, if the queue idled out
@@ -868,26 +692,25 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
868{ 692{
869 struct cfq_queue *cfqq = NULL; 693 struct cfq_queue *cfqq = NULL;
870 694
871 /* 695 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) {
872 * if current list is non-empty, grab first entry. if it is empty, 696 /*
873 * get next prio level and grab first entry then if any are spliced 697 * if current list is non-empty, grab first entry. if it is
874 */ 698 * empty, get next prio level and grab first entry then if any
875 if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) 699 * are spliced
700 */
876 cfqq = list_entry_cfqq(cfqd->cur_rr.next); 701 cfqq = list_entry_cfqq(cfqd->cur_rr.next);
877 702 } else if (!list_empty(&cfqd->busy_rr)) {
878 /* 703 /*
879 * If no new queues are available, check if the busy list has some 704 * If no new queues are available, check if the busy list has
880 * before falling back to idle io. 705 * some before falling back to idle io.
881 */ 706 */
882 if (!cfqq && !list_empty(&cfqd->busy_rr))
883 cfqq = list_entry_cfqq(cfqd->busy_rr.next); 707 cfqq = list_entry_cfqq(cfqd->busy_rr.next);
884 708 } else if (!list_empty(&cfqd->idle_rr)) {
885 /* 709 /*
886 * if we have idle queues and no rt or be queues had pending 710 * if we have idle queues and no rt or be queues had pending
887 * requests, either allow immediate service if the grace period 711 * requests, either allow immediate service if the grace period
888 * has passed or arm the idle grace timer 712 * has passed or arm the idle grace timer
889 */ 713 */
890 if (!cfqq && !list_empty(&cfqd->idle_rr)) {
891 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; 714 unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
892 715
893 if (time_after_eq(jiffies, end)) 716 if (time_after_eq(jiffies, end))
@@ -942,16 +765,14 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
942 return 1; 765 return 1;
943} 766}
944 767
945static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) 768static void cfq_dispatch_insert(request_queue_t *q, struct request *rq)
946{ 769{
947 struct cfq_data *cfqd = q->elevator->elevator_data; 770 struct cfq_data *cfqd = q->elevator->elevator_data;
948 struct cfq_queue *cfqq = crq->cfq_queue; 771 struct cfq_queue *cfqq = RQ_CFQQ(rq);
949 struct request *rq;
950 772
951 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); 773 cfq_remove_request(rq);
952 cfq_remove_request(crq->request); 774 cfqq->on_dispatch[rq_is_sync(rq)]++;
953 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; 775 elv_dispatch_sort(q, rq);
954 elv_dispatch_sort(q, crq->request);
955 776
956 rq = list_entry(q->queue_head.prev, struct request, queuelist); 777 rq = list_entry(q->queue_head.prev, struct request, queuelist);
957 cfqd->last_sector = rq->sector + rq->nr_sectors; 778 cfqd->last_sector = rq->sector + rq->nr_sectors;
@@ -960,24 +781,23 @@ static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
960/* 781/*
961 * return expired entry, or NULL to just start from scratch in rbtree 782 * return expired entry, or NULL to just start from scratch in rbtree
962 */ 783 */
963static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) 784static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq)
964{ 785{
965 struct cfq_data *cfqd = cfqq->cfqd; 786 struct cfq_data *cfqd = cfqq->cfqd;
966 struct request *rq; 787 struct request *rq;
967 struct cfq_rq *crq; 788 int fifo;
968 789
969 if (cfq_cfqq_fifo_expire(cfqq)) 790 if (cfq_cfqq_fifo_expire(cfqq))
970 return NULL; 791 return NULL;
792 if (list_empty(&cfqq->fifo))
793 return NULL;
971 794
972 if (!list_empty(&cfqq->fifo)) { 795 fifo = cfq_cfqq_class_sync(cfqq);
973 int fifo = cfq_cfqq_class_sync(cfqq); 796 rq = rq_entry_fifo(cfqq->fifo.next);
974 797
975 crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); 798 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
976 rq = crq->request; 799 cfq_mark_cfqq_fifo_expire(cfqq);
977 if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { 800 return rq;
978 cfq_mark_cfqq_fifo_expire(cfqq);
979 return crq;
980 }
981 } 801 }
982 802
983 return NULL; 803 return NULL;
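cfq_check_fifo() no longer goes through a cfq_rq: the fifo is threaded straight through struct request, and rq_entry_fifo() presumably mirrors the cfq-local list_entry_fifo() macro removed earlier in this diff:

/* assumed definition, matching the removed cfq-local macro */
#define rq_entry_fifo(ptr)      list_entry((ptr), struct request, queuelist)

With that, the fifo head check reduces to the start_time comparison shown above.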
@@ -1063,25 +883,25 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1063 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); 883 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1064 884
1065 do { 885 do {
1066 struct cfq_rq *crq; 886 struct request *rq;
1067 887
1068 /* 888 /*
1069 * follow expired path, else get first next available 889 * follow expired path, else get first next available
1070 */ 890 */
1071 if ((crq = cfq_check_fifo(cfqq)) == NULL) 891 if ((rq = cfq_check_fifo(cfqq)) == NULL)
1072 crq = cfqq->next_crq; 892 rq = cfqq->next_rq;
1073 893
1074 /* 894 /*
1075 * finally, insert request into driver dispatch list 895 * finally, insert request into driver dispatch list
1076 */ 896 */
1077 cfq_dispatch_insert(cfqd->queue, crq); 897 cfq_dispatch_insert(cfqd->queue, rq);
1078 898
1079 cfqd->dispatch_slice++; 899 cfqd->dispatch_slice++;
1080 dispatched++; 900 dispatched++;
1081 901
1082 if (!cfqd->active_cic) { 902 if (!cfqd->active_cic) {
1083 atomic_inc(&crq->io_context->ioc->refcount); 903 atomic_inc(&RQ_CIC(rq)->ioc->refcount);
1084 cfqd->active_cic = crq->io_context; 904 cfqd->active_cic = RQ_CIC(rq);
1085 } 905 }
1086 906
1087 if (RB_EMPTY_ROOT(&cfqq->sort_list)) 907 if (RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -1112,13 +932,12 @@ static int
1112cfq_forced_dispatch_cfqqs(struct list_head *list) 932cfq_forced_dispatch_cfqqs(struct list_head *list)
1113{ 933{
1114 struct cfq_queue *cfqq, *next; 934 struct cfq_queue *cfqq, *next;
1115 struct cfq_rq *crq;
1116 int dispatched; 935 int dispatched;
1117 936
1118 dispatched = 0; 937 dispatched = 0;
1119 list_for_each_entry_safe(cfqq, next, list, cfq_list) { 938 list_for_each_entry_safe(cfqq, next, list, cfq_list) {
1120 while ((crq = cfqq->next_crq)) { 939 while (cfqq->next_rq) {
1121 cfq_dispatch_insert(cfqq->cfqd->queue, crq); 940 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
1122 dispatched++; 941 dispatched++;
1123 } 942 }
1124 BUG_ON(!list_empty(&cfqq->fifo)); 943 BUG_ON(!list_empty(&cfqq->fifo));
@@ -1194,8 +1013,8 @@ cfq_dispatch_requests(request_queue_t *q, int force)
1194} 1013}
1195 1014
1196/* 1015/*
1197 * task holds one reference to the queue, dropped when task exits. each crq 1016 * task holds one reference to the queue, dropped when task exits. each rq
1198 * in-flight on this queue also holds a reference, dropped when crq is freed. 1017 * in-flight on this queue also holds a reference, dropped when rq is freed.
1199 * 1018 *
1200 * queue lock must be held here. 1019 * queue lock must be held here.
1201 */ 1020 */
@@ -1223,7 +1042,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
1223 kmem_cache_free(cfq_pool, cfqq); 1042 kmem_cache_free(cfq_pool, cfqq);
1224} 1043}
1225 1044
1226static inline struct cfq_queue * 1045static struct cfq_queue *
1227__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, 1046__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
1228 const int hashval) 1047 const int hashval)
1229{ 1048{
@@ -1260,62 +1079,63 @@ static void cfq_free_io_context(struct io_context *ioc)
1260 freed++; 1079 freed++;
1261 } 1080 }
1262 1081
1263 if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) 1082 elv_ioc_count_mod(ioc_count, -freed);
1083
1084 if (ioc_gone && !elv_ioc_count_read(ioc_count))
1264 complete(ioc_gone); 1085 complete(ioc_gone);
1265} 1086}
1266 1087
1267static void cfq_trim(struct io_context *ioc) 1088static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1268{ 1089{
1269 ioc->set_ioprio = NULL; 1090 if (unlikely(cfqq == cfqd->active_queue))
1270 cfq_free_io_context(ioc); 1091 __cfq_slice_expired(cfqd, cfqq, 0);
1092
1093 cfq_put_queue(cfqq);
1271} 1094}
1272 1095
1273/* 1096static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
1274 * Called with interrupts disabled 1097 struct cfq_io_context *cic)
1275 */
1276static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1277{ 1098{
1278 struct cfq_data *cfqd = cic->key; 1099 list_del_init(&cic->queue_list);
1279 request_queue_t *q; 1100 smp_wmb();
1280 1101 cic->key = NULL;
1281 if (!cfqd)
1282 return;
1283
1284 q = cfqd->queue;
1285
1286 WARN_ON(!irqs_disabled());
1287
1288 spin_lock(q->queue_lock);
1289 1102
1290 if (cic->cfqq[ASYNC]) { 1103 if (cic->cfqq[ASYNC]) {
1291 if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue)) 1104 cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
1292 __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0);
1293 cfq_put_queue(cic->cfqq[ASYNC]);
1294 cic->cfqq[ASYNC] = NULL; 1105 cic->cfqq[ASYNC] = NULL;
1295 } 1106 }
1296 1107
1297 if (cic->cfqq[SYNC]) { 1108 if (cic->cfqq[SYNC]) {
1298 if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue)) 1109 cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
1299 __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0);
1300 cfq_put_queue(cic->cfqq[SYNC]);
1301 cic->cfqq[SYNC] = NULL; 1110 cic->cfqq[SYNC] = NULL;
1302 } 1111 }
1112}
1303 1113
1304 cic->key = NULL; 1114
1305 list_del_init(&cic->queue_list); 1115/*
1306 spin_unlock(q->queue_lock); 1116 * Called with interrupts disabled
1117 */
1118static void cfq_exit_single_io_context(struct cfq_io_context *cic)
1119{
1120 struct cfq_data *cfqd = cic->key;
1121
1122 if (cfqd) {
1123 request_queue_t *q = cfqd->queue;
1124
1125 spin_lock_irq(q->queue_lock);
1126 __cfq_exit_single_io_context(cfqd, cic);
1127 spin_unlock_irq(q->queue_lock);
1128 }
1307} 1129}
1308 1130
1309static void cfq_exit_io_context(struct io_context *ioc) 1131static void cfq_exit_io_context(struct io_context *ioc)
1310{ 1132{
1311 struct cfq_io_context *__cic; 1133 struct cfq_io_context *__cic;
1312 unsigned long flags;
1313 struct rb_node *n; 1134 struct rb_node *n;
1314 1135
1315 /* 1136 /*
1316 * put the reference this task is holding to the various queues 1137 * put the reference this task is holding to the various queues
1317 */ 1138 */
1318 spin_lock_irqsave(&cfq_exit_lock, flags);
1319 1139
1320 n = rb_first(&ioc->cic_root); 1140 n = rb_first(&ioc->cic_root);
1321 while (n != NULL) { 1141 while (n != NULL) {
@@ -1324,22 +1144,21 @@ static void cfq_exit_io_context(struct io_context *ioc)
1324 cfq_exit_single_io_context(__cic); 1144 cfq_exit_single_io_context(__cic);
1325 n = rb_next(n); 1145 n = rb_next(n);
1326 } 1146 }
1327
1328 spin_unlock_irqrestore(&cfq_exit_lock, flags);
1329} 1147}
1330 1148
1331static struct cfq_io_context * 1149static struct cfq_io_context *
1332cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) 1150cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1333{ 1151{
1334 struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); 1152 struct cfq_io_context *cic;
1335 1153
1154 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node);
1336 if (cic) { 1155 if (cic) {
1337 memset(cic, 0, sizeof(*cic)); 1156 memset(cic, 0, sizeof(*cic));
1338 cic->last_end_request = jiffies; 1157 cic->last_end_request = jiffies;
1339 INIT_LIST_HEAD(&cic->queue_list); 1158 INIT_LIST_HEAD(&cic->queue_list);
1340 cic->dtor = cfq_free_io_context; 1159 cic->dtor = cfq_free_io_context;
1341 cic->exit = cfq_exit_io_context; 1160 cic->exit = cfq_exit_io_context;
1342 atomic_inc(&ioc_count); 1161 elv_ioc_count_inc(ioc_count);
1343 } 1162 }
1344 1163
1345 return cic; 1164 return cic;
@@ -1420,15 +1239,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
1420 spin_unlock(cfqd->queue->queue_lock); 1239 spin_unlock(cfqd->queue->queue_lock);
1421} 1240}
1422 1241
1423/* 1242static void cfq_ioc_set_ioprio(struct io_context *ioc)
1424 * callback from sys_ioprio_set, irqs are disabled
1425 */
1426static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
1427{ 1243{
1428 struct cfq_io_context *cic; 1244 struct cfq_io_context *cic;
1429 struct rb_node *n; 1245 struct rb_node *n;
1430 1246
1431 spin_lock(&cfq_exit_lock); 1247 ioc->ioprio_changed = 0;
1432 1248
1433 n = rb_first(&ioc->cic_root); 1249 n = rb_first(&ioc->cic_root);
1434 while (n != NULL) { 1250 while (n != NULL) {
@@ -1437,10 +1253,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
1437 changed_ioprio(cic); 1253 changed_ioprio(cic);
1438 n = rb_next(n); 1254 n = rb_next(n);
1439 } 1255 }
1440
1441 spin_unlock(&cfq_exit_lock);
1442
1443 return 0;
1444} 1256}
1445 1257
1446static struct cfq_queue * 1258static struct cfq_queue *
@@ -1460,12 +1272,18 @@ retry:
1460 cfqq = new_cfqq; 1272 cfqq = new_cfqq;
1461 new_cfqq = NULL; 1273 new_cfqq = NULL;
1462 } else if (gfp_mask & __GFP_WAIT) { 1274 } else if (gfp_mask & __GFP_WAIT) {
1275 /*
1276 * Inform the allocator of the fact that we will
1277 * just repeat this allocation if it fails, to allow
1278 * the allocator to do whatever it needs to attempt to
1279 * free memory.
1280 */
1463 spin_unlock_irq(cfqd->queue->queue_lock); 1281 spin_unlock_irq(cfqd->queue->queue_lock);
1464 new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1282 new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node);
1465 spin_lock_irq(cfqd->queue->queue_lock); 1283 spin_lock_irq(cfqd->queue->queue_lock);
1466 goto retry; 1284 goto retry;
1467 } else { 1285 } else {
1468 cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); 1286 cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node);
1469 if (!cfqq) 1287 if (!cfqq)
1470 goto out; 1288 goto out;
1471 } 1289 }
@@ -1480,13 +1298,13 @@ retry:
1480 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); 1298 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
1481 atomic_set(&cfqq->ref, 0); 1299 atomic_set(&cfqq->ref, 0);
1482 cfqq->cfqd = cfqd; 1300 cfqq->cfqd = cfqd;
1483 cfqq->service_last = 0;
1484 /* 1301 /*
1485 * set ->slice_left to allow preemption for a new process 1302 * set ->slice_left to allow preemption for a new process
1486 */ 1303 */
1487 cfqq->slice_left = 2 * cfqd->cfq_slice_idle; 1304 cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
1488 cfq_mark_cfqq_idle_window(cfqq); 1305 cfq_mark_cfqq_idle_window(cfqq);
1489 cfq_mark_cfqq_prio_changed(cfqq); 1306 cfq_mark_cfqq_prio_changed(cfqq);
1307 cfq_mark_cfqq_queue_new(cfqq);
1490 cfq_init_prio_data(cfqq); 1308 cfq_init_prio_data(cfqq);
1491 } 1309 }
1492 1310
@@ -1502,12 +1320,10 @@ out:
1502static void 1320static void
1503cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) 1321cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
1504{ 1322{
1505 spin_lock(&cfq_exit_lock); 1323 WARN_ON(!list_empty(&cic->queue_list));
1506 rb_erase(&cic->rb_node, &ioc->cic_root); 1324 rb_erase(&cic->rb_node, &ioc->cic_root);
1507 list_del_init(&cic->queue_list);
1508 spin_unlock(&cfq_exit_lock);
1509 kmem_cache_free(cfq_ioc_pool, cic); 1325 kmem_cache_free(cfq_ioc_pool, cic);
1510 atomic_dec(&ioc_count); 1326 elv_ioc_count_dec(ioc_count);
1511} 1327}
1512 1328
1513static struct cfq_io_context * 1329static struct cfq_io_context *
@@ -1551,7 +1367,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
1551 cic->ioc = ioc; 1367 cic->ioc = ioc;
1552 cic->key = cfqd; 1368 cic->key = cfqd;
1553 1369
1554 ioc->set_ioprio = cfq_ioc_set_ioprio;
1555restart: 1370restart:
1556 parent = NULL; 1371 parent = NULL;
1557 p = &ioc->cic_root.rb_node; 1372 p = &ioc->cic_root.rb_node;
@@ -1573,11 +1388,12 @@ restart:
1573 BUG(); 1388 BUG();
1574 } 1389 }
1575 1390
1576 spin_lock(&cfq_exit_lock);
1577 rb_link_node(&cic->rb_node, parent, p); 1391 rb_link_node(&cic->rb_node, parent, p);
1578 rb_insert_color(&cic->rb_node, &ioc->cic_root); 1392 rb_insert_color(&cic->rb_node, &ioc->cic_root);
1393
1394 spin_lock_irq(cfqd->queue->queue_lock);
1579 list_add(&cic->queue_list, &cfqd->cic_list); 1395 list_add(&cic->queue_list, &cfqd->cic_list);
1580 spin_unlock(&cfq_exit_lock); 1396 spin_unlock_irq(cfqd->queue->queue_lock);
1581} 1397}
1582 1398
1583/* 1399/*
@@ -1593,7 +1409,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1593 1409
1594 might_sleep_if(gfp_mask & __GFP_WAIT); 1410 might_sleep_if(gfp_mask & __GFP_WAIT);
1595 1411
1596 ioc = get_io_context(gfp_mask); 1412 ioc = get_io_context(gfp_mask, cfqd->queue->node);
1597 if (!ioc) 1413 if (!ioc)
1598 return NULL; 1414 return NULL;
1599 1415
@@ -1607,6 +1423,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
1607 1423
1608 cfq_cic_link(cfqd, ioc, cic); 1424 cfq_cic_link(cfqd, ioc, cic);
1609out: 1425out:
1426 smp_read_barrier_depends();
1427 if (unlikely(ioc->ioprio_changed))
1428 cfq_ioc_set_ioprio(ioc);
1429
1610 return cic; 1430 return cic;
1611err: 1431err:
1612 put_io_context(ioc); 1432 put_io_context(ioc);
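The sys_ioprio_set() callback (ioc->set_ioprio) goes away together with cfq_exit_lock; priority changes are now published as a flag on the io_context and consumed lazily here when the next request comes in. The producer side lives in fs/ioprio.c and is not shown in this diff; under that assumption it amounts to something like the sketch below, with the smp_read_barrier_depends() above pairing against the writer:

/* sketch of the assumed producer side in fs/ioprio.c */
static int set_task_ioprio(struct task_struct *task, int ioprio)
{
        struct io_context *ioc;

        task_lock(task);
        task->ioprio = ioprio;

        ioc = task->io_context;
        if (ioc)
                ioc->ioprio_changed = 1;        /* picked up on the next request */

        task_unlock(task);
        return 0;
}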
@@ -1640,15 +1460,15 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
1640 1460
1641static void 1461static void
1642cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, 1462cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
1643 struct cfq_rq *crq) 1463 struct request *rq)
1644{ 1464{
1645 sector_t sdist; 1465 sector_t sdist;
1646 u64 total; 1466 u64 total;
1647 1467
1648 if (cic->last_request_pos < crq->request->sector) 1468 if (cic->last_request_pos < rq->sector)
1649 sdist = crq->request->sector - cic->last_request_pos; 1469 sdist = rq->sector - cic->last_request_pos;
1650 else 1470 else
1651 sdist = cic->last_request_pos - crq->request->sector; 1471 sdist = cic->last_request_pos - rq->sector;
1652 1472
1653 /* 1473 /*
1654 * Don't allow the seek distance to get too large from the 1474 * Don't allow the seek distance to get too large from the
@@ -1699,7 +1519,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1699 */ 1519 */
1700static int 1520static int
1701cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, 1521cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1702 struct cfq_rq *crq) 1522 struct request *rq)
1703{ 1523{
1704 struct cfq_queue *cfqq = cfqd->active_queue; 1524 struct cfq_queue *cfqq = cfqd->active_queue;
1705 1525
@@ -1718,7 +1538,17 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1718 */ 1538 */
1719 if (new_cfqq->slice_left < cfqd->cfq_slice_idle) 1539 if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
1720 return 0; 1540 return 0;
1721 if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) 1541 /*
1542 * if the new request is sync, but the currently running queue is
1543 * not, let the sync request have priority.
1544 */
1545 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
1546 return 1;
1547 /*
1548 * So both queues are sync. Let the new request get disk time if
1549 * it's a metadata request and the current queue is doing regular IO.
1550 */
1551 if (rq_is_meta(rq) && !cfqq->meta_pending)
1722 return 1; 1552 return 1;
1723 1553
1724 return 0; 1554 return 0;
@@ -1730,47 +1560,45 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1730 */ 1560 */
1731static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1561static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1732{ 1562{
1733 struct cfq_queue *__cfqq, *next; 1563 cfq_slice_expired(cfqd, 1);
1734
1735 list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
1736 cfq_resort_rr_list(__cfqq, 1);
1737 1564
1738 if (!cfqq->slice_left) 1565 if (!cfqq->slice_left)
1739 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; 1566 cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
1740 1567
1741 cfqq->slice_end = cfqq->slice_left + jiffies; 1568 /*
1742 cfq_slice_expired(cfqd, 1); 1569 * Put the new queue at the front of the of the current list,
1743 __cfq_set_active_queue(cfqd, cfqq); 1570 * so we know that it will be selected next.
1744} 1571 */
1745 1572 BUG_ON(!cfq_cfqq_on_rr(cfqq));
1746/* 1573 list_move(&cfqq->cfq_list, &cfqd->cur_rr);
1747 * should really be a ll_rw_blk.c helper
1748 */
1749static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1750{
1751 request_queue_t *q = cfqd->queue;
1752 1574
1753 if (!blk_queue_plugged(q)) 1575 cfqq->slice_end = cfqq->slice_left + jiffies;
1754 q->request_fn(q);
1755 else
1756 __generic_unplug_device(q);
1757} 1576}
1758 1577
1759/* 1578/*
1760 * Called when a new fs request (crq) is added (to cfqq). Check if there's 1579 * Called when a new fs request (rq) is added (to cfqq). Check if there's
1761 * something we should do about it 1580 * something we should do about it
1762 */ 1581 */
1763static void 1582static void
1764cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1583cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1765 struct cfq_rq *crq) 1584 struct request *rq)
1766{ 1585{
1767 struct cfq_io_context *cic = crq->io_context; 1586 struct cfq_io_context *cic = RQ_CIC(rq);
1587
1588 if (rq_is_meta(rq))
1589 cfqq->meta_pending++;
1590
1591 /*
1592 * check if this request is a better next-serve candidate)) {
1593 */
1594 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
1595 BUG_ON(!cfqq->next_rq);
1768 1596
1769 /* 1597 /*
1770 * we never wait for an async request and we don't allow preemption 1598 * we never wait for an async request and we don't allow preemption
1771 * of an async request. so just return early 1599 * of an async request. so just return early
1772 */ 1600 */
1773 if (!cfq_crq_is_sync(crq)) { 1601 if (!rq_is_sync(rq)) {
1774 /* 1602 /*
1775 * sync process issued an async request, if it's waiting 1603 * sync process issued an async request, if it's waiting
1776 * then expire it and kick rq handling. 1604 * then expire it and kick rq handling.
@@ -1778,17 +1606,17 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1778 if (cic == cfqd->active_cic && 1606 if (cic == cfqd->active_cic &&
1779 del_timer(&cfqd->idle_slice_timer)) { 1607 del_timer(&cfqd->idle_slice_timer)) {
1780 cfq_slice_expired(cfqd, 0); 1608 cfq_slice_expired(cfqd, 0);
1781 cfq_start_queueing(cfqd, cfqq); 1609 blk_start_queueing(cfqd->queue);
1782 } 1610 }
1783 return; 1611 return;
1784 } 1612 }
1785 1613
1786 cfq_update_io_thinktime(cfqd, cic); 1614 cfq_update_io_thinktime(cfqd, cic);
1787 cfq_update_io_seektime(cfqd, cic, crq); 1615 cfq_update_io_seektime(cfqd, cic, rq);
1788 cfq_update_idle_window(cfqd, cfqq, cic); 1616 cfq_update_idle_window(cfqd, cfqq, cic);
1789 1617
1790 cic->last_queue = jiffies; 1618 cic->last_queue = jiffies;
1791 cic->last_request_pos = crq->request->sector + crq->request->nr_sectors; 1619 cic->last_request_pos = rq->sector + rq->nr_sectors;
1792 1620
1793 if (cfqq == cfqd->active_queue) { 1621 if (cfqq == cfqd->active_queue) {
1794 /* 1622 /*
@@ -1799,9 +1627,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1799 if (cfq_cfqq_wait_request(cfqq)) { 1627 if (cfq_cfqq_wait_request(cfqq)) {
1800 cfq_mark_cfqq_must_dispatch(cfqq); 1628 cfq_mark_cfqq_must_dispatch(cfqq);
1801 del_timer(&cfqd->idle_slice_timer); 1629 del_timer(&cfqd->idle_slice_timer);
1802 cfq_start_queueing(cfqd, cfqq); 1630 blk_start_queueing(cfqd->queue);
1803 } 1631 }
1804 } else if (cfq_should_preempt(cfqd, cfqq, crq)) { 1632 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
1805 /* 1633 /*
1806 * not the active queue - expire current slice if it is 1634 * not the active queue - expire current slice if it is
1807 * idle and has expired it's mean thinktime or this new queue 1635 * idle and has expired it's mean thinktime or this new queue
@@ -1809,34 +1637,32 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1809 */ 1637 */
1810 cfq_preempt_queue(cfqd, cfqq); 1638 cfq_preempt_queue(cfqd, cfqq);
1811 cfq_mark_cfqq_must_dispatch(cfqq); 1639 cfq_mark_cfqq_must_dispatch(cfqq);
1812 cfq_start_queueing(cfqd, cfqq); 1640 blk_start_queueing(cfqd->queue);
1813 } 1641 }
1814} 1642}
1815 1643
1816static void cfq_insert_request(request_queue_t *q, struct request *rq) 1644static void cfq_insert_request(request_queue_t *q, struct request *rq)
1817{ 1645{
1818 struct cfq_data *cfqd = q->elevator->elevator_data; 1646 struct cfq_data *cfqd = q->elevator->elevator_data;
1819 struct cfq_rq *crq = RQ_DATA(rq); 1647 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1820 struct cfq_queue *cfqq = crq->cfq_queue;
1821 1648
1822 cfq_init_prio_data(cfqq); 1649 cfq_init_prio_data(cfqq);
1823 1650
1824 cfq_add_crq_rb(crq); 1651 cfq_add_rq_rb(rq);
1825 1652
1826 list_add_tail(&rq->queuelist, &cfqq->fifo); 1653 if (!cfq_cfqq_on_rr(cfqq))
1654 cfq_add_cfqq_rr(cfqd, cfqq);
1827 1655
1828 if (rq_mergeable(rq)) 1656 list_add_tail(&rq->queuelist, &cfqq->fifo);
1829 cfq_add_crq_hash(cfqd, crq);
1830 1657
1831 cfq_crq_enqueued(cfqd, cfqq, crq); 1658 cfq_rq_enqueued(cfqd, cfqq, rq);
1832} 1659}
1833 1660
1834static void cfq_completed_request(request_queue_t *q, struct request *rq) 1661static void cfq_completed_request(request_queue_t *q, struct request *rq)
1835{ 1662{
1836 struct cfq_rq *crq = RQ_DATA(rq); 1663 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1837 struct cfq_queue *cfqq = crq->cfq_queue;
1838 struct cfq_data *cfqd = cfqq->cfqd; 1664 struct cfq_data *cfqd = cfqq->cfqd;
1839 const int sync = cfq_crq_is_sync(crq); 1665 const int sync = rq_is_sync(rq);
1840 unsigned long now; 1666 unsigned long now;
1841 1667
1842 now = jiffies; 1668 now = jiffies;
@@ -1849,15 +1675,11 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
1849 if (!cfq_class_idle(cfqq)) 1675 if (!cfq_class_idle(cfqq))
1850 cfqd->last_end_request = now; 1676 cfqd->last_end_request = now;
1851 1677
1852 if (!cfq_cfqq_dispatched(cfqq)) { 1678 if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq))
1853 if (cfq_cfqq_on_rr(cfqq)) { 1679 cfq_resort_rr_list(cfqq, 0);
1854 cfqq->service_last = now;
1855 cfq_resort_rr_list(cfqq, 0);
1856 }
1857 }
1858 1680
1859 if (sync) 1681 if (sync)
1860 crq->io_context->last_end_request = now; 1682 RQ_CIC(rq)->last_end_request = now;
1861 1683
1862 /* 1684 /*
1863 * If this is the active queue, check if it needs to be expired, 1685 * If this is the active queue, check if it needs to be expired,
@@ -1873,30 +1695,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
1873 } 1695 }
1874} 1696}
1875 1697
1876static struct request *
1877cfq_former_request(request_queue_t *q, struct request *rq)
1878{
1879 struct cfq_rq *crq = RQ_DATA(rq);
1880 struct rb_node *rbprev = rb_prev(&crq->rb_node);
1881
1882 if (rbprev)
1883 return rb_entry_crq(rbprev)->request;
1884
1885 return NULL;
1886}
1887
1888static struct request *
1889cfq_latter_request(request_queue_t *q, struct request *rq)
1890{
1891 struct cfq_rq *crq = RQ_DATA(rq);
1892 struct rb_node *rbnext = rb_next(&crq->rb_node);
1893
1894 if (rbnext)
1895 return rb_entry_crq(rbnext)->request;
1896
1897 return NULL;
1898}
1899
1900/* 1698/*
1901 * we temporarily boost lower priority queues if they are holding fs exclusive 1699 * we temporarily boost lower priority queues if they are holding fs exclusive
1902 * resources. they are boosted to normal prio (CLASS_BE/4) 1700 * resources. they are boosted to normal prio (CLASS_BE/4)
@@ -1933,9 +1731,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
1933 cfq_resort_rr_list(cfqq, 0); 1731 cfq_resort_rr_list(cfqq, 0);
1934} 1732}
1935 1733
1936static inline int 1734static inline int __cfq_may_queue(struct cfq_queue *cfqq)
1937__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1938 struct task_struct *task, int rw)
1939{ 1735{
1940 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 1736 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
1941 !cfq_cfqq_must_alloc_slice(cfqq)) { 1737 !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -1946,7 +1742,7 @@ __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1946 return ELV_MQUEUE_MAY; 1742 return ELV_MQUEUE_MAY;
1947} 1743}
1948 1744
1949static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) 1745static int cfq_may_queue(request_queue_t *q, int rw)
1950{ 1746{
1951 struct cfq_data *cfqd = q->elevator->elevator_data; 1747 struct cfq_data *cfqd = q->elevator->elevator_data;
1952 struct task_struct *tsk = current; 1748 struct task_struct *tsk = current;
@@ -1963,48 +1759,30 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
1963 cfq_init_prio_data(cfqq); 1759 cfq_init_prio_data(cfqq);
1964 cfq_prio_boost(cfqq); 1760 cfq_prio_boost(cfqq);
1965 1761
1966 return __cfq_may_queue(cfqd, cfqq, tsk, rw); 1762 return __cfq_may_queue(cfqq);
1967 } 1763 }
1968 1764
1969 return ELV_MQUEUE_MAY; 1765 return ELV_MQUEUE_MAY;
1970} 1766}
1971 1767
1972static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
1973{
1974 struct cfq_data *cfqd = q->elevator->elevator_data;
1975
1976 if (unlikely(cfqd->rq_starved)) {
1977 struct request_list *rl = &q->rq;
1978
1979 smp_mb();
1980 if (waitqueue_active(&rl->wait[READ]))
1981 wake_up(&rl->wait[READ]);
1982 if (waitqueue_active(&rl->wait[WRITE]))
1983 wake_up(&rl->wait[WRITE]);
1984 }
1985}
1986
1987/* 1768/*
1988 * queue lock held here 1769 * queue lock held here
1989 */ 1770 */
1990static void cfq_put_request(request_queue_t *q, struct request *rq) 1771static void cfq_put_request(request_queue_t *q, struct request *rq)
1991{ 1772{
1992 struct cfq_data *cfqd = q->elevator->elevator_data; 1773 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1993 struct cfq_rq *crq = RQ_DATA(rq);
1994 1774
1995 if (crq) { 1775 if (cfqq) {
1996 struct cfq_queue *cfqq = crq->cfq_queue;
1997 const int rw = rq_data_dir(rq); 1776 const int rw = rq_data_dir(rq);
1998 1777
1999 BUG_ON(!cfqq->allocated[rw]); 1778 BUG_ON(!cfqq->allocated[rw]);
2000 cfqq->allocated[rw]--; 1779 cfqq->allocated[rw]--;
2001 1780
2002 put_io_context(crq->io_context->ioc); 1781 put_io_context(RQ_CIC(rq)->ioc);
2003 1782
2004 mempool_free(crq, cfqd->crq_pool);
2005 rq->elevator_private = NULL; 1783 rq->elevator_private = NULL;
1784 rq->elevator_private2 = NULL;
2006 1785
2007 cfq_check_waiters(q, cfqq);
2008 cfq_put_queue(cfqq); 1786 cfq_put_queue(cfqq);
2009 } 1787 }
2010} 1788}
@@ -2013,8 +1791,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
2013 * Allocate cfq data structures associated with this request. 1791 * Allocate cfq data structures associated with this request.
2014 */ 1792 */
2015static int 1793static int
2016cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 1794cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
2017 gfp_t gfp_mask)
2018{ 1795{
2019 struct cfq_data *cfqd = q->elevator->elevator_data; 1796 struct cfq_data *cfqd = q->elevator->elevator_data;
2020 struct task_struct *tsk = current; 1797 struct task_struct *tsk = current;
@@ -2022,7 +1799,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
2022 const int rw = rq_data_dir(rq); 1799 const int rw = rq_data_dir(rq);
2023 pid_t key = cfq_queue_pid(tsk, rw); 1800 pid_t key = cfq_queue_pid(tsk, rw);
2024 struct cfq_queue *cfqq; 1801 struct cfq_queue *cfqq;
2025 struct cfq_rq *crq;
2026 unsigned long flags; 1802 unsigned long flags;
2027 int is_sync = key != CFQ_KEY_ASYNC; 1803 int is_sync = key != CFQ_KEY_ASYNC;
2028 1804
@@ -2046,42 +1822,18 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
2046 1822
2047 cfqq->allocated[rw]++; 1823 cfqq->allocated[rw]++;
2048 cfq_clear_cfqq_must_alloc(cfqq); 1824 cfq_clear_cfqq_must_alloc(cfqq);
2049 cfqd->rq_starved = 0;
2050 atomic_inc(&cfqq->ref); 1825 atomic_inc(&cfqq->ref);
2051 spin_unlock_irqrestore(q->queue_lock, flags);
2052 1826
2053 crq = mempool_alloc(cfqd->crq_pool, gfp_mask); 1827 spin_unlock_irqrestore(q->queue_lock, flags);
2054 if (crq) {
2055 RB_CLEAR_NODE(&crq->rb_node);
2056 crq->rb_key = 0;
2057 crq->request = rq;
2058 INIT_HLIST_NODE(&crq->hash);
2059 crq->cfq_queue = cfqq;
2060 crq->io_context = cic;
2061
2062 if (is_sync)
2063 cfq_mark_crq_is_sync(crq);
2064 else
2065 cfq_clear_crq_is_sync(crq);
2066 1828
2067 rq->elevator_private = crq; 1829 rq->elevator_private = cic;
2068 return 0; 1830 rq->elevator_private2 = cfqq;
2069 } 1831 return 0;
2070 1832
2071 spin_lock_irqsave(q->queue_lock, flags);
2072 cfqq->allocated[rw]--;
2073 if (!(cfqq->allocated[0] + cfqq->allocated[1]))
2074 cfq_mark_cfqq_must_alloc(cfqq);
2075 cfq_put_queue(cfqq);
2076queue_fail: 1833queue_fail:
2077 if (cic) 1834 if (cic)
2078 put_io_context(cic->ioc); 1835 put_io_context(cic->ioc);
2079 /* 1836
2080 * mark us rq allocation starved. we need to kickstart the process
2081 * ourselves if there are no pending requests that can do it for us.
2082 * that would be an extremely rare OOM situation
2083 */
2084 cfqd->rq_starved = 1;
2085 cfq_schedule_dispatch(cfqd); 1837 cfq_schedule_dispatch(cfqd);
2086 spin_unlock_irqrestore(q->queue_lock, flags); 1838 spin_unlock_irqrestore(q->queue_lock, flags);
2087 return 1; 1839 return 1;
@@ -2090,27 +1842,10 @@ queue_fail:
2090static void cfq_kick_queue(void *data) 1842static void cfq_kick_queue(void *data)
2091{ 1843{
2092 request_queue_t *q = data; 1844 request_queue_t *q = data;
2093 struct cfq_data *cfqd = q->elevator->elevator_data;
2094 unsigned long flags; 1845 unsigned long flags;
2095 1846
2096 spin_lock_irqsave(q->queue_lock, flags); 1847 spin_lock_irqsave(q->queue_lock, flags);
2097 1848 blk_start_queueing(q);
2098 if (cfqd->rq_starved) {
2099 struct request_list *rl = &q->rq;
2100
2101 /*
2102 * we aren't guaranteed to get a request after this, but we
2103 * have to be opportunistic
2104 */
2105 smp_mb();
2106 if (waitqueue_active(&rl->wait[READ]))
2107 wake_up(&rl->wait[READ]);
2108 if (waitqueue_active(&rl->wait[WRITE]))
2109 wake_up(&rl->wait[WRITE]);
2110 }
2111
2112 blk_remove_plug(q);
2113 q->request_fn(q);
2114 spin_unlock_irqrestore(q->queue_lock, flags); 1849 spin_unlock_irqrestore(q->queue_lock, flags);
2115} 1850}
2116 1851
@@ -2193,7 +1928,6 @@ static void cfq_exit_queue(elevator_t *e)
2193 1928
2194 cfq_shutdown_timer_wq(cfqd); 1929 cfq_shutdown_timer_wq(cfqd);
2195 1930
2196 spin_lock(&cfq_exit_lock);
2197 spin_lock_irq(q->queue_lock); 1931 spin_lock_irq(q->queue_lock);
2198 1932
2199 if (cfqd->active_queue) 1933 if (cfqd->active_queue)
@@ -2203,25 +1937,14 @@ static void cfq_exit_queue(elevator_t *e)
2203 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 1937 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
2204 struct cfq_io_context, 1938 struct cfq_io_context,
2205 queue_list); 1939 queue_list);
2206 if (cic->cfqq[ASYNC]) { 1940
2207 cfq_put_queue(cic->cfqq[ASYNC]); 1941 __cfq_exit_single_io_context(cfqd, cic);
2208 cic->cfqq[ASYNC] = NULL;
2209 }
2210 if (cic->cfqq[SYNC]) {
2211 cfq_put_queue(cic->cfqq[SYNC]);
2212 cic->cfqq[SYNC] = NULL;
2213 }
2214 cic->key = NULL;
2215 list_del_init(&cic->queue_list);
2216 } 1942 }
2217 1943
2218 spin_unlock_irq(q->queue_lock); 1944 spin_unlock_irq(q->queue_lock);
2219 spin_unlock(&cfq_exit_lock);
2220 1945
2221 cfq_shutdown_timer_wq(cfqd); 1946 cfq_shutdown_timer_wq(cfqd);
2222 1947
2223 mempool_destroy(cfqd->crq_pool);
2224 kfree(cfqd->crq_hash);
2225 kfree(cfqd->cfq_hash); 1948 kfree(cfqd->cfq_hash);
2226 kfree(cfqd); 1949 kfree(cfqd);
2227} 1950}
@@ -2231,7 +1954,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2231 struct cfq_data *cfqd; 1954 struct cfq_data *cfqd;
2232 int i; 1955 int i;
2233 1956
2234 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); 1957 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
2235 if (!cfqd) 1958 if (!cfqd)
2236 return NULL; 1959 return NULL;
2237 1960
@@ -2243,23 +1966,12 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2243 INIT_LIST_HEAD(&cfqd->busy_rr); 1966 INIT_LIST_HEAD(&cfqd->busy_rr);
2244 INIT_LIST_HEAD(&cfqd->cur_rr); 1967 INIT_LIST_HEAD(&cfqd->cur_rr);
2245 INIT_LIST_HEAD(&cfqd->idle_rr); 1968 INIT_LIST_HEAD(&cfqd->idle_rr);
2246 INIT_LIST_HEAD(&cfqd->empty_list);
2247 INIT_LIST_HEAD(&cfqd->cic_list); 1969 INIT_LIST_HEAD(&cfqd->cic_list);
2248 1970
2249 cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); 1971 cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
2250 if (!cfqd->crq_hash)
2251 goto out_crqhash;
2252
2253 cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
2254 if (!cfqd->cfq_hash) 1972 if (!cfqd->cfq_hash)
2255 goto out_cfqhash; 1973 goto out_free;
2256
2257 cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
2258 if (!cfqd->crq_pool)
2259 goto out_crqpool;
2260 1974
2261 for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
2262 INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
2263 for (i = 0; i < CFQ_QHASH_ENTRIES; i++) 1975 for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
2264 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); 1976 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
2265 1977
@@ -2275,7 +1987,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2275 1987
2276 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); 1988 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
2277 1989
2278 cfqd->cfq_queued = cfq_queued;
2279 cfqd->cfq_quantum = cfq_quantum; 1990 cfqd->cfq_quantum = cfq_quantum;
2280 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; 1991 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
2281 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; 1992 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
@@ -2287,19 +1998,13 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2287 cfqd->cfq_slice_idle = cfq_slice_idle; 1998 cfqd->cfq_slice_idle = cfq_slice_idle;
2288 1999
2289 return cfqd; 2000 return cfqd;
2290out_crqpool: 2001out_free:
2291 kfree(cfqd->cfq_hash);
2292out_cfqhash:
2293 kfree(cfqd->crq_hash);
2294out_crqhash:
2295 kfree(cfqd); 2002 kfree(cfqd);
2296 return NULL; 2003 return NULL;
2297} 2004}
2298 2005
2299static void cfq_slab_kill(void) 2006static void cfq_slab_kill(void)
2300{ 2007{
2301 if (crq_pool)
2302 kmem_cache_destroy(crq_pool);
2303 if (cfq_pool) 2008 if (cfq_pool)
2304 kmem_cache_destroy(cfq_pool); 2009 kmem_cache_destroy(cfq_pool);
2305 if (cfq_ioc_pool) 2010 if (cfq_ioc_pool)
@@ -2308,11 +2013,6 @@ static void cfq_slab_kill(void)
2308 2013
2309static int __init cfq_slab_setup(void) 2014static int __init cfq_slab_setup(void)
2310{ 2015{
2311 crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
2312 NULL, NULL);
2313 if (!crq_pool)
2314 goto fail;
2315
2316 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, 2016 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
2317 NULL, NULL); 2017 NULL, NULL);
2318 if (!cfq_pool) 2018 if (!cfq_pool)
@@ -2358,7 +2058,6 @@ static ssize_t __FUNC(elevator_t *e, char *page) \
2358 return cfq_var_show(__data, (page)); \ 2058 return cfq_var_show(__data, (page)); \
2359} 2059}
2360SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 2060SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
2361SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
2362SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); 2061SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
2363SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); 2062SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
2364SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); 2063SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
@@ -2386,7 +2085,6 @@ static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
2386 return ret; \ 2085 return ret; \
2387} 2086}
2388STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); 2087STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
2389STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
2390STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); 2088STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
2391STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); 2089STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
2392STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); 2090STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
@@ -2402,7 +2100,6 @@ STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX,
2402 2100
2403static struct elv_fs_entry cfq_attrs[] = { 2101static struct elv_fs_entry cfq_attrs[] = {
2404 CFQ_ATTR(quantum), 2102 CFQ_ATTR(quantum),
2405 CFQ_ATTR(queued),
2406 CFQ_ATTR(fifo_expire_sync), 2103 CFQ_ATTR(fifo_expire_sync),
2407 CFQ_ATTR(fifo_expire_async), 2104 CFQ_ATTR(fifo_expire_async),
2408 CFQ_ATTR(back_seek_max), 2105 CFQ_ATTR(back_seek_max),
@@ -2425,14 +2122,14 @@ static struct elevator_type iosched_cfq = {
2425 .elevator_deactivate_req_fn = cfq_deactivate_request, 2122 .elevator_deactivate_req_fn = cfq_deactivate_request,
2426 .elevator_queue_empty_fn = cfq_queue_empty, 2123 .elevator_queue_empty_fn = cfq_queue_empty,
2427 .elevator_completed_req_fn = cfq_completed_request, 2124 .elevator_completed_req_fn = cfq_completed_request,
2428 .elevator_former_req_fn = cfq_former_request, 2125 .elevator_former_req_fn = elv_rb_former_request,
2429 .elevator_latter_req_fn = cfq_latter_request, 2126 .elevator_latter_req_fn = elv_rb_latter_request,
2430 .elevator_set_req_fn = cfq_set_request, 2127 .elevator_set_req_fn = cfq_set_request,
2431 .elevator_put_req_fn = cfq_put_request, 2128 .elevator_put_req_fn = cfq_put_request,
2432 .elevator_may_queue_fn = cfq_may_queue, 2129 .elevator_may_queue_fn = cfq_may_queue,
2433 .elevator_init_fn = cfq_init_queue, 2130 .elevator_init_fn = cfq_init_queue,
2434 .elevator_exit_fn = cfq_exit_queue, 2131 .elevator_exit_fn = cfq_exit_queue,
2435 .trim = cfq_trim, 2132 .trim = cfq_free_io_context,
2436 }, 2133 },
2437 .elevator_attrs = cfq_attrs, 2134 .elevator_attrs = cfq_attrs,
2438 .elevator_name = "cfq", 2135 .elevator_name = "cfq",
@@ -2468,7 +2165,7 @@ static void __exit cfq_exit(void)
2468 ioc_gone = &all_gone; 2165 ioc_gone = &all_gone;
2469 /* ioc_gone's update must be visible before reading ioc_count */ 2166 /* ioc_gone's update must be visible before reading ioc_count */
2470 smp_wmb(); 2167 smp_wmb();
2471 if (atomic_read(&ioc_count)) 2168 if (elv_ioc_count_read(ioc_count))
2472 wait_for_completion(ioc_gone); 2169 wait_for_completion(ioc_gone);
2473 synchronize_rcu(); 2170 synchronize_rcu();
2474 cfq_slab_kill(); 2171 cfq_slab_kill();
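
The cfq-iosched.c rework above removes the private struct cfq_rq wrapper (and its mempool, request hash, and rq_starved handling): cfq_set_request() now stores the cfq_io_context in rq->elevator_private and the cfq_queue in rq->elevator_private2, and the rest of the code reads them back through RQ_CIC()/RQ_CFQQ(). A minimal sketch of that accessor convention, assuming definitions that match the usage above (they are not part of this excerpt):

/* Sketch only: accessors implied by cfq_set_request()/cfq_put_request() above. */
#define RQ_CIC(rq)	((struct cfq_io_context *) (rq)->elevator_private)
#define RQ_CFQQ(rq)	((struct cfq_queue *) (rq)->elevator_private2)
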
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c7ca9f0b6498..b7c5b34cb7b4 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Deadline i/o scheduler. 2 * Deadline i/o scheduler.
3 * 3 *
4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
5 */ 5 */
6#include <linux/kernel.h> 6#include <linux/kernel.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
@@ -12,7 +12,6 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/compiler.h> 14#include <linux/compiler.h>
15#include <linux/hash.h>
16#include <linux/rbtree.h> 15#include <linux/rbtree.h>
17 16
18/* 17/*
@@ -24,13 +23,6 @@ static const int writes_starved = 2; /* max times reads can starve a write */
24static const int fifo_batch = 16; /* # of sequential requests treated as one 23static const int fifo_batch = 16; /* # of sequential requests treated as one
25 by the above parameters. For throughput. */ 24 by the above parameters. For throughput. */
26 25
27static const int deadline_hash_shift = 5;
28#define DL_HASH_BLOCK(sec) ((sec) >> 3)
29#define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
30#define DL_HASH_ENTRIES (1 << deadline_hash_shift)
31#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
32#define ON_HASH(drq) (!hlist_unhashed(&(drq)->hash))
33
34struct deadline_data { 26struct deadline_data {
35 /* 27 /*
36 * run time data 28 * run time data
@@ -45,8 +37,7 @@ struct deadline_data {
45 /* 37 /*
46 * next in sort order. read, write or both are NULL 38 * next in sort order. read, write or both are NULL
47 */ 39 */
48 struct deadline_rq *next_drq[2]; 40 struct request *next_rq[2];
49 struct hlist_head *hash; /* request hash */
50 unsigned int batching; /* number of sequential requests made */ 41 unsigned int batching; /* number of sequential requests made */
51 sector_t last_sector; /* head position */ 42 sector_t last_sector; /* head position */
52 unsigned int starved; /* times reads have starved writes */ 43 unsigned int starved; /* times reads have starved writes */
@@ -58,240 +49,69 @@ struct deadline_data {
58 int fifo_batch; 49 int fifo_batch;
59 int writes_starved; 50 int writes_starved;
60 int front_merges; 51 int front_merges;
61
62 mempool_t *drq_pool;
63}; 52};
64 53
65/* 54static void deadline_move_request(struct deadline_data *, struct request *);
66 * pre-request data.
67 */
68struct deadline_rq {
69 /*
70 * rbtree index, key is the starting offset
71 */
72 struct rb_node rb_node;
73 sector_t rb_key;
74
75 struct request *request;
76
77 /*
78 * request hash, key is the ending offset (for back merge lookup)
79 */
80 struct hlist_node hash;
81
82 /*
83 * expire fifo
84 */
85 struct list_head fifo;
86 unsigned long expires;
87};
88
89static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);
90
91static kmem_cache_t *drq_pool;
92
93#define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private)
94 55
95/* 56#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))])
96 * the back merge hash support functions
97 */
98static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
99{
100 hlist_del_init(&drq->hash);
101}
102
103static inline void deadline_del_drq_hash(struct deadline_rq *drq)
104{
105 if (ON_HASH(drq))
106 __deadline_del_drq_hash(drq);
107}
108
109static inline void
110deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
111{
112 struct request *rq = drq->request;
113
114 BUG_ON(ON_HASH(drq));
115
116 hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
117}
118
119/*
120 * move hot entry to front of chain
121 */
122static inline void
123deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
124{
125 struct request *rq = drq->request;
126 struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
127
128 if (ON_HASH(drq) && &drq->hash != head->first) {
129 hlist_del(&drq->hash);
130 hlist_add_head(&drq->hash, head);
131 }
132}
133
134static struct request *
135deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
136{
137 struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
138 struct hlist_node *entry, *next;
139 struct deadline_rq *drq;
140
141 hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) {
142 struct request *__rq = drq->request;
143
144 BUG_ON(!ON_HASH(drq));
145
146 if (!rq_mergeable(__rq)) {
147 __deadline_del_drq_hash(drq);
148 continue;
149 }
150
151 if (rq_hash_key(__rq) == offset)
152 return __rq;
153 }
154
155 return NULL;
156}
157
158/*
159 * rb tree support functions
160 */
161#define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node)
162#define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)])
163#define rq_rb_key(rq) (rq)->sector
164
165static struct deadline_rq *
166__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
167{
168 struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
169 struct rb_node *parent = NULL;
170 struct deadline_rq *__drq;
171
172 while (*p) {
173 parent = *p;
174 __drq = rb_entry_drq(parent);
175
176 if (drq->rb_key < __drq->rb_key)
177 p = &(*p)->rb_left;
178 else if (drq->rb_key > __drq->rb_key)
179 p = &(*p)->rb_right;
180 else
181 return __drq;
182 }
183
184 rb_link_node(&drq->rb_node, parent, p);
185 return NULL;
186}
187 57
188static void 58static void
189deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 59deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
190{ 60{
191 struct deadline_rq *__alias; 61 struct rb_root *root = RQ_RB_ROOT(dd, rq);
192 62 struct request *__alias;
193 drq->rb_key = rq_rb_key(drq->request);
194 63
195retry: 64retry:
196 __alias = __deadline_add_drq_rb(dd, drq); 65 __alias = elv_rb_add(root, rq);
197 if (!__alias) { 66 if (unlikely(__alias)) {
198 rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); 67 deadline_move_request(dd, __alias);
199 return; 68 goto retry;
200 } 69 }
201
202 deadline_move_request(dd, __alias);
203 goto retry;
204} 70}
205 71
206static inline void 72static inline void
207deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) 73deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
208{ 74{
209 const int data_dir = rq_data_dir(drq->request); 75 const int data_dir = rq_data_dir(rq);
210 76
211 if (dd->next_drq[data_dir] == drq) { 77 if (dd->next_rq[data_dir] == rq) {
212 struct rb_node *rbnext = rb_next(&drq->rb_node); 78 struct rb_node *rbnext = rb_next(&rq->rb_node);
213 79
214 dd->next_drq[data_dir] = NULL; 80 dd->next_rq[data_dir] = NULL;
215 if (rbnext) 81 if (rbnext)
216 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 82 dd->next_rq[data_dir] = rb_entry_rq(rbnext);
217 }
218
219 BUG_ON(!RB_EMPTY_NODE(&drq->rb_node));
220 rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
221 RB_CLEAR_NODE(&drq->rb_node);
222}
223
224static struct request *
225deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
226{
227 struct rb_node *n = dd->sort_list[data_dir].rb_node;
228 struct deadline_rq *drq;
229
230 while (n) {
231 drq = rb_entry_drq(n);
232
233 if (sector < drq->rb_key)
234 n = n->rb_left;
235 else if (sector > drq->rb_key)
236 n = n->rb_right;
237 else
238 return drq->request;
239 } 83 }
240 84
241 return NULL; 85 elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
242} 86}
243 87
244/* 88/*
245 * deadline_find_first_drq finds the first (lowest sector numbered) request 89 * add rq to rbtree and fifo
246 * for the specified data_dir. Used to sweep back to the start of the disk
247 * (1-way elevator) after we process the last (highest sector) request.
248 */
249static struct deadline_rq *
250deadline_find_first_drq(struct deadline_data *dd, int data_dir)
251{
252 struct rb_node *n = dd->sort_list[data_dir].rb_node;
253
254 for (;;) {
255 if (n->rb_left == NULL)
256 return rb_entry_drq(n);
257
258 n = n->rb_left;
259 }
260}
261
262/*
263 * add drq to rbtree and fifo
264 */ 90 */
265static void 91static void
266deadline_add_request(struct request_queue *q, struct request *rq) 92deadline_add_request(struct request_queue *q, struct request *rq)
267{ 93{
268 struct deadline_data *dd = q->elevator->elevator_data; 94 struct deadline_data *dd = q->elevator->elevator_data;
269 struct deadline_rq *drq = RQ_DATA(rq); 95 const int data_dir = rq_data_dir(rq);
270 96
271 const int data_dir = rq_data_dir(drq->request); 97 deadline_add_rq_rb(dd, rq);
272 98
273 deadline_add_drq_rb(dd, drq);
274 /* 99 /*
275 * set expire time (only used for reads) and add to fifo list 100 * set expire time (only used for reads) and add to fifo list
276 */ 101 */
277 drq->expires = jiffies + dd->fifo_expire[data_dir]; 102 rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
278 list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); 103 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
279
280 if (rq_mergeable(rq))
281 deadline_add_drq_hash(dd, drq);
282} 104}
283 105
284/* 106/*
285 * remove rq from rbtree, fifo, and hash 107 * remove rq from rbtree and fifo.
286 */ 108 */
287static void deadline_remove_request(request_queue_t *q, struct request *rq) 109static void deadline_remove_request(request_queue_t *q, struct request *rq)
288{ 110{
289 struct deadline_rq *drq = RQ_DATA(rq);
290 struct deadline_data *dd = q->elevator->elevator_data; 111 struct deadline_data *dd = q->elevator->elevator_data;
291 112
292 list_del_init(&drq->fifo); 113 rq_fifo_clear(rq);
293 deadline_del_drq_rb(dd, drq); 114 deadline_del_rq_rb(dd, rq);
294 deadline_del_drq_hash(drq);
295} 115}
296 116
297static int 117static int
@@ -302,27 +122,14 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
302 int ret; 122 int ret;
303 123
304 /* 124 /*
305 * see if the merge hash can satisfy a back merge
306 */
307 __rq = deadline_find_drq_hash(dd, bio->bi_sector);
308 if (__rq) {
309 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
310
311 if (elv_rq_merge_ok(__rq, bio)) {
312 ret = ELEVATOR_BACK_MERGE;
313 goto out;
314 }
315 }
316
317 /*
318 * check for front merge 125 * check for front merge
319 */ 126 */
320 if (dd->front_merges) { 127 if (dd->front_merges) {
321 sector_t rb_key = bio->bi_sector + bio_sectors(bio); 128 sector_t sector = bio->bi_sector + bio_sectors(bio);
322 129
323 __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); 130 __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
324 if (__rq) { 131 if (__rq) {
325 BUG_ON(rb_key != rq_rb_key(__rq)); 132 BUG_ON(sector != __rq->sector);
326 133
327 if (elv_rq_merge_ok(__rq, bio)) { 134 if (elv_rq_merge_ok(__rq, bio)) {
328 ret = ELEVATOR_FRONT_MERGE; 135 ret = ELEVATOR_FRONT_MERGE;
@@ -333,29 +140,21 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
333 140
334 return ELEVATOR_NO_MERGE; 141 return ELEVATOR_NO_MERGE;
335out: 142out:
336 if (ret)
337 deadline_hot_drq_hash(dd, RQ_DATA(__rq));
338 *req = __rq; 143 *req = __rq;
339 return ret; 144 return ret;
340} 145}
341 146
342static void deadline_merged_request(request_queue_t *q, struct request *req) 147static void deadline_merged_request(request_queue_t *q, struct request *req,
148 int type)
343{ 149{
344 struct deadline_data *dd = q->elevator->elevator_data; 150 struct deadline_data *dd = q->elevator->elevator_data;
345 struct deadline_rq *drq = RQ_DATA(req);
346
347 /*
348 * hash always needs to be repositioned, key is end sector
349 */
350 deadline_del_drq_hash(drq);
351 deadline_add_drq_hash(dd, drq);
352 151
353 /* 152 /*
354 * if the merge was a front merge, we need to reposition request 153 * if the merge was a front merge, we need to reposition request
355 */ 154 */
356 if (rq_rb_key(req) != drq->rb_key) { 155 if (type == ELEVATOR_FRONT_MERGE) {
357 deadline_del_drq_rb(dd, drq); 156 elv_rb_del(RQ_RB_ROOT(dd, req), req);
358 deadline_add_drq_rb(dd, drq); 157 deadline_add_rq_rb(dd, req);
359 } 158 }
360} 159}
361 160
@@ -363,33 +162,14 @@ static void
363deadline_merged_requests(request_queue_t *q, struct request *req, 162deadline_merged_requests(request_queue_t *q, struct request *req,
364 struct request *next) 163 struct request *next)
365{ 164{
366 struct deadline_data *dd = q->elevator->elevator_data;
367 struct deadline_rq *drq = RQ_DATA(req);
368 struct deadline_rq *dnext = RQ_DATA(next);
369
370 BUG_ON(!drq);
371 BUG_ON(!dnext);
372
373 /* 165 /*
374 * reposition drq (this is the merged request) in hash, and in rbtree 166 * if next expires before rq, assign its expire time to rq
375 * in case of a front merge 167 * and move into next position (next will be deleted) in fifo
376 */ 168 */
377 deadline_del_drq_hash(drq); 169 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
378 deadline_add_drq_hash(dd, drq); 170 if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
379 171 list_move(&req->queuelist, &next->queuelist);
380 if (rq_rb_key(req) != drq->rb_key) { 172 rq_set_fifo_time(req, rq_fifo_time(next));
381 deadline_del_drq_rb(dd, drq);
382 deadline_add_drq_rb(dd, drq);
383 }
384
385 /*
386 * if dnext expires before drq, assign its expire time to drq
387 * and move into dnext position (dnext will be deleted) in fifo
388 */
389 if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
390 if (time_before(dnext->expires, drq->expires)) {
391 list_move(&drq->fifo, &dnext->fifo);
392 drq->expires = dnext->expires;
393 } 173 }
394 } 174 }
395 175
@@ -403,52 +183,50 @@ deadline_merged_requests(request_queue_t *q, struct request *req,
403 * move request from sort list to dispatch queue. 183 * move request from sort list to dispatch queue.
404 */ 184 */
405static inline void 185static inline void
406deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) 186deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
407{ 187{
408 request_queue_t *q = drq->request->q; 188 request_queue_t *q = rq->q;
409 189
410 deadline_remove_request(q, drq->request); 190 deadline_remove_request(q, rq);
411 elv_dispatch_add_tail(q, drq->request); 191 elv_dispatch_add_tail(q, rq);
412} 192}
413 193
414/* 194/*
415 * move an entry to dispatch queue 195 * move an entry to dispatch queue
416 */ 196 */
417static void 197static void
418deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) 198deadline_move_request(struct deadline_data *dd, struct request *rq)
419{ 199{
420 const int data_dir = rq_data_dir(drq->request); 200 const int data_dir = rq_data_dir(rq);
421 struct rb_node *rbnext = rb_next(&drq->rb_node); 201 struct rb_node *rbnext = rb_next(&rq->rb_node);
422 202
423 dd->next_drq[READ] = NULL; 203 dd->next_rq[READ] = NULL;
424 dd->next_drq[WRITE] = NULL; 204 dd->next_rq[WRITE] = NULL;
425 205
426 if (rbnext) 206 if (rbnext)
427 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 207 dd->next_rq[data_dir] = rb_entry_rq(rbnext);
428 208
429 dd->last_sector = drq->request->sector + drq->request->nr_sectors; 209 dd->last_sector = rq->sector + rq->nr_sectors;
430 210
431 /* 211 /*
432 * take it off the sort and fifo list, move 212 * take it off the sort and fifo list, move
433 * to dispatch queue 213 * to dispatch queue
434 */ 214 */
435 deadline_move_to_dispatch(dd, drq); 215 deadline_move_to_dispatch(dd, rq);
436} 216}
437 217
438#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo)
439
440/* 218/*
441 * deadline_check_fifo returns 0 if there are no expired reads on the fifo, 219 * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
442 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) 220 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
443 */ 221 */
444static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) 222static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
445{ 223{
446 struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); 224 struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
447 225
448 /* 226 /*
449 * drq is expired! 227 * rq is expired!
450 */ 228 */
451 if (time_after(jiffies, drq->expires)) 229 if (time_after(jiffies, rq_fifo_time(rq)))
452 return 1; 230 return 1;
453 231
454 return 0; 232 return 0;
@@ -463,21 +241,21 @@ static int deadline_dispatch_requests(request_queue_t *q, int force)
463 struct deadline_data *dd = q->elevator->elevator_data; 241 struct deadline_data *dd = q->elevator->elevator_data;
464 const int reads = !list_empty(&dd->fifo_list[READ]); 242 const int reads = !list_empty(&dd->fifo_list[READ]);
465 const int writes = !list_empty(&dd->fifo_list[WRITE]); 243 const int writes = !list_empty(&dd->fifo_list[WRITE]);
466 struct deadline_rq *drq; 244 struct request *rq;
467 int data_dir; 245 int data_dir;
468 246
469 /* 247 /*
470 * batches are currently reads XOR writes 248 * batches are currently reads XOR writes
471 */ 249 */
472 if (dd->next_drq[WRITE]) 250 if (dd->next_rq[WRITE])
473 drq = dd->next_drq[WRITE]; 251 rq = dd->next_rq[WRITE];
474 else 252 else
475 drq = dd->next_drq[READ]; 253 rq = dd->next_rq[READ];
476 254
477 if (drq) { 255 if (rq) {
478 /* we have a "next request" */ 256 /* we have a "next request" */
479 257
480 if (dd->last_sector != drq->request->sector) 258 if (dd->last_sector != rq->sector)
481 /* end the batch on a non sequential request */ 259 /* end the batch on a non sequential request */
482 dd->batching += dd->fifo_batch; 260 dd->batching += dd->fifo_batch;
483 261
@@ -526,30 +304,33 @@ dispatch_find_request:
526 if (deadline_check_fifo(dd, data_dir)) { 304 if (deadline_check_fifo(dd, data_dir)) {
527 /* An expired request exists - satisfy it */ 305 /* An expired request exists - satisfy it */
528 dd->batching = 0; 306 dd->batching = 0;
529 drq = list_entry_fifo(dd->fifo_list[data_dir].next); 307 rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
530 308
531 } else if (dd->next_drq[data_dir]) { 309 } else if (dd->next_rq[data_dir]) {
532 /* 310 /*
533 * The last req was the same dir and we have a next request in 311 * The last req was the same dir and we have a next request in
534 * sort order. No expired requests so continue on from here. 312 * sort order. No expired requests so continue on from here.
535 */ 313 */
536 drq = dd->next_drq[data_dir]; 314 rq = dd->next_rq[data_dir];
537 } else { 315 } else {
316 struct rb_node *node;
538 /* 317 /*
539 * The last req was the other direction or we have run out of 318 * The last req was the other direction or we have run out of
540 * higher-sectored requests. Go back to the lowest sectored 319 * higher-sectored requests. Go back to the lowest sectored
541 * request (1 way elevator) and start a new batch. 320 * request (1 way elevator) and start a new batch.
542 */ 321 */
543 dd->batching = 0; 322 dd->batching = 0;
544 drq = deadline_find_first_drq(dd, data_dir); 323 node = rb_first(&dd->sort_list[data_dir]);
324 if (node)
325 rq = rb_entry_rq(node);
545 } 326 }
546 327
547dispatch_request: 328dispatch_request:
548 /* 329 /*
549 * drq is the selected appropriate request. 330 * rq is the selected appropriate request.
550 */ 331 */
551 dd->batching++; 332 dd->batching++;
552 deadline_move_request(dd, drq); 333 deadline_move_request(dd, rq);
553 334
554 return 1; 335 return 1;
555} 336}
@@ -562,30 +343,6 @@ static int deadline_queue_empty(request_queue_t *q)
562 && list_empty(&dd->fifo_list[READ]); 343 && list_empty(&dd->fifo_list[READ]);
563} 344}
564 345
565static struct request *
566deadline_former_request(request_queue_t *q, struct request *rq)
567{
568 struct deadline_rq *drq = RQ_DATA(rq);
569 struct rb_node *rbprev = rb_prev(&drq->rb_node);
570
571 if (rbprev)
572 return rb_entry_drq(rbprev)->request;
573
574 return NULL;
575}
576
577static struct request *
578deadline_latter_request(request_queue_t *q, struct request *rq)
579{
580 struct deadline_rq *drq = RQ_DATA(rq);
581 struct rb_node *rbnext = rb_next(&drq->rb_node);
582
583 if (rbnext)
584 return rb_entry_drq(rbnext)->request;
585
586 return NULL;
587}
588
589static void deadline_exit_queue(elevator_t *e) 346static void deadline_exit_queue(elevator_t *e)
590{ 347{
591 struct deadline_data *dd = e->elevator_data; 348 struct deadline_data *dd = e->elevator_data;
@@ -593,46 +350,21 @@ static void deadline_exit_queue(elevator_t *e)
593 BUG_ON(!list_empty(&dd->fifo_list[READ])); 350 BUG_ON(!list_empty(&dd->fifo_list[READ]));
594 BUG_ON(!list_empty(&dd->fifo_list[WRITE])); 351 BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
595 352
596 mempool_destroy(dd->drq_pool);
597 kfree(dd->hash);
598 kfree(dd); 353 kfree(dd);
599} 354}
600 355
601/* 356/*
602 * initialize elevator private data (deadline_data), and alloc a drq for 357 * initialize elevator private data (deadline_data).
603 * each request on the free lists
604 */ 358 */
605static void *deadline_init_queue(request_queue_t *q, elevator_t *e) 359static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
606{ 360{
607 struct deadline_data *dd; 361 struct deadline_data *dd;
608 int i;
609
610 if (!drq_pool)
611 return NULL;
612 362
613 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); 363 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
614 if (!dd) 364 if (!dd)
615 return NULL; 365 return NULL;
616 memset(dd, 0, sizeof(*dd)); 366 memset(dd, 0, sizeof(*dd));
617 367
618 dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES,
619 GFP_KERNEL, q->node);
620 if (!dd->hash) {
621 kfree(dd);
622 return NULL;
623 }
624
625 dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
626 mempool_free_slab, drq_pool, q->node);
627 if (!dd->drq_pool) {
628 kfree(dd->hash);
629 kfree(dd);
630 return NULL;
631 }
632
633 for (i = 0; i < DL_HASH_ENTRIES; i++)
634 INIT_HLIST_HEAD(&dd->hash[i]);
635
636 INIT_LIST_HEAD(&dd->fifo_list[READ]); 368 INIT_LIST_HEAD(&dd->fifo_list[READ]);
637 INIT_LIST_HEAD(&dd->fifo_list[WRITE]); 369 INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
638 dd->sort_list[READ] = RB_ROOT; 370 dd->sort_list[READ] = RB_ROOT;
@@ -645,39 +377,6 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
645 return dd; 377 return dd;
646} 378}
647 379
648static void deadline_put_request(request_queue_t *q, struct request *rq)
649{
650 struct deadline_data *dd = q->elevator->elevator_data;
651 struct deadline_rq *drq = RQ_DATA(rq);
652
653 mempool_free(drq, dd->drq_pool);
654 rq->elevator_private = NULL;
655}
656
657static int
658deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
659 gfp_t gfp_mask)
660{
661 struct deadline_data *dd = q->elevator->elevator_data;
662 struct deadline_rq *drq;
663
664 drq = mempool_alloc(dd->drq_pool, gfp_mask);
665 if (drq) {
666 memset(drq, 0, sizeof(*drq));
667 RB_CLEAR_NODE(&drq->rb_node);
668 drq->request = rq;
669
670 INIT_HLIST_NODE(&drq->hash);
671
672 INIT_LIST_HEAD(&drq->fifo);
673
674 rq->elevator_private = drq;
675 return 0;
676 }
677
678 return 1;
679}
680
681/* 380/*
682 * sysfs parts below 381 * sysfs parts below
683 */ 382 */
@@ -757,10 +456,8 @@ static struct elevator_type iosched_deadline = {
757 .elevator_dispatch_fn = deadline_dispatch_requests, 456 .elevator_dispatch_fn = deadline_dispatch_requests,
758 .elevator_add_req_fn = deadline_add_request, 457 .elevator_add_req_fn = deadline_add_request,
759 .elevator_queue_empty_fn = deadline_queue_empty, 458 .elevator_queue_empty_fn = deadline_queue_empty,
760 .elevator_former_req_fn = deadline_former_request, 459 .elevator_former_req_fn = elv_rb_former_request,
761 .elevator_latter_req_fn = deadline_latter_request, 460 .elevator_latter_req_fn = elv_rb_latter_request,
762 .elevator_set_req_fn = deadline_set_request,
763 .elevator_put_req_fn = deadline_put_request,
764 .elevator_init_fn = deadline_init_queue, 461 .elevator_init_fn = deadline_init_queue,
765 .elevator_exit_fn = deadline_exit_queue, 462 .elevator_exit_fn = deadline_exit_queue,
766 }, 463 },
@@ -772,24 +469,11 @@ static struct elevator_type iosched_deadline = {
772 469
773static int __init deadline_init(void) 470static int __init deadline_init(void)
774{ 471{
775 int ret; 472 return elv_register(&iosched_deadline);
776
777 drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
778 0, 0, NULL, NULL);
779
780 if (!drq_pool)
781 return -ENOMEM;
782
783 ret = elv_register(&iosched_deadline);
784 if (ret)
785 kmem_cache_destroy(drq_pool);
786
787 return ret;
788} 473}
789 474
790static void __exit deadline_exit(void) 475static void __exit deadline_exit(void)
791{ 476{
792 kmem_cache_destroy(drq_pool);
793 elv_unregister(&iosched_deadline); 477 elv_unregister(&iosched_deadline);
794} 478}
795 479
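
Deadline gets the same treatment: struct deadline_rq goes away, the sort tree is keyed on rq->sector via the shared elv_rb_* helpers, the fifo reuses rq->queuelist, and the expiry timestamp travels through rq_fifo_time()/rq_set_fifo_time()/rq_entry_fifo()/rq_fifo_clear(). Those helpers are not shown in this excerpt; a plausible sketch, assuming the timestamp is parked in a request field that is idle while the scheduler owns the request:

/* Sketch, not necessarily the patch's exact definitions: stash the fifo
 * expiry in a field that is unused while the request sits in the elevator. */
#define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
#define rq_fifo_time(rq)	((unsigned long) (rq)->donelist.next)
#define rq_set_fifo_time(rq, exp)	((rq)->donelist.next = (void *) (exp))
#define rq_fifo_clear(rq)	do {			\
		list_del_init(&(rq)->queuelist);	\
		INIT_LIST_HEAD(&(rq)->donelist);	\
	} while (0)
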
diff --git a/block/elevator.c b/block/elevator.c
index 9b72dc7c8a5c..487dd3da8853 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * 5 *
6 * 30042000 Jens Axboe <axboe@suse.de> : 6 * 30042000 Jens Axboe <axboe@kernel.dk> :
7 * 7 *
8 * Split the elevator a bit so that it is possible to choose a different 8 * Split the elevator a bit so that it is possible to choose a different
9 * one or even write a new "plug in". There are three pieces: 9 * one or even write a new "plug in". There are three pieces:
@@ -33,6 +33,7 @@
33#include <linux/compiler.h> 33#include <linux/compiler.h>
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/blktrace_api.h> 35#include <linux/blktrace_api.h>
36#include <linux/hash.h>
36 37
37#include <asm/uaccess.h> 38#include <asm/uaccess.h>
38 39
@@ -40,6 +41,16 @@ static DEFINE_SPINLOCK(elv_list_lock);
40static LIST_HEAD(elv_list); 41static LIST_HEAD(elv_list);
41 42
42/* 43/*
44 * Merge hash stuff.
45 */
46static const int elv_hash_shift = 6;
47#define ELV_HASH_BLOCK(sec) ((sec) >> 3)
48#define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
49#define ELV_HASH_ENTRIES (1 << elv_hash_shift)
50#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
51#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
52
53/*
43 * can we safely merge with this request? 54 * can we safely merge with this request?
44 */ 55 */
45inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) 56inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
@@ -56,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
56 /* 67 /*
57 * same device and no special stuff set, merge is ok 68 * same device and no special stuff set, merge is ok
58 */ 69 */
59 if (rq->rq_disk == bio->bi_bdev->bd_disk && 70 if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
60 !rq->waiting && !rq->special)
61 return 1; 71 return 1;
62 72
63 return 0; 73 return 0;
@@ -151,27 +161,44 @@ __setup("elevator=", elevator_setup);
151 161
152static struct kobj_type elv_ktype; 162static struct kobj_type elv_ktype;
153 163
154static elevator_t *elevator_alloc(struct elevator_type *e) 164static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
155{ 165{
156 elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); 166 elevator_t *eq;
157 if (eq) { 167 int i;
158 memset(eq, 0, sizeof(*eq)); 168
159 eq->ops = &e->ops; 169 eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
160 eq->elevator_type = e; 170 if (unlikely(!eq))
161 kobject_init(&eq->kobj); 171 goto err;
162 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); 172
163 eq->kobj.ktype = &elv_ktype; 173 memset(eq, 0, sizeof(*eq));
164 mutex_init(&eq->sysfs_lock); 174 eq->ops = &e->ops;
165 } else { 175 eq->elevator_type = e;
166 elevator_put(e); 176 kobject_init(&eq->kobj);
167 } 177 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
178 eq->kobj.ktype = &elv_ktype;
179 mutex_init(&eq->sysfs_lock);
180
181 eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
182 GFP_KERNEL, q->node);
183 if (!eq->hash)
184 goto err;
185
186 for (i = 0; i < ELV_HASH_ENTRIES; i++)
187 INIT_HLIST_HEAD(&eq->hash[i]);
188
168 return eq; 189 return eq;
190err:
191 kfree(eq);
192 elevator_put(e);
193 return NULL;
169} 194}
170 195
171static void elevator_release(struct kobject *kobj) 196static void elevator_release(struct kobject *kobj)
172{ 197{
173 elevator_t *e = container_of(kobj, elevator_t, kobj); 198 elevator_t *e = container_of(kobj, elevator_t, kobj);
199
174 elevator_put(e->elevator_type); 200 elevator_put(e->elevator_type);
201 kfree(e->hash);
175 kfree(e); 202 kfree(e);
176} 203}
177 204
@@ -198,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name)
198 e = elevator_get("noop"); 225 e = elevator_get("noop");
199 } 226 }
200 227
201 eq = elevator_alloc(e); 228 eq = elevator_alloc(q, e);
202 if (!eq) 229 if (!eq)
203 return -ENOMEM; 230 return -ENOMEM;
204 231
@@ -212,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name)
212 return ret; 239 return ret;
213} 240}
214 241
242EXPORT_SYMBOL(elevator_init);
243
215void elevator_exit(elevator_t *e) 244void elevator_exit(elevator_t *e)
216{ 245{
217 mutex_lock(&e->sysfs_lock); 246 mutex_lock(&e->sysfs_lock);
@@ -223,10 +252,118 @@ void elevator_exit(elevator_t *e)
223 kobject_put(&e->kobj); 252 kobject_put(&e->kobj);
224} 253}
225 254
255EXPORT_SYMBOL(elevator_exit);
256
257static inline void __elv_rqhash_del(struct request *rq)
258{
259 hlist_del_init(&rq->hash);
260}
261
262static void elv_rqhash_del(request_queue_t *q, struct request *rq)
263{
264 if (ELV_ON_HASH(rq))
265 __elv_rqhash_del(rq);
266}
267
268static void elv_rqhash_add(request_queue_t *q, struct request *rq)
269{
270 elevator_t *e = q->elevator;
271
272 BUG_ON(ELV_ON_HASH(rq));
273 hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
274}
275
276static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
277{
278 __elv_rqhash_del(rq);
279 elv_rqhash_add(q, rq);
280}
281
282static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
283{
284 elevator_t *e = q->elevator;
285 struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
286 struct hlist_node *entry, *next;
287 struct request *rq;
288
289 hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
290 BUG_ON(!ELV_ON_HASH(rq));
291
292 if (unlikely(!rq_mergeable(rq))) {
293 __elv_rqhash_del(rq);
294 continue;
295 }
296
297 if (rq_hash_key(rq) == offset)
298 return rq;
299 }
300
301 return NULL;
302}
303
304/*
305 * RB-tree support functions for inserting/lookup/removal of requests
306 * in a sorted RB tree.
307 */
308struct request *elv_rb_add(struct rb_root *root, struct request *rq)
309{
310 struct rb_node **p = &root->rb_node;
311 struct rb_node *parent = NULL;
312 struct request *__rq;
313
314 while (*p) {
315 parent = *p;
316 __rq = rb_entry(parent, struct request, rb_node);
317
318 if (rq->sector < __rq->sector)
319 p = &(*p)->rb_left;
320 else if (rq->sector > __rq->sector)
321 p = &(*p)->rb_right;
322 else
323 return __rq;
324 }
325
326 rb_link_node(&rq->rb_node, parent, p);
327 rb_insert_color(&rq->rb_node, root);
328 return NULL;
329}
330
331EXPORT_SYMBOL(elv_rb_add);
332
333void elv_rb_del(struct rb_root *root, struct request *rq)
334{
335 BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
336 rb_erase(&rq->rb_node, root);
337 RB_CLEAR_NODE(&rq->rb_node);
338}
339
340EXPORT_SYMBOL(elv_rb_del);
341
342struct request *elv_rb_find(struct rb_root *root, sector_t sector)
343{
344 struct rb_node *n = root->rb_node;
345 struct request *rq;
346
347 while (n) {
348 rq = rb_entry(n, struct request, rb_node);
349
350 if (sector < rq->sector)
351 n = n->rb_left;
352 else if (sector > rq->sector)
353 n = n->rb_right;
354 else
355 return rq;
356 }
357
358 return NULL;
359}
360
361EXPORT_SYMBOL(elv_rb_find);
362
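
elv_rb_add(), elv_rb_del() and elv_rb_find() above are the shared request rbtree API that the reworked deadline code consumes: the tree is keyed on rq->sector, elv_rb_add() refuses duplicate keys and returns the existing alias, and a front-merge candidate is the queued request that starts where a bio ends. A minimal usage sketch in kernel C (the function names below are illustrative, not from the patch):

/* Illustrative only: how an I/O scheduler consumes the elv_rb_* helpers. */
static struct request *sketch_find_front_merge(struct rb_root *root,
						struct bio *bio)
{
	/* a queued request starting where this bio ends can be front merged */
	return elv_rb_find(root, bio->bi_sector + bio_sectors(bio));
}

static void sketch_reposition(struct rb_root *root, struct request *rq)
{
	/* rq->sector changed (front merge), so the request must be re-keyed */
	elv_rb_del(root, rq);
	if (elv_rb_add(root, rq))
		return;	/* rq was not inserted: an alias already starts at this
			 * sector; deadline resolves this by dispatching the
			 * alias and retrying (see deadline_add_rq_rb above) */
}
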
226/* 363/*
227 * Insert rq into dispatch queue of q. Queue lock must be held on 364 * Insert rq into dispatch queue of q. Queue lock must be held on
228 * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be 365 * entry. rq is sort inserted into the dispatch queue. To be used by
229 * appended to the dispatch queue. To be used by specific elevators. 366 * specific elevators.
230 */ 367 */
231void elv_dispatch_sort(request_queue_t *q, struct request *rq) 368void elv_dispatch_sort(request_queue_t *q, struct request *rq)
232{ 369{
@@ -235,6 +372,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
235 372
236 if (q->last_merge == rq) 373 if (q->last_merge == rq)
237 q->last_merge = NULL; 374 q->last_merge = NULL;
375
376 elv_rqhash_del(q, rq);
377
238 q->nr_sorted--; 378 q->nr_sorted--;
239 379
240 boundary = q->end_sector; 380 boundary = q->end_sector;
@@ -242,7 +382,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
242 list_for_each_prev(entry, &q->queue_head) { 382 list_for_each_prev(entry, &q->queue_head) {
243 struct request *pos = list_entry_rq(entry); 383 struct request *pos = list_entry_rq(entry);
244 384
245 if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) 385 if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
246 break; 386 break;
247 if (rq->sector >= boundary) { 387 if (rq->sector >= boundary) {
248 if (pos->sector < boundary) 388 if (pos->sector < boundary)
@@ -258,11 +398,38 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
258 list_add(&rq->queuelist, entry); 398 list_add(&rq->queuelist, entry);
259} 399}
260 400
401EXPORT_SYMBOL(elv_dispatch_sort);
402
403/*
404 * Insert rq into dispatch queue of q. Queue lock must be held on
405 * entry. rq is added to the back of the dispatch queue. To be used by
406 * specific elevators.
407 */
408void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
409{
410 if (q->last_merge == rq)
411 q->last_merge = NULL;
412
413 elv_rqhash_del(q, rq);
414
415 q->nr_sorted--;
416
417 q->end_sector = rq_end_sector(rq);
418 q->boundary_rq = rq;
419 list_add_tail(&rq->queuelist, &q->queue_head);
420}
421
422EXPORT_SYMBOL(elv_dispatch_add_tail);
423
261int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) 424int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
262{ 425{
263 elevator_t *e = q->elevator; 426 elevator_t *e = q->elevator;
427 struct request *__rq;
264 int ret; 428 int ret;
265 429
430 /*
431 * First try one-hit cache.
432 */
266 if (q->last_merge) { 433 if (q->last_merge) {
267 ret = elv_try_merge(q->last_merge, bio); 434 ret = elv_try_merge(q->last_merge, bio);
268 if (ret != ELEVATOR_NO_MERGE) { 435 if (ret != ELEVATOR_NO_MERGE) {
@@ -271,18 +438,30 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
271 } 438 }
272 } 439 }
273 440
441 /*
442 * See if our hash lookup can find a potential backmerge.
443 */
444 __rq = elv_rqhash_find(q, bio->bi_sector);
445 if (__rq && elv_rq_merge_ok(__rq, bio)) {
446 *req = __rq;
447 return ELEVATOR_BACK_MERGE;
448 }
449
274 if (e->ops->elevator_merge_fn) 450 if (e->ops->elevator_merge_fn)
275 return e->ops->elevator_merge_fn(q, req, bio); 451 return e->ops->elevator_merge_fn(q, req, bio);
276 452
277 return ELEVATOR_NO_MERGE; 453 return ELEVATOR_NO_MERGE;
278} 454}
279 455
280void elv_merged_request(request_queue_t *q, struct request *rq) 456void elv_merged_request(request_queue_t *q, struct request *rq, int type)
281{ 457{
282 elevator_t *e = q->elevator; 458 elevator_t *e = q->elevator;
283 459
284 if (e->ops->elevator_merged_fn) 460 if (e->ops->elevator_merged_fn)
285 e->ops->elevator_merged_fn(q, rq); 461 e->ops->elevator_merged_fn(q, rq, type);
462
463 if (type == ELEVATOR_BACK_MERGE)
464 elv_rqhash_reposition(q, rq);
286 465
287 q->last_merge = rq; 466 q->last_merge = rq;
288} 467}
@@ -294,8 +473,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
294 473
295 if (e->ops->elevator_merge_req_fn) 474 if (e->ops->elevator_merge_req_fn)
296 e->ops->elevator_merge_req_fn(q, rq, next); 475 e->ops->elevator_merge_req_fn(q, rq, next);
297 q->nr_sorted--;
298 476
477 elv_rqhash_reposition(q, rq);
478 elv_rqhash_del(q, next);
479
480 q->nr_sorted--;
299 q->last_merge = rq; 481 q->last_merge = rq;
300} 482}
301 483
@@ -313,7 +495,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
313 e->ops->elevator_deactivate_req_fn(q, rq); 495 e->ops->elevator_deactivate_req_fn(q, rq);
314 } 496 }
315 497
316 rq->flags &= ~REQ_STARTED; 498 rq->cmd_flags &= ~REQ_STARTED;
317 499
318 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 500 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
319} 501}
@@ -344,13 +526,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
344 526
345 switch (where) { 527 switch (where) {
346 case ELEVATOR_INSERT_FRONT: 528 case ELEVATOR_INSERT_FRONT:
347 rq->flags |= REQ_SOFTBARRIER; 529 rq->cmd_flags |= REQ_SOFTBARRIER;
348 530
349 list_add(&rq->queuelist, &q->queue_head); 531 list_add(&rq->queuelist, &q->queue_head);
350 break; 532 break;
351 533
352 case ELEVATOR_INSERT_BACK: 534 case ELEVATOR_INSERT_BACK:
353 rq->flags |= REQ_SOFTBARRIER; 535 rq->cmd_flags |= REQ_SOFTBARRIER;
354 elv_drain_elevator(q); 536 elv_drain_elevator(q);
355 list_add_tail(&rq->queuelist, &q->queue_head); 537 list_add_tail(&rq->queuelist, &q->queue_head);
356 /* 538 /*
@@ -369,10 +551,14 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
369 551
370 case ELEVATOR_INSERT_SORT: 552 case ELEVATOR_INSERT_SORT:
371 BUG_ON(!blk_fs_request(rq)); 553 BUG_ON(!blk_fs_request(rq));
372 rq->flags |= REQ_SORTED; 554 rq->cmd_flags |= REQ_SORTED;
373 q->nr_sorted++; 555 q->nr_sorted++;
374 if (q->last_merge == NULL && rq_mergeable(rq)) 556 if (rq_mergeable(rq)) {
375 q->last_merge = rq; 557 elv_rqhash_add(q, rq);
558 if (!q->last_merge)
559 q->last_merge = rq;
560 }
561
376 /* 562 /*
377 * Some ioscheds (cfq) run q->request_fn directly, so 563 * Some ioscheds (cfq) run q->request_fn directly, so
378 * rq cannot be accessed after calling 564 * rq cannot be accessed after calling
@@ -387,7 +573,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
387 * insertion; otherwise, requests should be requeued 573 * insertion; otherwise, requests should be requeued
388 * in ordseq order. 574 * in ordseq order.
389 */ 575 */
390 rq->flags |= REQ_SOFTBARRIER; 576 rq->cmd_flags |= REQ_SOFTBARRIER;
391 577
392 if (q->ordseq == 0) { 578 if (q->ordseq == 0) {
393 list_add(&rq->queuelist, &q->queue_head); 579 list_add(&rq->queuelist, &q->queue_head);
@@ -429,9 +615,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
429 int plug) 615 int plug)
430{ 616{
431 if (q->ordcolor) 617 if (q->ordcolor)
432 rq->flags |= REQ_ORDERED_COLOR; 618 rq->cmd_flags |= REQ_ORDERED_COLOR;
433 619
434 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 620 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
435 /* 621 /*
436 * toggle ordered color 622 * toggle ordered color
437 */ 623 */
@@ -452,7 +638,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
452 q->end_sector = rq_end_sector(rq); 638 q->end_sector = rq_end_sector(rq);
453 q->boundary_rq = rq; 639 q->boundary_rq = rq;
454 } 640 }
455 } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) 641 } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
456 where = ELEVATOR_INSERT_BACK; 642 where = ELEVATOR_INSERT_BACK;
457 643
458 if (plug) 644 if (plug)
@@ -461,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
461 elv_insert(q, rq, where); 647 elv_insert(q, rq, where);
462} 648}
463 649
650EXPORT_SYMBOL(__elv_add_request);
651
464void elv_add_request(request_queue_t *q, struct request *rq, int where, 652void elv_add_request(request_queue_t *q, struct request *rq, int where,
465 int plug) 653 int plug)
466{ 654{
@@ -471,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
471 spin_unlock_irqrestore(q->queue_lock, flags); 659 spin_unlock_irqrestore(q->queue_lock, flags);
472} 660}
473 661
662EXPORT_SYMBOL(elv_add_request);
663
474static inline struct request *__elv_next_request(request_queue_t *q) 664static inline struct request *__elv_next_request(request_queue_t *q)
475{ 665{
476 struct request *rq; 666 struct request *rq;
@@ -493,7 +683,7 @@ struct request *elv_next_request(request_queue_t *q)
493 int ret; 683 int ret;
494 684
495 while ((rq = __elv_next_request(q)) != NULL) { 685 while ((rq = __elv_next_request(q)) != NULL) {
496 if (!(rq->flags & REQ_STARTED)) { 686 if (!(rq->cmd_flags & REQ_STARTED)) {
497 elevator_t *e = q->elevator; 687 elevator_t *e = q->elevator;
498 688
499 /* 689 /*
@@ -510,7 +700,7 @@ struct request *elv_next_request(request_queue_t *q)
510 * it, a request that has been delayed should 700 * it, a request that has been delayed should
511 * not be passed by new incoming requests 701 * not be passed by new incoming requests
512 */ 702 */
513 rq->flags |= REQ_STARTED; 703 rq->cmd_flags |= REQ_STARTED;
514 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 704 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
515 } 705 }
516 706
@@ -519,7 +709,7 @@ struct request *elv_next_request(request_queue_t *q)
519 q->boundary_rq = NULL; 709 q->boundary_rq = NULL;
520 } 710 }
521 711
522 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) 712 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
523 break; 713 break;
524 714
525 ret = q->prep_rq_fn(q, rq); 715 ret = q->prep_rq_fn(q, rq);
@@ -541,7 +731,7 @@ struct request *elv_next_request(request_queue_t *q)
541 nr_bytes = rq->data_len; 731 nr_bytes = rq->data_len;
542 732
543 blkdev_dequeue_request(rq); 733 blkdev_dequeue_request(rq);
544 rq->flags |= REQ_QUIET; 734 rq->cmd_flags |= REQ_QUIET;
545 end_that_request_chunk(rq, 0, nr_bytes); 735 end_that_request_chunk(rq, 0, nr_bytes);
546 end_that_request_last(rq, 0); 736 end_that_request_last(rq, 0);
547 } else { 737 } else {
@@ -554,9 +744,12 @@ struct request *elv_next_request(request_queue_t *q)
554 return rq; 744 return rq;
555} 745}
556 746
747EXPORT_SYMBOL(elv_next_request);
748
557void elv_dequeue_request(request_queue_t *q, struct request *rq) 749void elv_dequeue_request(request_queue_t *q, struct request *rq)
558{ 750{
559 BUG_ON(list_empty(&rq->queuelist)); 751 BUG_ON(list_empty(&rq->queuelist));
752 BUG_ON(ELV_ON_HASH(rq));
560 753
561 list_del_init(&rq->queuelist); 754 list_del_init(&rq->queuelist);
562 755
@@ -569,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq)
569 q->in_flight++; 762 q->in_flight++;
570} 763}
571 764
765EXPORT_SYMBOL(elv_dequeue_request);
766
572int elv_queue_empty(request_queue_t *q) 767int elv_queue_empty(request_queue_t *q)
573{ 768{
574 elevator_t *e = q->elevator; 769 elevator_t *e = q->elevator;
@@ -582,6 +777,8 @@ int elv_queue_empty(request_queue_t *q)
582 return 1; 777 return 1;
583} 778}
584 779
780EXPORT_SYMBOL(elv_queue_empty);
781
585struct request *elv_latter_request(request_queue_t *q, struct request *rq) 782struct request *elv_latter_request(request_queue_t *q, struct request *rq)
586{ 783{
587 elevator_t *e = q->elevator; 784 elevator_t *e = q->elevator;
@@ -600,13 +797,12 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
600 return NULL; 797 return NULL;
601} 798}
602 799
603int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 800int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
604 gfp_t gfp_mask)
605{ 801{
606 elevator_t *e = q->elevator; 802 elevator_t *e = q->elevator;
607 803
608 if (e->ops->elevator_set_req_fn) 804 if (e->ops->elevator_set_req_fn)
609 return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); 805 return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
610 806
611 rq->elevator_private = NULL; 807 rq->elevator_private = NULL;
612 return 0; 808 return 0;
@@ -620,12 +816,12 @@ void elv_put_request(request_queue_t *q, struct request *rq)
620 e->ops->elevator_put_req_fn(q, rq); 816 e->ops->elevator_put_req_fn(q, rq);
621} 817}
622 818
623int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) 819int elv_may_queue(request_queue_t *q, int rw)
624{ 820{
625 elevator_t *e = q->elevator; 821 elevator_t *e = q->elevator;
626 822
627 if (e->ops->elevator_may_queue_fn) 823 if (e->ops->elevator_may_queue_fn)
628 return e->ops->elevator_may_queue_fn(q, rw, bio); 824 return e->ops->elevator_may_queue_fn(q, rw);
629 825
630 return ELV_MQUEUE_MAY; 826 return ELV_MQUEUE_MAY;
631} 827}
@@ -792,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
792 /* 988 /*
793 * Allocate new elevator 989 * Allocate new elevator
794 */ 990 */
795 e = elevator_alloc(new_e); 991 e = elevator_alloc(q, new_e);
796 if (!e) 992 if (!e)
797 return 0; 993 return 0;
798 994
@@ -908,11 +1104,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
908 return len; 1104 return len;
909} 1105}
910 1106
911EXPORT_SYMBOL(elv_dispatch_sort); 1107struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
912EXPORT_SYMBOL(elv_add_request); 1108{
913EXPORT_SYMBOL(__elv_add_request); 1109 struct rb_node *rbprev = rb_prev(&rq->rb_node);
914EXPORT_SYMBOL(elv_next_request); 1110
915EXPORT_SYMBOL(elv_dequeue_request); 1111 if (rbprev)
916EXPORT_SYMBOL(elv_queue_empty); 1112 return rb_entry_rq(rbprev);
917EXPORT_SYMBOL(elevator_exit); 1113
918EXPORT_SYMBOL(elevator_init); 1114 return NULL;
1115}
1116
1117EXPORT_SYMBOL(elv_rb_former_request);
1118
1119struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
1120{
1121 struct rb_node *rbnext = rb_next(&rq->rb_node);
1122
1123 if (rbnext)
1124 return rb_entry_rq(rbnext);
1125
1126 return NULL;
1127}
1128
1129EXPORT_SYMBOL(elv_rb_latter_request);
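The two exported helpers above give schedulers that keep their sorted requests in an rbtree a common former/latter lookup instead of private walkers. As a rough sketch only (assuming <linux/rbtree.h> and the rb_entry_rq() macro used above; the function name is hypothetical, not from the patch), walking such a tree in ascending order looks like this:

#include <linux/rbtree.h>
#include <linux/blkdev.h>

/* Hypothetical debug helper: walk a scheduler's sorted request tree in
 * ascending order using the same primitives as elv_rb_latter_request(). */
static void example_dump_sorted(struct rb_root *root)
{
	struct rb_node *n;

	for (n = rb_first(root); n; n = rb_next(n)) {
		struct request *rq = rb_entry_rq(n);

		printk("rq %p: sector %llu, %lu sectors\n", rq,
		       (unsigned long long)rq->sector, rq->nr_sectors);
	}
}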
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 51dc0edf76e0..83425fb3c8db 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data);
39static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); 39static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
40static void init_request_from_bio(struct request *req, struct bio *bio); 40static void init_request_from_bio(struct request *req, struct bio *bio);
41static int __make_request(request_queue_t *q, struct bio *bio); 41static int __make_request(request_queue_t *q, struct bio *bio);
42static struct io_context *current_io_context(gfp_t gfp_flags, int node);
42 43
43/* 44/*
44 * For the allocated request tables 45 * For the allocated request tables
@@ -277,19 +278,19 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
277 278
278EXPORT_SYMBOL(blk_queue_make_request); 279EXPORT_SYMBOL(blk_queue_make_request);
279 280
280static inline void rq_init(request_queue_t *q, struct request *rq) 281static void rq_init(request_queue_t *q, struct request *rq)
281{ 282{
282 INIT_LIST_HEAD(&rq->queuelist); 283 INIT_LIST_HEAD(&rq->queuelist);
283 INIT_LIST_HEAD(&rq->donelist); 284 INIT_LIST_HEAD(&rq->donelist);
284 285
285 rq->errors = 0; 286 rq->errors = 0;
286 rq->rq_status = RQ_ACTIVE;
287 rq->bio = rq->biotail = NULL; 287 rq->bio = rq->biotail = NULL;
288 INIT_HLIST_NODE(&rq->hash);
289 RB_CLEAR_NODE(&rq->rb_node);
288 rq->ioprio = 0; 290 rq->ioprio = 0;
289 rq->buffer = NULL; 291 rq->buffer = NULL;
290 rq->ref_count = 1; 292 rq->ref_count = 1;
291 rq->q = q; 293 rq->q = q;
292 rq->waiting = NULL;
293 rq->special = NULL; 294 rq->special = NULL;
294 rq->data_len = 0; 295 rq->data_len = 0;
295 rq->data = NULL; 296 rq->data = NULL;
@@ -382,8 +383,8 @@ unsigned blk_ordered_req_seq(struct request *rq)
382 if (rq == &q->post_flush_rq) 383 if (rq == &q->post_flush_rq)
383 return QUEUE_ORDSEQ_POSTFLUSH; 384 return QUEUE_ORDSEQ_POSTFLUSH;
384 385
385 if ((rq->flags & REQ_ORDERED_COLOR) == 386 if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
386 (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) 387 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
387 return QUEUE_ORDSEQ_DRAIN; 388 return QUEUE_ORDSEQ_DRAIN;
388 else 389 else
389 return QUEUE_ORDSEQ_DONE; 390 return QUEUE_ORDSEQ_DONE;
@@ -446,11 +447,11 @@ static void queue_flush(request_queue_t *q, unsigned which)
446 end_io = post_flush_end_io; 447 end_io = post_flush_end_io;
447 } 448 }
448 449
450 rq->cmd_flags = REQ_HARDBARRIER;
449 rq_init(q, rq); 451 rq_init(q, rq);
450 rq->flags = REQ_HARDBARRIER;
451 rq->elevator_private = NULL; 452 rq->elevator_private = NULL;
453 rq->elevator_private2 = NULL;
452 rq->rq_disk = q->bar_rq.rq_disk; 454 rq->rq_disk = q->bar_rq.rq_disk;
453 rq->rl = NULL;
454 rq->end_io = end_io; 455 rq->end_io = end_io;
455 q->prepare_flush_fn(q, rq); 456 q->prepare_flush_fn(q, rq);
456 457
@@ -471,11 +472,13 @@ static inline struct request *start_ordered(request_queue_t *q,
471 blkdev_dequeue_request(rq); 472 blkdev_dequeue_request(rq);
472 q->orig_bar_rq = rq; 473 q->orig_bar_rq = rq;
473 rq = &q->bar_rq; 474 rq = &q->bar_rq;
475 rq->cmd_flags = 0;
474 rq_init(q, rq); 476 rq_init(q, rq);
475 rq->flags = bio_data_dir(q->orig_bar_rq->bio); 477 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
476 rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; 478 rq->cmd_flags |= REQ_RW;
479 rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
477 rq->elevator_private = NULL; 480 rq->elevator_private = NULL;
478 rq->rl = NULL; 481 rq->elevator_private2 = NULL;
479 init_request_from_bio(rq, q->orig_bar_rq->bio); 482 init_request_from_bio(rq, q->orig_bar_rq->bio);
480 rq->end_io = bar_end_io; 483 rq->end_io = bar_end_io;
481 484
@@ -587,8 +590,8 @@ static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
587 return 0; 590 return 0;
588} 591}
589 592
590static inline int ordered_bio_endio(struct request *rq, struct bio *bio, 593static int ordered_bio_endio(struct request *rq, struct bio *bio,
591 unsigned int nbytes, int error) 594 unsigned int nbytes, int error)
592{ 595{
593 request_queue_t *q = rq->q; 596 request_queue_t *q = rq->q;
594 bio_end_io_t *endio; 597 bio_end_io_t *endio;
@@ -1124,7 +1127,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
1124 } 1127 }
1125 1128
1126 list_del_init(&rq->queuelist); 1129 list_del_init(&rq->queuelist);
1127 rq->flags &= ~REQ_QUEUED; 1130 rq->cmd_flags &= ~REQ_QUEUED;
1128 rq->tag = -1; 1131 rq->tag = -1;
1129 1132
1130 if (unlikely(bqt->tag_index[tag] == NULL)) 1133 if (unlikely(bqt->tag_index[tag] == NULL))
@@ -1160,7 +1163,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
1160 struct blk_queue_tag *bqt = q->queue_tags; 1163 struct blk_queue_tag *bqt = q->queue_tags;
1161 int tag; 1164 int tag;
1162 1165
1163 if (unlikely((rq->flags & REQ_QUEUED))) { 1166 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
1164 printk(KERN_ERR 1167 printk(KERN_ERR
1165 "%s: request %p for device [%s] already tagged %d", 1168 "%s: request %p for device [%s] already tagged %d",
1166 __FUNCTION__, rq, 1169 __FUNCTION__, rq,
@@ -1168,13 +1171,18 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
1168 BUG(); 1171 BUG();
1169 } 1172 }
1170 1173
1171 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); 1174 /*
1172 if (tag >= bqt->max_depth) 1175 * Protect against shared tag maps, as we may not have exclusive
1173 return 1; 1176 * access to the tag map.
1177 */
1178 do {
1179 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
1180 if (tag >= bqt->max_depth)
1181 return 1;
1174 1182
1175 __set_bit(tag, bqt->tag_map); 1183 } while (test_and_set_bit(tag, bqt->tag_map));
1176 1184
1177 rq->flags |= REQ_QUEUED; 1185 rq->cmd_flags |= REQ_QUEUED;
1178 rq->tag = tag; 1186 rq->tag = tag;
1179 bqt->tag_index[tag] = rq; 1187 bqt->tag_index[tag] = rq;
1180 blkdev_dequeue_request(rq); 1188 blkdev_dequeue_request(rq);
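The hunk above is the "blk_queue_start_tag() shared map race fix" from the changelog: with a tag map shared between queues, find_first_zero_bit() followed by a plain __set_bit() lets two allocators claim the same tag, so the search now retries until test_and_set_bit() succeeds. A minimal sketch of the same claim-or-retry pattern outside the block layer (names hypothetical):

#include <linux/bitops.h>

/* Claim a free slot in a bitmap that other CPUs may be allocating from
 * concurrently; returns the slot index, or -1 if the map is exhausted. */
static int example_alloc_slot(unsigned long *map, unsigned int depth)
{
	unsigned int slot;

	do {
		slot = find_first_zero_bit(map, depth);
		if (slot >= depth)
			return -1;
		/* another CPU may set the bit between the search and here */
	} while (test_and_set_bit(slot, map));

	return slot;
}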
@@ -1210,65 +1218,31 @@ void blk_queue_invalidate_tags(request_queue_t *q)
1210 printk(KERN_ERR 1218 printk(KERN_ERR
1211 "%s: bad tag found on list\n", __FUNCTION__); 1219 "%s: bad tag found on list\n", __FUNCTION__);
1212 list_del_init(&rq->queuelist); 1220 list_del_init(&rq->queuelist);
1213 rq->flags &= ~REQ_QUEUED; 1221 rq->cmd_flags &= ~REQ_QUEUED;
1214 } else 1222 } else
1215 blk_queue_end_tag(q, rq); 1223 blk_queue_end_tag(q, rq);
1216 1224
1217 rq->flags &= ~REQ_STARTED; 1225 rq->cmd_flags &= ~REQ_STARTED;
1218 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1226 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1219 } 1227 }
1220} 1228}
1221 1229
1222EXPORT_SYMBOL(blk_queue_invalidate_tags); 1230EXPORT_SYMBOL(blk_queue_invalidate_tags);
1223 1231
1224static const char * const rq_flags[] = {
1225 "REQ_RW",
1226 "REQ_FAILFAST",
1227 "REQ_SORTED",
1228 "REQ_SOFTBARRIER",
1229 "REQ_HARDBARRIER",
1230 "REQ_FUA",
1231 "REQ_CMD",
1232 "REQ_NOMERGE",
1233 "REQ_STARTED",
1234 "REQ_DONTPREP",
1235 "REQ_QUEUED",
1236 "REQ_ELVPRIV",
1237 "REQ_PC",
1238 "REQ_BLOCK_PC",
1239 "REQ_SENSE",
1240 "REQ_FAILED",
1241 "REQ_QUIET",
1242 "REQ_SPECIAL",
1243 "REQ_DRIVE_CMD",
1244 "REQ_DRIVE_TASK",
1245 "REQ_DRIVE_TASKFILE",
1246 "REQ_PREEMPT",
1247 "REQ_PM_SUSPEND",
1248 "REQ_PM_RESUME",
1249 "REQ_PM_SHUTDOWN",
1250 "REQ_ORDERED_COLOR",
1251};
1252
1253void blk_dump_rq_flags(struct request *rq, char *msg) 1232void blk_dump_rq_flags(struct request *rq, char *msg)
1254{ 1233{
1255 int bit; 1234 int bit;
1256 1235
1257 printk("%s: dev %s: flags = ", msg, 1236 printk("%s: dev %s: type=%x, flags=%x\n", msg,
1258 rq->rq_disk ? rq->rq_disk->disk_name : "?"); 1237 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
1259 bit = 0; 1238 rq->cmd_flags);
1260 do {
1261 if (rq->flags & (1 << bit))
1262 printk("%s ", rq_flags[bit]);
1263 bit++;
1264 } while (bit < __REQ_NR_BITS);
1265 1239
1266 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, 1240 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1267 rq->nr_sectors, 1241 rq->nr_sectors,
1268 rq->current_nr_sectors); 1242 rq->current_nr_sectors);
1269 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); 1243 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1270 1244
1271 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { 1245 if (blk_pc_request(rq)) {
1272 printk("cdb: "); 1246 printk("cdb: ");
1273 for (bit = 0; bit < sizeof(rq->cmd); bit++) 1247 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1274 printk("%02x ", rq->cmd[bit]); 1248 printk("%02x ", rq->cmd[bit]);
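With the rq_flags[] string table gone, the split between rq->cmd_type and rq->cmd_flags carries through the rest of the patch: drivers test the request type via the blk_fs_request()/blk_pc_request()/blk_special_request() helpers and keep only modifier bits in cmd_flags. A hedged sketch of the resulting driver-side idiom (hypothetical function, not part of the patch):

#include <linux/blkdev.h>

static void example_classify(struct request *rq)
{
	if (blk_fs_request(rq))
		printk("fs request, write=%d\n",
		       (rq->cmd_flags & REQ_RW) != 0);
	else if (blk_pc_request(rq))
		printk("packet command, cdb[0]=%02x\n", rq->cmd[0]);
	else if (blk_special_request(rq))
		printk("driver-private request, data %p\n", rq->special);
}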
@@ -1441,7 +1415,7 @@ static inline int ll_new_mergeable(request_queue_t *q,
1441 int nr_phys_segs = bio_phys_segments(q, bio); 1415 int nr_phys_segs = bio_phys_segments(q, bio);
1442 1416
1443 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1417 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1444 req->flags |= REQ_NOMERGE; 1418 req->cmd_flags |= REQ_NOMERGE;
1445 if (req == q->last_merge) 1419 if (req == q->last_merge)
1446 q->last_merge = NULL; 1420 q->last_merge = NULL;
1447 return 0; 1421 return 0;
@@ -1464,7 +1438,7 @@ static inline int ll_new_hw_segment(request_queue_t *q,
1464 1438
1465 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments 1439 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1466 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1440 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1467 req->flags |= REQ_NOMERGE; 1441 req->cmd_flags |= REQ_NOMERGE;
1468 if (req == q->last_merge) 1442 if (req == q->last_merge)
1469 q->last_merge = NULL; 1443 q->last_merge = NULL;
1470 return 0; 1444 return 0;
@@ -1491,7 +1465,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1491 max_sectors = q->max_sectors; 1465 max_sectors = q->max_sectors;
1492 1466
1493 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1467 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1494 req->flags |= REQ_NOMERGE; 1468 req->cmd_flags |= REQ_NOMERGE;
1495 if (req == q->last_merge) 1469 if (req == q->last_merge)
1496 q->last_merge = NULL; 1470 q->last_merge = NULL;
1497 return 0; 1471 return 0;
@@ -1530,7 +1504,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1530 1504
1531 1505
1532 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1506 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1533 req->flags |= REQ_NOMERGE; 1507 req->cmd_flags |= REQ_NOMERGE;
1534 if (req == q->last_merge) 1508 if (req == q->last_merge)
1535 q->last_merge = NULL; 1509 q->last_merge = NULL;
1536 return 0; 1510 return 0;
@@ -2029,14 +2003,13 @@ EXPORT_SYMBOL(blk_get_queue);
2029 2003
2030static inline void blk_free_request(request_queue_t *q, struct request *rq) 2004static inline void blk_free_request(request_queue_t *q, struct request *rq)
2031{ 2005{
2032 if (rq->flags & REQ_ELVPRIV) 2006 if (rq->cmd_flags & REQ_ELVPRIV)
2033 elv_put_request(q, rq); 2007 elv_put_request(q, rq);
2034 mempool_free(rq, q->rq.rq_pool); 2008 mempool_free(rq, q->rq.rq_pool);
2035} 2009}
2036 2010
2037static inline struct request * 2011static struct request *
2038blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, 2012blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask)
2039 int priv, gfp_t gfp_mask)
2040{ 2013{
2041 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 2014 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
2042 2015
@@ -2044,17 +2017,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
2044 return NULL; 2017 return NULL;
2045 2018
2046 /* 2019 /*
2047 * first three bits are identical in rq->flags and bio->bi_rw, 2020 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
2048 * see bio.h and blkdev.h 2021 * see bio.h and blkdev.h
2049 */ 2022 */
2050 rq->flags = rw; 2023 rq->cmd_flags = rw | REQ_ALLOCED;
2051 2024
2052 if (priv) { 2025 if (priv) {
2053 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { 2026 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
2054 mempool_free(rq, q->rq.rq_pool); 2027 mempool_free(rq, q->rq.rq_pool);
2055 return NULL; 2028 return NULL;
2056 } 2029 }
2057 rq->flags |= REQ_ELVPRIV; 2030 rq->cmd_flags |= REQ_ELVPRIV;
2058 } 2031 }
2059 2032
2060 return rq; 2033 return rq;
@@ -2141,13 +2114,13 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
2141 struct io_context *ioc = NULL; 2114 struct io_context *ioc = NULL;
2142 int may_queue, priv; 2115 int may_queue, priv;
2143 2116
2144 may_queue = elv_may_queue(q, rw, bio); 2117 may_queue = elv_may_queue(q, rw);
2145 if (may_queue == ELV_MQUEUE_NO) 2118 if (may_queue == ELV_MQUEUE_NO)
2146 goto rq_starved; 2119 goto rq_starved;
2147 2120
2148 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { 2121 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
2149 if (rl->count[rw]+1 >= q->nr_requests) { 2122 if (rl->count[rw]+1 >= q->nr_requests) {
2150 ioc = current_io_context(GFP_ATOMIC); 2123 ioc = current_io_context(GFP_ATOMIC, q->node);
2151 /* 2124 /*
2152 * The queue will fill after this allocation, so set 2125 * The queue will fill after this allocation, so set
2153 * it as full, and mark this process as "batching". 2126 * it as full, and mark this process as "batching".
@@ -2189,7 +2162,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
2189 2162
2190 spin_unlock_irq(q->queue_lock); 2163 spin_unlock_irq(q->queue_lock);
2191 2164
2192 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); 2165 rq = blk_alloc_request(q, rw, priv, gfp_mask);
2193 if (unlikely(!rq)) { 2166 if (unlikely(!rq)) {
2194 /* 2167 /*
2195 * Allocation failed presumably due to memory. Undo anything 2168 * Allocation failed presumably due to memory. Undo anything
@@ -2225,7 +2198,6 @@ rq_starved:
2225 ioc->nr_batch_requests--; 2198 ioc->nr_batch_requests--;
2226 2199
2227 rq_init(q, rq); 2200 rq_init(q, rq);
2228 rq->rl = rl;
2229 2201
2230 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); 2202 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
2231out: 2203out:
@@ -2268,7 +2240,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
2268 * up to a big batch of them for a small period time. 2240 * up to a big batch of them for a small period time.
2269 * See ioc_batching, ioc_set_batching 2241 * See ioc_batching, ioc_set_batching
2270 */ 2242 */
2271 ioc = current_io_context(GFP_NOIO); 2243 ioc = current_io_context(GFP_NOIO, q->node);
2272 ioc_set_batching(q, ioc); 2244 ioc_set_batching(q, ioc);
2273 2245
2274 spin_lock_irq(q->queue_lock); 2246 spin_lock_irq(q->queue_lock);
@@ -2300,6 +2272,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
2300EXPORT_SYMBOL(blk_get_request); 2272EXPORT_SYMBOL(blk_get_request);
2301 2273
2302/** 2274/**
2275 * blk_start_queueing - initiate dispatch of requests to device
2276 * @q: request queue to kick into gear
2277 *
2278 * This is basically a helper to remove the need to know whether a queue
2279 * is plugged or not if someone just wants to initiate dispatch of requests
2280 * for this queue.
2281 *
2282 * The queue lock must be held with interrupts disabled.
2283 */
2284void blk_start_queueing(request_queue_t *q)
2285{
2286 if (!blk_queue_plugged(q))
2287 q->request_fn(q);
2288 else
2289 __generic_unplug_device(q);
2290}
2291EXPORT_SYMBOL(blk_start_queueing);
2292
2293/**
2303 * blk_requeue_request - put a request back on queue 2294 * blk_requeue_request - put a request back on queue
2304 * @q: request queue where request should be inserted 2295 * @q: request queue where request should be inserted
2305 * @rq: request to be inserted 2296 * @rq: request to be inserted
@@ -2351,7 +2342,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
2351 * must not attempt merges on this) and that it acts as a soft 2342 * must not attempt merges on this) and that it acts as a soft
2352 * barrier 2343 * barrier
2353 */ 2344 */
2354 rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; 2345 rq->cmd_type = REQ_TYPE_SPECIAL;
2346 rq->cmd_flags |= REQ_SOFTBARRIER;
2355 2347
2356 rq->special = data; 2348 rq->special = data;
2357 2349
@@ -2365,11 +2357,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
2365 2357
2366 drive_stat_acct(rq, rq->nr_sectors, 1); 2358 drive_stat_acct(rq, rq->nr_sectors, 1);
2367 __elv_add_request(q, rq, where, 0); 2359 __elv_add_request(q, rq, where, 0);
2368 2360 blk_start_queueing(q);
2369 if (blk_queue_plugged(q))
2370 __generic_unplug_device(q);
2371 else
2372 q->request_fn(q);
2373 spin_unlock_irqrestore(q->queue_lock, flags); 2361 spin_unlock_irqrestore(q->queue_lock, flags);
2374} 2362}
2375 2363
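blk_insert_request() above is the first caller converted to the new helper: the open-coded "plugged? unplug : run request_fn" branch collapses into one blk_start_queueing() call. A sketch of the expected calling convention, per the kerneldoc above (queue lock held with interrupts disabled; the function name is hypothetical):

#include <linux/blkdev.h>

static void example_add_and_kick(request_queue_t *q, struct request *rq)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
	blk_start_queueing(q);	/* unplugs or runs q->request_fn() */
	spin_unlock_irqrestore(q->queue_lock, flags);
}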
@@ -2558,7 +2546,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2558 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2546 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2559 2547
2560 rq->rq_disk = bd_disk; 2548 rq->rq_disk = bd_disk;
2561 rq->flags |= REQ_NOMERGE; 2549 rq->cmd_flags |= REQ_NOMERGE;
2562 rq->end_io = done; 2550 rq->end_io = done;
2563 WARN_ON(irqs_disabled()); 2551 WARN_ON(irqs_disabled());
2564 spin_lock_irq(q->queue_lock); 2552 spin_lock_irq(q->queue_lock);
@@ -2598,10 +2586,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2598 rq->sense_len = 0; 2586 rq->sense_len = 0;
2599 } 2587 }
2600 2588
2601 rq->waiting = &wait; 2589 rq->end_io_data = &wait;
2602 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); 2590 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
2603 wait_for_completion(&wait); 2591 wait_for_completion(&wait);
2604 rq->waiting = NULL;
2605 2592
2606 if (rq->errors) 2593 if (rq->errors)
2607 err = -EIO; 2594 err = -EIO;
@@ -2710,8 +2697,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats);
2710 */ 2697 */
2711void __blk_put_request(request_queue_t *q, struct request *req) 2698void __blk_put_request(request_queue_t *q, struct request *req)
2712{ 2699{
2713 struct request_list *rl = req->rl;
2714
2715 if (unlikely(!q)) 2700 if (unlikely(!q))
2716 return; 2701 return;
2717 if (unlikely(--req->ref_count)) 2702 if (unlikely(--req->ref_count))
@@ -2719,18 +2704,16 @@ void __blk_put_request(request_queue_t *q, struct request *req)
2719 2704
2720 elv_completed_request(q, req); 2705 elv_completed_request(q, req);
2721 2706
2722 req->rq_status = RQ_INACTIVE;
2723 req->rl = NULL;
2724
2725 /* 2707 /*
2726 * Request may not have originated from ll_rw_blk. if not, 2708 * Request may not have originated from ll_rw_blk. if not,
2727 * it didn't come out of our reserved rq pools 2709 * it didn't come out of our reserved rq pools
2728 */ 2710 */
2729 if (rl) { 2711 if (req->cmd_flags & REQ_ALLOCED) {
2730 int rw = rq_data_dir(req); 2712 int rw = rq_data_dir(req);
2731 int priv = req->flags & REQ_ELVPRIV; 2713 int priv = req->cmd_flags & REQ_ELVPRIV;
2732 2714
2733 BUG_ON(!list_empty(&req->queuelist)); 2715 BUG_ON(!list_empty(&req->queuelist));
2716 BUG_ON(!hlist_unhashed(&req->hash));
2734 2717
2735 blk_free_request(q, req); 2718 blk_free_request(q, req);
2736 freed_request(q, rw, priv); 2719 freed_request(q, rw, priv);
@@ -2764,9 +2747,9 @@ EXPORT_SYMBOL(blk_put_request);
2764 */ 2747 */
2765void blk_end_sync_rq(struct request *rq, int error) 2748void blk_end_sync_rq(struct request *rq, int error)
2766{ 2749{
2767 struct completion *waiting = rq->waiting; 2750 struct completion *waiting = rq->end_io_data;
2768 2751
2769 rq->waiting = NULL; 2752 rq->end_io_data = NULL;
2770 __blk_put_request(rq->q, rq); 2753 __blk_put_request(rq->q, rq);
2771 2754
2772 /* 2755 /*
@@ -2829,7 +2812,7 @@ static int attempt_merge(request_queue_t *q, struct request *req,
2829 2812
2830 if (rq_data_dir(req) != rq_data_dir(next) 2813 if (rq_data_dir(req) != rq_data_dir(next)
2831 || req->rq_disk != next->rq_disk 2814 || req->rq_disk != next->rq_disk
2832 || next->waiting || next->special) 2815 || next->special)
2833 return 0; 2816 return 0;
2834 2817
2835 /* 2818 /*
@@ -2890,22 +2873,24 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2890 2873
2891static void init_request_from_bio(struct request *req, struct bio *bio) 2874static void init_request_from_bio(struct request *req, struct bio *bio)
2892{ 2875{
2893 req->flags |= REQ_CMD; 2876 req->cmd_type = REQ_TYPE_FS;
2894 2877
2895 /* 2878 /*
2896 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 2879 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2897 */ 2880 */
2898 if (bio_rw_ahead(bio) || bio_failfast(bio)) 2881 if (bio_rw_ahead(bio) || bio_failfast(bio))
2899 req->flags |= REQ_FAILFAST; 2882 req->cmd_flags |= REQ_FAILFAST;
2900 2883
2901 /* 2884 /*
2902 * REQ_BARRIER implies no merging, but lets make it explicit 2885 * REQ_BARRIER implies no merging, but lets make it explicit
2903 */ 2886 */
2904 if (unlikely(bio_barrier(bio))) 2887 if (unlikely(bio_barrier(bio)))
2905 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 2888 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2906 2889
2907 if (bio_sync(bio)) 2890 if (bio_sync(bio))
2908 req->flags |= REQ_RW_SYNC; 2891 req->cmd_flags |= REQ_RW_SYNC;
2892 if (bio_rw_meta(bio))
2893 req->cmd_flags |= REQ_RW_META;
2909 2894
2910 req->errors = 0; 2895 req->errors = 0;
2911 req->hard_sector = req->sector = bio->bi_sector; 2896 req->hard_sector = req->sector = bio->bi_sector;
@@ -2914,7 +2899,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
2914 req->nr_phys_segments = bio_phys_segments(req->q, bio); 2899 req->nr_phys_segments = bio_phys_segments(req->q, bio);
2915 req->nr_hw_segments = bio_hw_segments(req->q, bio); 2900 req->nr_hw_segments = bio_hw_segments(req->q, bio);
2916 req->buffer = bio_data(bio); /* see ->buffer comment above */ 2901 req->buffer = bio_data(bio); /* see ->buffer comment above */
2917 req->waiting = NULL;
2918 req->bio = req->biotail = bio; 2902 req->bio = req->biotail = bio;
2919 req->ioprio = bio_prio(bio); 2903 req->ioprio = bio_prio(bio);
2920 req->rq_disk = bio->bi_bdev->bd_disk; 2904 req->rq_disk = bio->bi_bdev->bd_disk;
@@ -2924,17 +2908,11 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
2924static int __make_request(request_queue_t *q, struct bio *bio) 2908static int __make_request(request_queue_t *q, struct bio *bio)
2925{ 2909{
2926 struct request *req; 2910 struct request *req;
2927 int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; 2911 int el_ret, nr_sectors, barrier, err;
2928 unsigned short prio; 2912 const unsigned short prio = bio_prio(bio);
2929 sector_t sector; 2913 const int sync = bio_sync(bio);
2930 2914
2931 sector = bio->bi_sector;
2932 nr_sectors = bio_sectors(bio); 2915 nr_sectors = bio_sectors(bio);
2933 cur_nr_sectors = bio_cur_sectors(bio);
2934 prio = bio_prio(bio);
2935
2936 rw = bio_data_dir(bio);
2937 sync = bio_sync(bio);
2938 2916
2939 /* 2917 /*
2940 * low level driver can indicate that it wants pages above a 2918 * low level driver can indicate that it wants pages above a
@@ -2943,8 +2921,6 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2943 */ 2921 */
2944 blk_queue_bounce(q, &bio); 2922 blk_queue_bounce(q, &bio);
2945 2923
2946 spin_lock_prefetch(q->queue_lock);
2947
2948 barrier = bio_barrier(bio); 2924 barrier = bio_barrier(bio);
2949 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { 2925 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2950 err = -EOPNOTSUPP; 2926 err = -EOPNOTSUPP;
@@ -2972,7 +2948,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2972 req->ioprio = ioprio_best(req->ioprio, prio); 2948 req->ioprio = ioprio_best(req->ioprio, prio);
2973 drive_stat_acct(req, nr_sectors, 0); 2949 drive_stat_acct(req, nr_sectors, 0);
2974 if (!attempt_back_merge(q, req)) 2950 if (!attempt_back_merge(q, req))
2975 elv_merged_request(q, req); 2951 elv_merged_request(q, req, el_ret);
2976 goto out; 2952 goto out;
2977 2953
2978 case ELEVATOR_FRONT_MERGE: 2954 case ELEVATOR_FRONT_MERGE:
@@ -2992,14 +2968,14 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2992 * not touch req->buffer either... 2968 * not touch req->buffer either...
2993 */ 2969 */
2994 req->buffer = bio_data(bio); 2970 req->buffer = bio_data(bio);
2995 req->current_nr_sectors = cur_nr_sectors; 2971 req->current_nr_sectors = bio_cur_sectors(bio);
2996 req->hard_cur_sectors = cur_nr_sectors; 2972 req->hard_cur_sectors = req->current_nr_sectors;
2997 req->sector = req->hard_sector = sector; 2973 req->sector = req->hard_sector = bio->bi_sector;
2998 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2974 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2999 req->ioprio = ioprio_best(req->ioprio, prio); 2975 req->ioprio = ioprio_best(req->ioprio, prio);
3000 drive_stat_acct(req, nr_sectors, 0); 2976 drive_stat_acct(req, nr_sectors, 0);
3001 if (!attempt_front_merge(q, req)) 2977 if (!attempt_front_merge(q, req))
3002 elv_merged_request(q, req); 2978 elv_merged_request(q, req, el_ret);
3003 goto out; 2979 goto out;
3004 2980
3005 /* ELV_NO_MERGE: elevator says don't/can't merge. */ 2981 /* ELV_NO_MERGE: elevator says don't/can't merge. */
@@ -3012,7 +2988,7 @@ get_rq:
3012 * Grab a free request. This is might sleep but can not fail. 2988 * Grab a free request. This is might sleep but can not fail.
3013 * Returns with the queue unlocked. 2989 * Returns with the queue unlocked.
3014 */ 2990 */
3015 req = get_request_wait(q, rw, bio); 2991 req = get_request_wait(q, bio_data_dir(bio), bio);
3016 2992
3017 /* 2993 /*
3018 * After dropping the lock and possibly sleeping here, our request 2994 * After dropping the lock and possibly sleeping here, our request
@@ -3306,7 +3282,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
3306 req->errors = 0; 3282 req->errors = 0;
3307 3283
3308 if (!uptodate) { 3284 if (!uptodate) {
3309 if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) 3285 if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
3310 printk("end_request: I/O error, dev %s, sector %llu\n", 3286 printk("end_request: I/O error, dev %s, sector %llu\n",
3311 req->rq_disk ? req->rq_disk->disk_name : "?", 3287 req->rq_disk ? req->rq_disk->disk_name : "?",
3312 (unsigned long long)req->sector); 3288 (unsigned long long)req->sector);
@@ -3569,8 +3545,8 @@ EXPORT_SYMBOL(end_request);
3569 3545
3570void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) 3546void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
3571{ 3547{
3572 /* first two bits are identical in rq->flags and bio->bi_rw */ 3548 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */
3573 rq->flags |= (bio->bi_rw & 3); 3549 rq->cmd_flags |= (bio->bi_rw & 3);
3574 3550
3575 rq->nr_phys_segments = bio_phys_segments(q, bio); 3551 rq->nr_phys_segments = bio_phys_segments(q, bio);
3576 rq->nr_hw_segments = bio_hw_segments(q, bio); 3552 rq->nr_hw_segments = bio_hw_segments(q, bio);
@@ -3658,25 +3634,22 @@ EXPORT_SYMBOL(put_io_context);
3658/* Called by the exitting task */ 3634/* Called by the exitting task */
3659void exit_io_context(void) 3635void exit_io_context(void)
3660{ 3636{
3661 unsigned long flags;
3662 struct io_context *ioc; 3637 struct io_context *ioc;
3663 struct cfq_io_context *cic; 3638 struct cfq_io_context *cic;
3664 3639
3665 local_irq_save(flags);
3666 task_lock(current); 3640 task_lock(current);
3667 ioc = current->io_context; 3641 ioc = current->io_context;
3668 current->io_context = NULL; 3642 current->io_context = NULL;
3669 ioc->task = NULL;
3670 task_unlock(current); 3643 task_unlock(current);
3671 local_irq_restore(flags);
3672 3644
3645 ioc->task = NULL;
3673 if (ioc->aic && ioc->aic->exit) 3646 if (ioc->aic && ioc->aic->exit)
3674 ioc->aic->exit(ioc->aic); 3647 ioc->aic->exit(ioc->aic);
3675 if (ioc->cic_root.rb_node != NULL) { 3648 if (ioc->cic_root.rb_node != NULL) {
3676 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); 3649 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
3677 cic->exit(ioc); 3650 cic->exit(ioc);
3678 } 3651 }
3679 3652
3680 put_io_context(ioc); 3653 put_io_context(ioc);
3681} 3654}
3682 3655
@@ -3688,7 +3661,7 @@ void exit_io_context(void)
3688 * but since the current task itself holds a reference, the context can be 3661 * but since the current task itself holds a reference, the context can be
3689 * used in general code, so long as it stays within `current` context. 3662 * used in general code, so long as it stays within `current` context.
3690 */ 3663 */
3691struct io_context *current_io_context(gfp_t gfp_flags) 3664static struct io_context *current_io_context(gfp_t gfp_flags, int node)
3692{ 3665{
3693 struct task_struct *tsk = current; 3666 struct task_struct *tsk = current;
3694 struct io_context *ret; 3667 struct io_context *ret;
@@ -3697,11 +3670,11 @@ struct io_context *current_io_context(gfp_t gfp_flags)
3697 if (likely(ret)) 3670 if (likely(ret))
3698 return ret; 3671 return ret;
3699 3672
3700 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); 3673 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
3701 if (ret) { 3674 if (ret) {
3702 atomic_set(&ret->refcount, 1); 3675 atomic_set(&ret->refcount, 1);
3703 ret->task = current; 3676 ret->task = current;
3704 ret->set_ioprio = NULL; 3677 ret->ioprio_changed = 0;
3705 ret->last_waited = jiffies; /* doesn't matter... */ 3678 ret->last_waited = jiffies; /* doesn't matter... */
3706 ret->nr_batch_requests = 0; /* because this is 0 */ 3679 ret->nr_batch_requests = 0; /* because this is 0 */
3707 ret->aic = NULL; 3680 ret->aic = NULL;
@@ -3721,10 +3694,10 @@ EXPORT_SYMBOL(current_io_context);
3721 * 3694 *
3722 * This is always called in the context of the task which submitted the I/O. 3695 * This is always called in the context of the task which submitted the I/O.
3723 */ 3696 */
3724struct io_context *get_io_context(gfp_t gfp_flags) 3697struct io_context *get_io_context(gfp_t gfp_flags, int node)
3725{ 3698{
3726 struct io_context *ret; 3699 struct io_context *ret;
3727 ret = current_io_context(gfp_flags); 3700 ret = current_io_context(gfp_flags, node);
3728 if (likely(ret)) 3701 if (likely(ret))
3729 atomic_inc(&ret->refcount); 3702 atomic_inc(&ret->refcount);
3730 return ret; 3703 return ret;
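current_io_context() and get_io_context() now take the queue's home node, so the io_context (and, in noop_init_queue() below, the scheduler's private data) is allocated NUMA-locally through the *_node allocator variants. A minimal sketch of the same idea for a hypothetical per-queue structure, assuming a queue with q->node set:

#include <linux/blkdev.h>
#include <linux/slab.h>

/* Hypothetical per-queue private data, allocated on the queue's node. */
struct example_data {
	struct list_head queue;
};

static struct example_data *example_alloc(request_queue_t *q)
{
	struct example_data *ed;

	ed = kmalloc_node(sizeof(*ed), GFP_KERNEL, q->node);
	if (!ed)
		return NULL;
	INIT_LIST_HEAD(&ed->queue);
	return ed;
}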
@@ -3837,9 +3810,6 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
3837 ssize_t ret = queue_var_store(&ra_kb, page, count); 3810 ssize_t ret = queue_var_store(&ra_kb, page, count);
3838 3811
3839 spin_lock_irq(q->queue_lock); 3812 spin_lock_irq(q->queue_lock);
3840 if (ra_kb > (q->max_sectors >> 1))
3841 ra_kb = (q->max_sectors >> 1);
3842
3843 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 3813 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
3844 spin_unlock_irq(q->queue_lock); 3814 spin_unlock_irq(q->queue_lock);
3845 3815
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 56a7c620574f..79af43179421 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e)
69{ 69{
70 struct noop_data *nd; 70 struct noop_data *nd;
71 71
72 nd = kmalloc(sizeof(*nd), GFP_KERNEL); 72 nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
73 if (!nd) 73 if (!nd)
74 return NULL; 74 return NULL;
75 INIT_LIST_HEAD(&nd->queue); 75 INIT_LIST_HEAD(&nd->queue);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index b33eda26e205..2dc326421a24 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q,
294 rq->sense = sense; 294 rq->sense = sense;
295 rq->sense_len = 0; 295 rq->sense_len = 0;
296 296
297 rq->flags |= REQ_BLOCK_PC; 297 rq->cmd_type = REQ_TYPE_BLOCK_PC;
298 bio = rq->bio; 298 bio = rq->bio;
299 299
300 /* 300 /*
@@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
470 memset(sense, 0, sizeof(sense)); 470 memset(sense, 0, sizeof(sense));
471 rq->sense = sense; 471 rq->sense = sense;
472 rq->sense_len = 0; 472 rq->sense_len = 0;
473 rq->flags |= REQ_BLOCK_PC; 473 rq->cmd_type = REQ_TYPE_BLOCK_PC;
474 474
475 blk_execute_rq(q, disk, rq, 0); 475 blk_execute_rq(q, disk, rq, 0);
476 476
@@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c
502 int err; 502 int err;
503 503
504 rq = blk_get_request(q, WRITE, __GFP_WAIT); 504 rq = blk_get_request(q, WRITE, __GFP_WAIT);
505 rq->flags |= REQ_BLOCK_PC; 505 rq->cmd_type = REQ_TYPE_BLOCK_PC;
506 rq->data = NULL; 506 rq->data = NULL;
507 rq->data_len = 0; 507 rq->data_len = 0;
508 rq->timeout = BLK_DEFAULT_TIMEOUT; 508 rq->timeout = BLK_DEFAULT_TIMEOUT;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index a360215dbce7..2568640430fb 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3331,7 +3331,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
3331 Command->DmaDirection = PCI_DMA_TODEVICE; 3331 Command->DmaDirection = PCI_DMA_TODEVICE;
3332 Command->CommandType = DAC960_WriteCommand; 3332 Command->CommandType = DAC960_WriteCommand;
3333 } 3333 }
3334 Command->Completion = Request->waiting; 3334 Command->Completion = Request->end_io_data;
3335 Command->LogicalDriveNumber = (long)Request->rq_disk->private_data; 3335 Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
3336 Command->BlockNumber = Request->sector; 3336 Command->BlockNumber = Request->sector;
3337 Command->BlockCount = Request->nr_sectors; 3337 Command->BlockCount = Request->nr_sectors;
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index b5382cedf0c0..422e31d5f8e5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -2,6 +2,8 @@
2# Block device driver configuration 2# Block device driver configuration
3# 3#
4 4
5if BLOCK
6
5menu "Block devices" 7menu "Block devices"
6 8
7config BLK_DEV_FD 9config BLK_DEV_FD
@@ -468,3 +470,5 @@ config ATA_OVER_ETH
468 devices like the Coraid EtherDrive (R) Storage Blade. 470 devices like the Coraid EtherDrive (R) Storage Blade.
469 471
470endmenu 472endmenu
473
474endif
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 2cd3391ff878..c211065ad829 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1229,7 +1229,6 @@ static inline void complete_buffers(struct bio *bio, int status)
1229 int nr_sectors = bio_sectors(bio); 1229 int nr_sectors = bio_sectors(bio);
1230 1230
1231 bio->bi_next = NULL; 1231 bio->bi_next = NULL;
1232 blk_finished_io(len);
1233 bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO); 1232 bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
1234 bio = xbh; 1233 bio = xbh;
1235 } 1234 }
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 78082edc14b4..4abc193314ee 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -989,7 +989,6 @@ static inline void complete_buffers(struct bio *bio, int ok)
989 xbh = bio->bi_next; 989 xbh = bio->bi_next;
990 bio->bi_next = NULL; 990 bio->bi_next = NULL;
991 991
992 blk_finished_io(nr_sectors);
993 bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO); 992 bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO);
994 993
995 bio = xbh; 994 bio = xbh;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index ad1d7065a1b2..629c5769d994 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2991,8 +2991,8 @@ static void do_fd_request(request_queue_t * q)
2991 if (usage_count == 0) { 2991 if (usage_count == 0) {
2992 printk("warning: usage count=0, current_req=%p exiting\n", 2992 printk("warning: usage count=0, current_req=%p exiting\n",
2993 current_req); 2993 current_req);
2994 printk("sect=%ld flags=%lx\n", (long)current_req->sector, 2994 printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
2995 current_req->flags); 2995 current_req->cmd_type, current_req->cmd_flags);
2996 return; 2996 return;
2997 } 2997 }
2998 if (test_bit(0, &fdc_busy)) { 2998 if (test_bit(0, &fdc_busy)) {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 68b0471ad5a6..d6bb8da955a2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -66,6 +66,7 @@
66#include <linux/swap.h> 66#include <linux/swap.h>
67#include <linux/slab.h> 67#include <linux/slab.h>
68#include <linux/loop.h> 68#include <linux/loop.h>
69#include <linux/compat.h>
69#include <linux/suspend.h> 70#include <linux/suspend.h>
70#include <linux/writeback.h> 71#include <linux/writeback.h>
71#include <linux/buffer_head.h> /* for invalidate_bdev() */ 72#include <linux/buffer_head.h> /* for invalidate_bdev() */
@@ -1165,6 +1166,162 @@ static int lo_ioctl(struct inode * inode, struct file * file,
1165 return err; 1166 return err;
1166} 1167}
1167 1168
1169#ifdef CONFIG_COMPAT
1170struct compat_loop_info {
1171 compat_int_t lo_number; /* ioctl r/o */
1172 compat_dev_t lo_device; /* ioctl r/o */
1173 compat_ulong_t lo_inode; /* ioctl r/o */
1174 compat_dev_t lo_rdevice; /* ioctl r/o */
1175 compat_int_t lo_offset;
1176 compat_int_t lo_encrypt_type;
1177 compat_int_t lo_encrypt_key_size; /* ioctl w/o */
1178 compat_int_t lo_flags; /* ioctl r/o */
1179 char lo_name[LO_NAME_SIZE];
1180 unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
1181 compat_ulong_t lo_init[2];
1182 char reserved[4];
1183};
1184
1185/*
1186 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
1187 * - noinlined to reduce stack space usage in main part of driver
1188 */
1189static noinline int
1190loop_info64_from_compat(const struct compat_loop_info *arg,
1191 struct loop_info64 *info64)
1192{
1193 struct compat_loop_info info;
1194
1195 if (copy_from_user(&info, arg, sizeof(info)))
1196 return -EFAULT;
1197
1198 memset(info64, 0, sizeof(*info64));
1199 info64->lo_number = info.lo_number;
1200 info64->lo_device = info.lo_device;
1201 info64->lo_inode = info.lo_inode;
1202 info64->lo_rdevice = info.lo_rdevice;
1203 info64->lo_offset = info.lo_offset;
1204 info64->lo_sizelimit = 0;
1205 info64->lo_encrypt_type = info.lo_encrypt_type;
1206 info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
1207 info64->lo_flags = info.lo_flags;
1208 info64->lo_init[0] = info.lo_init[0];
1209 info64->lo_init[1] = info.lo_init[1];
1210 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1211 memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
1212 else
1213 memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
1214 memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
1215 return 0;
1216}
1217
1218/*
1219 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
1220 * - noinlined to reduce stack space usage in main part of driver
1221 */
1222static noinline int
1223loop_info64_to_compat(const struct loop_info64 *info64,
1224 struct compat_loop_info __user *arg)
1225{
1226 struct compat_loop_info info;
1227
1228 memset(&info, 0, sizeof(info));
1229 info.lo_number = info64->lo_number;
1230 info.lo_device = info64->lo_device;
1231 info.lo_inode = info64->lo_inode;
1232 info.lo_rdevice = info64->lo_rdevice;
1233 info.lo_offset = info64->lo_offset;
1234 info.lo_encrypt_type = info64->lo_encrypt_type;
1235 info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
1236 info.lo_flags = info64->lo_flags;
1237 info.lo_init[0] = info64->lo_init[0];
1238 info.lo_init[1] = info64->lo_init[1];
1239 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1240 memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1241 else
1242 memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
1243 memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1244
1245 /* error in case values were truncated */
1246 if (info.lo_device != info64->lo_device ||
1247 info.lo_rdevice != info64->lo_rdevice ||
1248 info.lo_inode != info64->lo_inode ||
1249 info.lo_offset != info64->lo_offset ||
1250 info.lo_init[0] != info64->lo_init[0] ||
1251 info.lo_init[1] != info64->lo_init[1])
1252 return -EOVERFLOW;
1253
1254 if (copy_to_user(arg, &info, sizeof(info)))
1255 return -EFAULT;
1256 return 0;
1257}
1258
1259static int
1260loop_set_status_compat(struct loop_device *lo,
1261 const struct compat_loop_info __user *arg)
1262{
1263 struct loop_info64 info64;
1264 int ret;
1265
1266 ret = loop_info64_from_compat(arg, &info64);
1267 if (ret < 0)
1268 return ret;
1269 return loop_set_status(lo, &info64);
1270}
1271
1272static int
1273loop_get_status_compat(struct loop_device *lo,
1274 struct compat_loop_info __user *arg)
1275{
1276 struct loop_info64 info64;
1277 int err = 0;
1278
1279 if (!arg)
1280 err = -EINVAL;
1281 if (!err)
1282 err = loop_get_status(lo, &info64);
1283 if (!err)
1284 err = loop_info64_to_compat(&info64, arg);
1285 return err;
1286}
1287
1288static long lo_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1289{
1290 struct inode *inode = file->f_dentry->d_inode;
1291 struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
1292 int err;
1293
1294 lock_kernel();
1295 switch(cmd) {
1296 case LOOP_SET_STATUS:
1297 mutex_lock(&lo->lo_ctl_mutex);
1298 err = loop_set_status_compat(
1299 lo, (const struct compat_loop_info __user *) arg);
1300 mutex_unlock(&lo->lo_ctl_mutex);
1301 break;
1302 case LOOP_GET_STATUS:
1303 mutex_lock(&lo->lo_ctl_mutex);
1304 err = loop_get_status_compat(
1305 lo, (struct compat_loop_info __user *) arg);
1306 mutex_unlock(&lo->lo_ctl_mutex);
1307 break;
1308 case LOOP_CLR_FD:
1309 case LOOP_GET_STATUS64:
1310 case LOOP_SET_STATUS64:
1311 arg = (unsigned long) compat_ptr(arg);
1312 case LOOP_SET_FD:
1313 case LOOP_CHANGE_FD:
1314 err = lo_ioctl(inode, file, cmd, arg);
1315 break;
1316 default:
1317 err = -ENOIOCTLCMD;
1318 break;
1319 }
1320 unlock_kernel();
1321 return err;
1322}
1323#endif
1324
1168static int lo_open(struct inode *inode, struct file *file) 1325static int lo_open(struct inode *inode, struct file *file)
1169{ 1326{
1170 struct loop_device *lo = inode->i_bdev->bd_disk->private_data; 1327 struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
@@ -1192,6 +1349,9 @@ static struct block_device_operations lo_fops = {
1192 .open = lo_open, 1349 .open = lo_open,
1193 .release = lo_release, 1350 .release = lo_release,
1194 .ioctl = lo_ioctl, 1351 .ioctl = lo_ioctl,
1352#ifdef CONFIG_COMPAT
1353 .compat_ioctl = lo_compat_ioctl,
1354#endif
1195}; 1355};
1196 1356
1197/* 1357/*
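For context on what the compat handlers and the .compat_ioctl wiring above are translating (this example is not part of the patch): a 32-bit process passes a struct loop_info whose longs and dev_t are 32 bits wide, and lo_compat_ioctl() widens it into loop_info64 before reusing the normal loop_set_status() path. A hypothetical 32-bit userspace caller exercising that path:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

/* Built as a 32-bit binary on a 64-bit kernel, these ioctls go through
 * lo_compat_ioctl() -> loop_{get,set}_status_compat() above. */
int example_set_offset(const char *loopdev, int offset)
{
	struct loop_info info;
	int fd, ret;

	fd = open(loopdev, O_RDWR);
	if (fd < 0)
		return -1;

	memset(&info, 0, sizeof(info));
	ret = ioctl(fd, LOOP_GET_STATUS, &info);
	if (ret == 0) {
		info.lo_offset = offset;
		ret = ioctl(fd, LOOP_SET_STATUS, &info);
	}
	close(fd);
	return ret;
}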
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index bdbade9a5cf5..9d1035e8d9d8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -407,10 +407,10 @@ static void do_nbd_request(request_queue_t * q)
407 struct nbd_device *lo; 407 struct nbd_device *lo;
408 408
409 blkdev_dequeue_request(req); 409 blkdev_dequeue_request(req);
410 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n", 410 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
411 req->rq_disk->disk_name, req, req->flags); 411 req->rq_disk->disk_name, req, req->cmd_type);
412 412
413 if (!(req->flags & REQ_CMD)) 413 if (!blk_fs_request(req))
414 goto error_out; 414 goto error_out;
415 415
416 lo = req->rq_disk->private_data; 416 lo = req->rq_disk->private_data;
@@ -489,7 +489,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
489 switch (cmd) { 489 switch (cmd) {
490 case NBD_DISCONNECT: 490 case NBD_DISCONNECT:
491 printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name); 491 printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
492 sreq.flags = REQ_SPECIAL; 492 sreq.cmd_type = REQ_TYPE_SPECIAL;
493 nbd_cmd(&sreq) = NBD_CMD_DISC; 493 nbd_cmd(&sreq) = NBD_CMD_DISC;
494 /* 494 /*
495 * Set these to sane values in case server implementation 495 * Set these to sane values in case server implementation
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 2403721f9db1..38578b9dbfd1 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -437,7 +437,7 @@ static char *pd_buf; /* buffer for request in progress */
437 437
438static enum action do_pd_io_start(void) 438static enum action do_pd_io_start(void)
439{ 439{
440 if (pd_req->flags & REQ_SPECIAL) { 440 if (blk_special_request(pd_req)) {
441 phase = pd_special; 441 phase = pd_special;
442 return pd_special(); 442 return pd_special();
443 } 443 }
@@ -719,14 +719,12 @@ static int pd_special_command(struct pd_unit *disk,
719 719
720 memset(&rq, 0, sizeof(rq)); 720 memset(&rq, 0, sizeof(rq));
721 rq.errors = 0; 721 rq.errors = 0;
722 rq.rq_status = RQ_ACTIVE;
723 rq.rq_disk = disk->gd; 722 rq.rq_disk = disk->gd;
724 rq.ref_count = 1; 723 rq.ref_count = 1;
725 rq.waiting = &wait; 724 rq.end_io_data = &wait;
726 rq.end_io = blk_end_sync_rq; 725 rq.end_io = blk_end_sync_rq;
727 blk_insert_request(disk->gd->queue, &rq, 0, func); 726 blk_insert_request(disk->gd->queue, &rq, 0, func);
728 wait_for_completion(&wait); 727 wait_for_completion(&wait);
729 rq.waiting = NULL;
730 if (rq.errors) 728 if (rq.errors)
731 err = -EIO; 729 err = -EIO;
732 blk_put_request(&rq); 730 blk_put_request(&rq);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 451b996bba91..888d1aceeeff 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -365,17 +365,17 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
365 rq->sense = sense; 365 rq->sense = sense;
366 memset(sense, 0, sizeof(sense)); 366 memset(sense, 0, sizeof(sense));
367 rq->sense_len = 0; 367 rq->sense_len = 0;
368 rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER; 368 rq->cmd_type = REQ_TYPE_BLOCK_PC;
369 rq->cmd_flags |= REQ_HARDBARRIER;
369 if (cgc->quiet) 370 if (cgc->quiet)
370 rq->flags |= REQ_QUIET; 371 rq->cmd_flags |= REQ_QUIET;
371 memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); 372 memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
372 if (sizeof(rq->cmd) > CDROM_PACKET_SIZE) 373 if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
373 memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE); 374 memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
374 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); 375 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
375 376
376 rq->ref_count++; 377 rq->ref_count++;
377 rq->flags |= REQ_NOMERGE; 378 rq->end_io_data = &wait;
378 rq->waiting = &wait;
379 rq->end_io = blk_end_sync_rq; 379 rq->end_io = blk_end_sync_rq;
380 elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); 380 elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
381 generic_unplug_device(q); 381 generic_unplug_device(q);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index cc42e762396f..f2305ee792a1 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -319,8 +319,8 @@ static void start_request(struct floppy_state *fs)
319 printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n", 319 printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
320 req->rq_disk->disk_name, req->cmd, 320 req->rq_disk->disk_name, req->cmd,
321 (long)req->sector, req->nr_sectors, req->buffer); 321 (long)req->sector, req->nr_sectors, req->buffer);
322 printk(" rq_status=%d errors=%d current_nr_sectors=%ld\n", 322 printk(" errors=%d current_nr_sectors=%ld\n",
323 req->rq_status, req->errors, req->current_nr_sectors); 323 req->errors, req->current_nr_sectors);
324#endif 324#endif
325 325
326 if (req->sector < 0 || req->sector >= fs->total_secs) { 326 if (req->sector < 0 || req->sector >= fs->total_secs) {
diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c
index 89e3c2f8b776..dfda796eba56 100644
--- a/drivers/block/swim_iop.c
+++ b/drivers/block/swim_iop.c
@@ -529,8 +529,8 @@ static void start_request(struct floppy_state *fs)
529 printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n", 529 printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
530 CURRENT->rq_disk->disk_name, CURRENT->cmd, 530 CURRENT->rq_disk->disk_name, CURRENT->cmd,
531 CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer); 531 CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer);
532 printk(" rq_status=%d errors=%d current_nr_sectors=%ld\n", 532 printk(" errors=%d current_nr_sectors=%ld\n",
533 CURRENT->rq_status, CURRENT->errors, CURRENT->current_nr_sectors); 533 CURRENT->errors, CURRENT->current_nr_sectors);
534#endif 534#endif
535 535
536 if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) { 536 if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) {
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index e828e4cbd3e1..ebf3025721d1 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -313,7 +313,7 @@ static void do_xd_request (request_queue_t * q)
313 int res = 0; 313 int res = 0;
314 int retry; 314 int retry;
315 315
316 if (!(req->flags & REQ_CMD)) { 316 if (!blk_fs_request(req)) {
317 end_request(req, 0); 317 end_request(req, 0);
318 continue; 318 continue;
319 } 319 }
diff --git a/drivers/cdrom/Kconfig b/drivers/cdrom/Kconfig
index ff5652d40619..4b12e9031fb3 100644
--- a/drivers/cdrom/Kconfig
+++ b/drivers/cdrom/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4 4
5menu "Old CD-ROM drivers (not SCSI, not IDE)" 5menu "Old CD-ROM drivers (not SCSI, not IDE)"
6 depends on ISA 6 depends on ISA && BLOCK
7 7
8config CD_NO_IDESCSI 8config CD_NO_IDESCSI
9 bool "Support non-SCSI/IDE/ATAPI CDROM drives" 9 bool "Support non-SCSI/IDE/ATAPI CDROM drives"
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index d239cf8b20bd..b38c84a7a8e3 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2129,7 +2129,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2129 rq->cmd[9] = 0xf8; 2129 rq->cmd[9] = 0xf8;
2130 2130
2131 rq->cmd_len = 12; 2131 rq->cmd_len = 12;
2132 rq->flags |= REQ_BLOCK_PC; 2132 rq->cmd_type = REQ_TYPE_BLOCK_PC;
2133 rq->timeout = 60 * HZ; 2133 rq->timeout = 60 * HZ;
2134 bio = rq->bio; 2134 bio = rq->bio;
2135 2135
diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c
index 37bdb0163f0d..ccd91c1a84bd 100644
--- a/drivers/cdrom/cdu31a.c
+++ b/drivers/cdrom/cdu31a.c
@@ -1338,8 +1338,10 @@ static void do_cdu31a_request(request_queue_t * q)
1338 } 1338 }
1339 1339
1340 /* WTF??? */ 1340 /* WTF??? */
1341 if (!(req->flags & REQ_CMD)) 1341 if (!blk_fs_request(req)) {
1342 end_request(req, 0);
1342 continue; 1343 continue;
1344 }
1343 if (rq_data_dir(req) == WRITE) { 1345 if (rq_data_dir(req) == WRITE) {
1344 end_request(req, 0); 1346 end_request(req, 0);
1345 continue; 1347 continue;
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 4cc619edf424..bde1c665d9f4 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -1006,6 +1006,7 @@ config GPIO_VR41XX
1006 1006
1007config RAW_DRIVER 1007config RAW_DRIVER
1008 tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)" 1008 tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)"
1009 depends on BLOCK
1009 help 1010 help
1010 The raw driver permits block devices to be bound to /dev/raw/rawN. 1011 The raw driver permits block devices to be bound to /dev/raw/rawN.
1011 Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O. 1012 Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O.
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 4c3a5ca9d8f7..b430a12eb819 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -655,6 +655,7 @@ void add_interrupt_randomness(int irq)
655 add_timer_randomness(irq_timer_state[irq], 0x100 + irq); 655 add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
656} 656}
657 657
658#ifdef CONFIG_BLOCK
658void add_disk_randomness(struct gendisk *disk) 659void add_disk_randomness(struct gendisk *disk)
659{ 660{
660 if (!disk || !disk->random) 661 if (!disk || !disk->random)
@@ -667,6 +668,7 @@ void add_disk_randomness(struct gendisk *disk)
667} 668}
668 669
669EXPORT_SYMBOL(add_disk_randomness); 670EXPORT_SYMBOL(add_disk_randomness);
671#endif
670 672
671#define EXTRACT_SIZE 10 673#define EXTRACT_SIZE 10
672 674
@@ -918,6 +920,7 @@ void rand_initialize_irq(int irq)
918 } 920 }
919} 921}
920 922
923#ifdef CONFIG_BLOCK
921void rand_initialize_disk(struct gendisk *disk) 924void rand_initialize_disk(struct gendisk *disk)
922{ 925{
923 struct timer_rand_state *state; 926 struct timer_rand_state *state;
@@ -932,6 +935,7 @@ void rand_initialize_disk(struct gendisk *disk)
932 disk->random = state; 935 disk->random = state;
933 } 936 }
934} 937}
938#endif
935 939
936static ssize_t 940static ssize_t
937random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) 941random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
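
The random.c hunks compile the disk-entropy hooks only when the block layer is present. For callers to keep building in !CONFIG_BLOCK configurations there would normally be matching no-op stubs next to the declarations; the sketch below shows that arrangement, and the stub form is an assumption rather than something visible in this hunk:

/* Sketch: pairing a CONFIG_BLOCK-only implementation with no-op stubs. */
struct gendisk;

#ifdef CONFIG_BLOCK
extern void add_disk_randomness(struct gendisk *disk);
extern void rand_initialize_disk(struct gendisk *disk);
#else
static inline void add_disk_randomness(struct gendisk *disk) { }
static inline void rand_initialize_disk(struct gendisk *disk) { }
#endif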
diff --git a/drivers/fc4/fc.c b/drivers/fc4/fc.c
index 1a159e8843ca..22d17474755f 100644
--- a/drivers/fc4/fc.c
+++ b/drivers/fc4/fc.c
@@ -974,7 +974,6 @@ int fcp_scsi_dev_reset(Scsi_Cmnd *SCpnt)
974 */ 974 */
975 975
976 fc->rst_pkt->device->host->eh_action = &sem; 976 fc->rst_pkt->device->host->eh_action = &sem;
977 fc->rst_pkt->request->rq_status = RQ_SCSI_BUSY;
978 977
979 fc->rst_pkt->done = fcp_scsi_reset_done; 978 fc->rst_pkt->done = fcp_scsi_reset_done;
980 979
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index b6fb167e20f6..69d627bd537a 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -4,6 +4,8 @@
4# Andre Hedrick <andre@linux-ide.org> 4# Andre Hedrick <andre@linux-ide.org>
5# 5#
6 6
7if BLOCK
8
7menu "ATA/ATAPI/MFM/RLL support" 9menu "ATA/ATAPI/MFM/RLL support"
8 10
9config IDE 11config IDE
@@ -1082,3 +1084,5 @@ config BLK_DEV_HD
1082endif 1084endif
1083 1085
1084endmenu 1086endmenu
1087
1088endif
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 654d4cd09847..69bbb6206a00 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -372,7 +372,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq,
372{ 372{
373 int log = 0; 373 int log = 0;
374 374
375 if (!sense || !rq || (rq->flags & REQ_QUIET)) 375 if (!sense || !rq || (rq->cmd_flags & REQ_QUIET))
376 return 0; 376 return 0;
377 377
378 switch (sense->sense_key) { 378 switch (sense->sense_key) {
@@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq)
597 struct cdrom_info *cd = drive->driver_data; 597 struct cdrom_info *cd = drive->driver_data;
598 598
599 ide_init_drive_cmd(rq); 599 ide_init_drive_cmd(rq);
600 rq->flags = REQ_PC; 600 rq->cmd_type = REQ_TYPE_BLOCK_PC;
601 rq->rq_disk = cd->disk; 601 rq->rq_disk = cd->disk;
602} 602}
603 603
@@ -617,7 +617,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
617 rq->cmd[0] = GPCMD_REQUEST_SENSE; 617 rq->cmd[0] = GPCMD_REQUEST_SENSE;
618 rq->cmd[4] = rq->data_len = 18; 618 rq->cmd[4] = rq->data_len = 18;
619 619
620 rq->flags = REQ_SENSE; 620 rq->cmd_type = REQ_TYPE_SENSE;
621 621
622 /* NOTE! Save the failed command in "rq->buffer" */ 622 /* NOTE! Save the failed command in "rq->buffer" */
623 rq->buffer = (void *) failed_command; 623 rq->buffer = (void *) failed_command;
@@ -630,10 +630,10 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate)
630 struct request *rq = HWGROUP(drive)->rq; 630 struct request *rq = HWGROUP(drive)->rq;
631 int nsectors = rq->hard_cur_sectors; 631 int nsectors = rq->hard_cur_sectors;
632 632
633 if ((rq->flags & REQ_SENSE) && uptodate) { 633 if (blk_sense_request(rq) && uptodate) {
634 /* 634 /*
635 * For REQ_SENSE, "rq->buffer" points to the original failed 635 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
636 * request 636 * failed request
637 */ 637 */
638 struct request *failed = (struct request *) rq->buffer; 638 struct request *failed = (struct request *) rq->buffer;
639 struct cdrom_info *info = drive->driver_data; 639 struct cdrom_info *info = drive->driver_data;
@@ -706,17 +706,17 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
706 return 1; 706 return 1;
707 } 707 }
708 708
709 if (rq->flags & REQ_SENSE) { 709 if (blk_sense_request(rq)) {
710 /* We got an error trying to get sense info 710 /* We got an error trying to get sense info
711 from the drive (probably while trying 711 from the drive (probably while trying
712 to recover from a former error). Just give up. */ 712 to recover from a former error). Just give up. */
713 713
714 rq->flags |= REQ_FAILED; 714 rq->cmd_flags |= REQ_FAILED;
715 cdrom_end_request(drive, 0); 715 cdrom_end_request(drive, 0);
716 ide_error(drive, "request sense failure", stat); 716 ide_error(drive, "request sense failure", stat);
717 return 1; 717 return 1;
718 718
719 } else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) { 719 } else if (blk_pc_request(rq)) {
720 /* All other functions, except for READ. */ 720 /* All other functions, except for READ. */
721 unsigned long flags; 721 unsigned long flags;
722 722
@@ -724,7 +724,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
724 * if we have an error, pass back CHECK_CONDITION as the 724 * if we have an error, pass back CHECK_CONDITION as the
725 * scsi status byte 725 * scsi status byte
726 */ 726 */
727 if ((rq->flags & REQ_BLOCK_PC) && !rq->errors) 727 if (!rq->errors)
728 rq->errors = SAM_STAT_CHECK_CONDITION; 728 rq->errors = SAM_STAT_CHECK_CONDITION;
729 729
730 /* Check for tray open. */ 730 /* Check for tray open. */
@@ -735,12 +735,12 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
735 cdrom_saw_media_change (drive); 735 cdrom_saw_media_change (drive);
736 /*printk("%s: media changed\n",drive->name);*/ 736 /*printk("%s: media changed\n",drive->name);*/
737 return 0; 737 return 0;
738 } else if (!(rq->flags & REQ_QUIET)) { 738 } else if (!(rq->cmd_flags & REQ_QUIET)) {
739 /* Otherwise, print an error. */ 739 /* Otherwise, print an error. */
740 ide_dump_status(drive, "packet command error", stat); 740 ide_dump_status(drive, "packet command error", stat);
741 } 741 }
742 742
743 rq->flags |= REQ_FAILED; 743 rq->cmd_flags |= REQ_FAILED;
744 744
745 /* 745 /*
746 * instead of playing games with moving completions around, 746 * instead of playing games with moving completions around,
@@ -881,7 +881,7 @@ static int cdrom_timer_expiry(ide_drive_t *drive)
881 wait = ATAPI_WAIT_PC; 881 wait = ATAPI_WAIT_PC;
882 break; 882 break;
883 default: 883 default:
884 if (!(rq->flags & REQ_QUIET)) 884 if (!(rq->cmd_flags & REQ_QUIET))
885 printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]); 885 printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]);
886 wait = 0; 886 wait = 0;
887 break; 887 break;
@@ -1124,7 +1124,7 @@ static ide_startstop_t cdrom_read_intr (ide_drive_t *drive)
1124 if (rq->current_nr_sectors > 0) { 1124 if (rq->current_nr_sectors > 0) {
1125 printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n", 1125 printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n",
1126 drive->name, rq->current_nr_sectors); 1126 drive->name, rq->current_nr_sectors);
1127 rq->flags |= REQ_FAILED; 1127 rq->cmd_flags |= REQ_FAILED;
1128 cdrom_end_request(drive, 0); 1128 cdrom_end_request(drive, 0);
1129 } else 1129 } else
1130 cdrom_end_request(drive, 1); 1130 cdrom_end_request(drive, 1);
@@ -1456,7 +1456,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
1456 printk ("%s: cdrom_pc_intr: data underrun %d\n", 1456 printk ("%s: cdrom_pc_intr: data underrun %d\n",
1457 drive->name, pc->buflen); 1457 drive->name, pc->buflen);
1458 */ 1458 */
1459 rq->flags |= REQ_FAILED; 1459 rq->cmd_flags |= REQ_FAILED;
1460 cdrom_end_request(drive, 0); 1460 cdrom_end_request(drive, 0);
1461 } 1461 }
1462 return ide_stopped; 1462 return ide_stopped;
@@ -1509,7 +1509,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
1509 rq->data += thislen; 1509 rq->data += thislen;
1510 rq->data_len -= thislen; 1510 rq->data_len -= thislen;
1511 1511
1512 if (rq->flags & REQ_SENSE) 1512 if (blk_sense_request(rq))
1513 rq->sense_len += thislen; 1513 rq->sense_len += thislen;
1514 } else { 1514 } else {
1515confused: 1515confused:
@@ -1517,7 +1517,7 @@ confused:
1517 "appears confused (ireason = 0x%02x). " 1517 "appears confused (ireason = 0x%02x). "
1518 "Trying to recover by ending request.\n", 1518 "Trying to recover by ending request.\n",
1519 drive->name, ireason); 1519 drive->name, ireason);
1520 rq->flags |= REQ_FAILED; 1520 rq->cmd_flags |= REQ_FAILED;
1521 cdrom_end_request(drive, 0); 1521 cdrom_end_request(drive, 0);
1522 return ide_stopped; 1522 return ide_stopped;
1523 } 1523 }
@@ -1546,7 +1546,7 @@ static ide_startstop_t cdrom_do_packet_command (ide_drive_t *drive)
1546 struct cdrom_info *info = drive->driver_data; 1546 struct cdrom_info *info = drive->driver_data;
1547 1547
1548 info->dma = 0; 1548 info->dma = 0;
1549 rq->flags &= ~REQ_FAILED; 1549 rq->cmd_flags &= ~REQ_FAILED;
1550 len = rq->data_len; 1550 len = rq->data_len;
1551 1551
1552 /* Start sending the command to the drive. */ 1552 /* Start sending the command to the drive. */
@@ -1558,7 +1558,7 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
1558{ 1558{
1559 struct request_sense sense; 1559 struct request_sense sense;
1560 int retries = 10; 1560 int retries = 10;
1561 unsigned int flags = rq->flags; 1561 unsigned int flags = rq->cmd_flags;
1562 1562
1563 if (rq->sense == NULL) 1563 if (rq->sense == NULL)
1564 rq->sense = &sense; 1564 rq->sense = &sense;
@@ -1567,14 +1567,14 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
1567 do { 1567 do {
1568 int error; 1568 int error;
1569 unsigned long time = jiffies; 1569 unsigned long time = jiffies;
1570 rq->flags = flags; 1570 rq->cmd_flags = flags;
1571 1571
1572 error = ide_do_drive_cmd(drive, rq, ide_wait); 1572 error = ide_do_drive_cmd(drive, rq, ide_wait);
1573 time = jiffies - time; 1573 time = jiffies - time;
1574 1574
1575 /* FIXME: we should probably abort/retry or something 1575 /* FIXME: we should probably abort/retry or something
1576 * in case of failure */ 1576 * in case of failure */
1577 if (rq->flags & REQ_FAILED) { 1577 if (rq->cmd_flags & REQ_FAILED) {
1578 /* The request failed. Retry if it was due to a unit 1578 /* The request failed. Retry if it was due to a unit
1579 attention status 1579 attention status
1580 (usually means media was changed). */ 1580 (usually means media was changed). */
@@ -1596,10 +1596,10 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
1596 } 1596 }
1597 1597
1598 /* End of retry loop. */ 1598 /* End of retry loop. */
1599 } while ((rq->flags & REQ_FAILED) && retries >= 0); 1599 } while ((rq->cmd_flags & REQ_FAILED) && retries >= 0);
1600 1600
1601 /* Return an error if the command failed. */ 1601 /* Return an error if the command failed. */
1602 return (rq->flags & REQ_FAILED) ? -EIO : 0; 1602 return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0;
1603} 1603}
1604 1604
1605/* 1605/*
@@ -1963,7 +1963,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
1963{ 1963{
1964 struct cdrom_info *info = drive->driver_data; 1964 struct cdrom_info *info = drive->driver_data;
1965 1965
1966 rq->flags |= REQ_QUIET; 1966 rq->cmd_flags |= REQ_QUIET;
1967 1967
1968 info->dma = 0; 1968 info->dma = 0;
1969 1969
@@ -2023,11 +2023,11 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block)
2023 } 2023 }
2024 info->last_block = block; 2024 info->last_block = block;
2025 return action; 2025 return action;
2026 } else if (rq->flags & (REQ_PC | REQ_SENSE)) { 2026 } else if (rq->cmd_type == REQ_TYPE_SENSE) {
2027 return cdrom_do_packet_command(drive); 2027 return cdrom_do_packet_command(drive);
2028 } else if (rq->flags & REQ_BLOCK_PC) { 2028 } else if (blk_pc_request(rq)) {
2029 return cdrom_do_block_pc(drive, rq); 2029 return cdrom_do_block_pc(drive, rq);
2030 } else if (rq->flags & REQ_SPECIAL) { 2030 } else if (blk_special_request(rq)) {
2031 /* 2031 /*
2032 * right now this can only be a reset... 2032 * right now this can only be a reset...
2033 */ 2033 */
@@ -2105,7 +2105,7 @@ static int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
2105 2105
2106 req.sense = sense; 2106 req.sense = sense;
2107 req.cmd[0] = GPCMD_TEST_UNIT_READY; 2107 req.cmd[0] = GPCMD_TEST_UNIT_READY;
2108 req.flags |= REQ_QUIET; 2108 req.cmd_flags |= REQ_QUIET;
2109 2109
2110#if ! STANDARD_ATAPI 2110#if ! STANDARD_ATAPI
2111 /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to 2111 /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to
@@ -2207,7 +2207,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
2207 req.cmd[0] = GPCMD_READ_CDVD_CAPACITY; 2207 req.cmd[0] = GPCMD_READ_CDVD_CAPACITY;
2208 req.data = (char *)&capbuf; 2208 req.data = (char *)&capbuf;
2209 req.data_len = sizeof(capbuf); 2209 req.data_len = sizeof(capbuf);
2210 req.flags |= REQ_QUIET; 2210 req.cmd_flags |= REQ_QUIET;
2211 2211
2212 stat = cdrom_queue_packet_command(drive, &req); 2212 stat = cdrom_queue_packet_command(drive, &req);
2213 if (stat == 0) { 2213 if (stat == 0) {
@@ -2230,7 +2230,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
2230 req.sense = sense; 2230 req.sense = sense;
2231 req.data = buf; 2231 req.data = buf;
2232 req.data_len = buflen; 2232 req.data_len = buflen;
2233 req.flags |= REQ_QUIET; 2233 req.cmd_flags |= REQ_QUIET;
2234 req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; 2234 req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
2235 req.cmd[6] = trackno; 2235 req.cmd[6] = trackno;
2236 req.cmd[7] = (buflen >> 8); 2236 req.cmd[7] = (buflen >> 8);
@@ -2531,7 +2531,7 @@ static int ide_cdrom_packet(struct cdrom_device_info *cdi,
2531 req.timeout = cgc->timeout; 2531 req.timeout = cgc->timeout;
2532 2532
2533 if (cgc->quiet) 2533 if (cgc->quiet)
2534 req.flags |= REQ_QUIET; 2534 req.cmd_flags |= REQ_QUIET;
2535 2535
2536 req.sense = cgc->sense; 2536 req.sense = cgc->sense;
2537 cgc->stat = cdrom_queue_packet_command(drive, &req); 2537 cgc->stat = cdrom_queue_packet_command(drive, &req);
@@ -2629,7 +2629,8 @@ int ide_cdrom_reset (struct cdrom_device_info *cdi)
2629 int ret; 2629 int ret;
2630 2630
2631 cdrom_prepare_request(drive, &req); 2631 cdrom_prepare_request(drive, &req);
2632 req.flags = REQ_SPECIAL | REQ_QUIET; 2632 req.cmd_type = REQ_TYPE_SPECIAL;
2633 req.cmd_flags = REQ_QUIET;
2633 ret = ide_do_drive_cmd(drive, &req, ide_wait); 2634 ret = ide_do_drive_cmd(drive, &req, ide_wait);
2634 2635
2635 /* 2636 /*
@@ -3116,9 +3117,9 @@ static int ide_cdrom_prep_pc(struct request *rq)
3116 3117
3117static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq) 3118static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq)
3118{ 3119{
3119 if (rq->flags & REQ_CMD) 3120 if (blk_fs_request(rq))
3120 return ide_cdrom_prep_fs(q, rq); 3121 return ide_cdrom_prep_fs(q, rq);
3121 else if (rq->flags & REQ_BLOCK_PC) 3122 else if (blk_pc_request(rq))
3122 return ide_cdrom_prep_pc(rq); 3123 return ide_cdrom_prep_pc(rq);
3123 3124
3124 return 0; 3125 return 0;
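
Throughout ide-cd.c the old rq->flags word is split in two: what a request is now lives in rq->cmd_type (a single REQ_TYPE_* value, tested through helpers such as blk_fs_request(), blk_pc_request() and blk_sense_request()), while behavioural bits such as REQ_QUIET and REQ_FAILED stay as flags in rq->cmd_flags. A condensed sketch of the new idiom using only names visible in the hunks above; the wrapper functions themselves are illustrative:

/* Illustrative only: setting up and classifying a packet-command request. */
static void example_setup_pc(struct request *rq, struct gendisk *disk)
{
	ide_init_drive_cmd(rq);			/* zeroes rq, marks it REQ_TYPE_ATA_CMD */
	rq->cmd_type = REQ_TYPE_BLOCK_PC;	/* what the request is */
	rq->cmd_flags |= REQ_QUIET;		/* how it should be handled */
	rq->rq_disk = disk;
}

static int example_classify(struct request *rq)
{
	if (blk_fs_request(rq))			/* REQ_TYPE_FS: normal read/write */
		return 1;
	if (blk_pc_request(rq))			/* REQ_TYPE_BLOCK_PC: packet command */
		return 2;
	if (blk_sense_request(rq))		/* REQ_TYPE_SENSE: auto request-sense */
		return 3;
	return 0;
}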
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7cf3eb023521..0a05a377d66a 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -699,7 +699,8 @@ static void idedisk_prepare_flush(request_queue_t *q, struct request *rq)
699 rq->cmd[0] = WIN_FLUSH_CACHE; 699 rq->cmd[0] = WIN_FLUSH_CACHE;
700 700
701 701
702 rq->flags |= REQ_DRIVE_TASK; 702 rq->cmd_type = REQ_TYPE_ATA_TASK;
703 rq->cmd_flags |= REQ_SOFTBARRIER;
703 rq->buffer = rq->cmd; 704 rq->buffer = rq->cmd;
704} 705}
705 706
@@ -740,7 +741,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
740 if (drive->special.b.set_multmode) 741 if (drive->special.b.set_multmode)
741 return -EBUSY; 742 return -EBUSY;
742 ide_init_drive_cmd (&rq); 743 ide_init_drive_cmd (&rq);
743 rq.flags = REQ_DRIVE_CMD; 744 rq.cmd_type = REQ_TYPE_ATA_CMD;
744 drive->mult_req = arg; 745 drive->mult_req = arg;
745 drive->special.b.set_multmode = 1; 746 drive->special.b.set_multmode = 1;
746 (void) ide_do_drive_cmd (drive, &rq, ide_wait); 747 (void) ide_do_drive_cmd (drive, &rq, ide_wait);
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 7c3a13e1cf64..c3546fe9af63 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -205,7 +205,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq)
205 ide_hwif_t *hwif = HWIF(drive); 205 ide_hwif_t *hwif = HWIF(drive);
206 struct scatterlist *sg = hwif->sg_table; 206 struct scatterlist *sg = hwif->sg_table;
207 207
208 BUG_ON((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256); 208 BUG_ON((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256);
209 209
210 ide_map_sg(drive, rq); 210 ide_map_sg(drive, rq);
211 211
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index adbe9f76a505..8ccee9c769f8 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -588,7 +588,7 @@ static int idefloppy_do_end_request(ide_drive_t *drive, int uptodate, int nsecs)
588 /* Why does this happen? */ 588 /* Why does this happen? */
589 if (!rq) 589 if (!rq)
590 return 0; 590 return 0;
591 if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) { 591 if (!blk_special_request(rq)) {
592 /* our real local end request function */ 592 /* our real local end request function */
593 ide_end_request(drive, uptodate, nsecs); 593 ide_end_request(drive, uptodate, nsecs);
594 return 0; 594 return 0;
@@ -689,7 +689,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc
689 689
690 ide_init_drive_cmd(rq); 690 ide_init_drive_cmd(rq);
691 rq->buffer = (char *) pc; 691 rq->buffer = (char *) pc;
692 rq->flags = REQ_SPECIAL; //rq->cmd = IDEFLOPPY_PC_RQ; 692 rq->cmd_type = REQ_TYPE_SPECIAL;
693 rq->rq_disk = floppy->disk; 693 rq->rq_disk = floppy->disk;
694 (void) ide_do_drive_cmd(drive, rq, ide_preempt); 694 (void) ide_do_drive_cmd(drive, rq, ide_preempt);
695} 695}
@@ -1250,7 +1250,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t
1250 pc->callback = &idefloppy_rw_callback; 1250 pc->callback = &idefloppy_rw_callback;
1251 pc->rq = rq; 1251 pc->rq = rq;
1252 pc->b_count = cmd == READ ? 0 : rq->bio->bi_size; 1252 pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
1253 if (rq->flags & REQ_RW) 1253 if (rq->cmd_flags & REQ_RW)
1254 set_bit(PC_WRITING, &pc->flags); 1254 set_bit(PC_WRITING, &pc->flags);
1255 pc->buffer = NULL; 1255 pc->buffer = NULL;
1256 pc->request_transfer = pc->buffer_size = blocks * floppy->block_size; 1256 pc->request_transfer = pc->buffer_size = blocks * floppy->block_size;
@@ -1281,8 +1281,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
1281 idefloppy_pc_t *pc; 1281 idefloppy_pc_t *pc;
1282 unsigned long block = (unsigned long)block_s; 1282 unsigned long block = (unsigned long)block_s;
1283 1283
1284 debug_log(KERN_INFO "rq_status: %d, dev: %s, flags: %lx, errors: %d\n", 1284 debug_log(KERN_INFO "dev: %s, flags: %lx, errors: %d\n",
1285 rq->rq_status,
1286 rq->rq_disk ? rq->rq_disk->disk_name : "?", 1285 rq->rq_disk ? rq->rq_disk->disk_name : "?",
1287 rq->flags, rq->errors); 1286 rq->flags, rq->errors);
1288 debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, " 1287 debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, "
@@ -1303,7 +1302,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
1303 idefloppy_do_end_request(drive, 0, 0); 1302 idefloppy_do_end_request(drive, 0, 0);
1304 return ide_stopped; 1303 return ide_stopped;
1305 } 1304 }
1306 if (rq->flags & REQ_CMD) { 1305 if (blk_fs_request(rq)) {
1307 if (((long)rq->sector % floppy->bs_factor) || 1306 if (((long)rq->sector % floppy->bs_factor) ||
1308 (rq->nr_sectors % floppy->bs_factor)) { 1307 (rq->nr_sectors % floppy->bs_factor)) {
1309 printk("%s: unsupported r/w request size\n", 1308 printk("%s: unsupported r/w request size\n",
@@ -1313,9 +1312,9 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
1313 } 1312 }
1314 pc = idefloppy_next_pc_storage(drive); 1313 pc = idefloppy_next_pc_storage(drive);
1315 idefloppy_create_rw_cmd(floppy, pc, rq, block); 1314 idefloppy_create_rw_cmd(floppy, pc, rq, block);
1316 } else if (rq->flags & REQ_SPECIAL) { 1315 } else if (blk_special_request(rq)) {
1317 pc = (idefloppy_pc_t *) rq->buffer; 1316 pc = (idefloppy_pc_t *) rq->buffer;
1318 } else if (rq->flags & REQ_BLOCK_PC) { 1317 } else if (blk_pc_request(rq)) {
1319 pc = idefloppy_next_pc_storage(drive); 1318 pc = idefloppy_next_pc_storage(drive);
1320 if (idefloppy_blockpc_cmd(floppy, pc, rq)) { 1319 if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
1321 idefloppy_do_end_request(drive, 0, 0); 1320 idefloppy_do_end_request(drive, 0, 0);
@@ -1343,7 +1342,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc)
1343 1342
1344 ide_init_drive_cmd (&rq); 1343 ide_init_drive_cmd (&rq);
1345 rq.buffer = (char *) pc; 1344 rq.buffer = (char *) pc;
1346 rq.flags = REQ_SPECIAL; // rq.cmd = IDEFLOPPY_PC_RQ; 1345 rq.cmd_type = REQ_TYPE_SPECIAL;
1347 rq.rq_disk = floppy->disk; 1346 rq.rq_disk = floppy->disk;
1348 1347
1349 return ide_do_drive_cmd(drive, &rq, ide_wait); 1348 return ide_do_drive_cmd(drive, &rq, ide_wait);
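
ide-floppy queues its driver-private packet commands as REQ_TYPE_SPECIAL requests whose payload travels in rq->buffer, and dispatches on the request type when the queue hands them back. A reduced sketch of the queueing side; only the names shown in the hunks are real, the rest is scaffolding:

/* Sketch: a driver-private command carried as a REQ_TYPE_SPECIAL request. */
static int example_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
				 idefloppy_pc_t *pc)
{
	struct request rq;

	ide_init_drive_cmd(&rq);
	rq.buffer = (char *) pc;		/* payload rides in ->buffer */
	rq.cmd_type = REQ_TYPE_SPECIAL;		/* was: rq.flags = REQ_SPECIAL */
	rq.rq_disk = disk;

	return ide_do_drive_cmd(drive, &rq, ide_wait);
}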
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index fb6795236e76..38479a29d3e1 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -59,7 +59,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
59{ 59{
60 int ret = 1; 60 int ret = 1;
61 61
62 BUG_ON(!(rq->flags & REQ_STARTED)); 62 BUG_ON(!blk_rq_started(rq));
63 63
64 /* 64 /*
65 * if failfast is set on a request, override number of sectors and 65 * if failfast is set on a request, override number of sectors and
@@ -141,7 +141,7 @@ enum {
141 141
142static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error) 142static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error)
143{ 143{
144 struct request_pm_state *pm = rq->end_io_data; 144 struct request_pm_state *pm = rq->data;
145 145
146 if (drive->media != ide_disk) 146 if (drive->media != ide_disk)
147 return; 147 return;
@@ -164,7 +164,7 @@ static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 s
164 164
165static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) 165static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
166{ 166{
167 struct request_pm_state *pm = rq->end_io_data; 167 struct request_pm_state *pm = rq->data;
168 ide_task_t *args = rq->special; 168 ide_task_t *args = rq->special;
169 169
170 memset(args, 0, sizeof(*args)); 170 memset(args, 0, sizeof(*args));
@@ -244,7 +244,7 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
244 244
245 spin_lock_irqsave(&ide_lock, flags); 245 spin_lock_irqsave(&ide_lock, flags);
246 246
247 BUG_ON(!(rq->flags & REQ_STARTED)); 247 BUG_ON(!blk_rq_started(rq));
248 248
249 /* 249 /*
250 * if failfast is set on a request, override number of sectors and 250 * if failfast is set on a request, override number of sectors and
@@ -366,7 +366,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
366 rq = HWGROUP(drive)->rq; 366 rq = HWGROUP(drive)->rq;
367 spin_unlock_irqrestore(&ide_lock, flags); 367 spin_unlock_irqrestore(&ide_lock, flags);
368 368
369 if (rq->flags & REQ_DRIVE_CMD) { 369 if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
370 u8 *args = (u8 *) rq->buffer; 370 u8 *args = (u8 *) rq->buffer;
371 if (rq->errors == 0) 371 if (rq->errors == 0)
372 rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); 372 rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -376,7 +376,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
376 args[1] = err; 376 args[1] = err;
377 args[2] = hwif->INB(IDE_NSECTOR_REG); 377 args[2] = hwif->INB(IDE_NSECTOR_REG);
378 } 378 }
379 } else if (rq->flags & REQ_DRIVE_TASK) { 379 } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
380 u8 *args = (u8 *) rq->buffer; 380 u8 *args = (u8 *) rq->buffer;
381 if (rq->errors == 0) 381 if (rq->errors == 0)
382 rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); 382 rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -390,7 +390,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
390 args[5] = hwif->INB(IDE_HCYL_REG); 390 args[5] = hwif->INB(IDE_HCYL_REG);
391 args[6] = hwif->INB(IDE_SELECT_REG); 391 args[6] = hwif->INB(IDE_SELECT_REG);
392 } 392 }
393 } else if (rq->flags & REQ_DRIVE_TASKFILE) { 393 } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
394 ide_task_t *args = (ide_task_t *) rq->special; 394 ide_task_t *args = (ide_task_t *) rq->special;
395 if (rq->errors == 0) 395 if (rq->errors == 0)
396 rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); 396 rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -421,7 +421,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
421 } 421 }
422 } 422 }
423 } else if (blk_pm_request(rq)) { 423 } else if (blk_pm_request(rq)) {
424 struct request_pm_state *pm = rq->end_io_data; 424 struct request_pm_state *pm = rq->data;
425#ifdef DEBUG_PM 425#ifdef DEBUG_PM
426 printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n", 426 printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n",
427 drive->name, rq->pm->pm_step, stat, err); 427 drive->name, rq->pm->pm_step, stat, err);
@@ -587,7 +587,7 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
587 return ide_stopped; 587 return ide_stopped;
588 588
589 /* retry only "normal" I/O: */ 589 /* retry only "normal" I/O: */
590 if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) { 590 if (!blk_fs_request(rq)) {
591 rq->errors = 1; 591 rq->errors = 1;
592 ide_end_drive_cmd(drive, stat, err); 592 ide_end_drive_cmd(drive, stat, err);
593 return ide_stopped; 593 return ide_stopped;
@@ -638,7 +638,7 @@ ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg)
638 return ide_stopped; 638 return ide_stopped;
639 639
640 /* retry only "normal" I/O: */ 640 /* retry only "normal" I/O: */
641 if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) { 641 if (!blk_fs_request(rq)) {
642 rq->errors = 1; 642 rq->errors = 1;
643 ide_end_drive_cmd(drive, BUSY_STAT, 0); 643 ide_end_drive_cmd(drive, BUSY_STAT, 0);
644 return ide_stopped; 644 return ide_stopped;
@@ -808,7 +808,7 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq)
808 if (hwif->sg_mapped) /* needed by ide-scsi */ 808 if (hwif->sg_mapped) /* needed by ide-scsi */
809 return; 809 return;
810 810
811 if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) { 811 if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
812 hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); 812 hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
813 } else { 813 } else {
814 sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE); 814 sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
@@ -844,7 +844,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
844 struct request *rq) 844 struct request *rq)
845{ 845{
846 ide_hwif_t *hwif = HWIF(drive); 846 ide_hwif_t *hwif = HWIF(drive);
847 if (rq->flags & REQ_DRIVE_TASKFILE) { 847 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
848 ide_task_t *args = rq->special; 848 ide_task_t *args = rq->special;
849 849
850 if (!args) 850 if (!args)
@@ -866,7 +866,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
866 if (args->tf_out_flags.all != 0) 866 if (args->tf_out_flags.all != 0)
867 return flagged_taskfile(drive, args); 867 return flagged_taskfile(drive, args);
868 return do_rw_taskfile(drive, args); 868 return do_rw_taskfile(drive, args);
869 } else if (rq->flags & REQ_DRIVE_TASK) { 869 } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
870 u8 *args = rq->buffer; 870 u8 *args = rq->buffer;
871 u8 sel; 871 u8 sel;
872 872
@@ -892,7 +892,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
892 hwif->OUTB(sel, IDE_SELECT_REG); 892 hwif->OUTB(sel, IDE_SELECT_REG);
893 ide_cmd(drive, args[0], args[2], &drive_cmd_intr); 893 ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
894 return ide_started; 894 return ide_started;
895 } else if (rq->flags & REQ_DRIVE_CMD) { 895 } else if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
896 u8 *args = rq->buffer; 896 u8 *args = rq->buffer;
897 897
898 if (!args) 898 if (!args)
@@ -933,7 +933,7 @@ done:
933 933
934static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) 934static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
935{ 935{
936 struct request_pm_state *pm = rq->end_io_data; 936 struct request_pm_state *pm = rq->data;
937 937
938 if (blk_pm_suspend_request(rq) && 938 if (blk_pm_suspend_request(rq) &&
939 pm->pm_step == ide_pm_state_start_suspend) 939 pm->pm_step == ide_pm_state_start_suspend)
@@ -980,7 +980,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
980 ide_startstop_t startstop; 980 ide_startstop_t startstop;
981 sector_t block; 981 sector_t block;
982 982
983 BUG_ON(!(rq->flags & REQ_STARTED)); 983 BUG_ON(!blk_rq_started(rq));
984 984
985#ifdef DEBUG 985#ifdef DEBUG
986 printk("%s: start_request: current=0x%08lx\n", 986 printk("%s: start_request: current=0x%08lx\n",
@@ -1013,12 +1013,12 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
1013 if (!drive->special.all) { 1013 if (!drive->special.all) {
1014 ide_driver_t *drv; 1014 ide_driver_t *drv;
1015 1015
1016 if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) 1016 if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
1017 return execute_drive_cmd(drive, rq); 1017 rq->cmd_type == REQ_TYPE_ATA_TASK ||
1018 else if (rq->flags & REQ_DRIVE_TASKFILE) 1018 rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
1019 return execute_drive_cmd(drive, rq); 1019 return execute_drive_cmd(drive, rq);
1020 else if (blk_pm_request(rq)) { 1020 else if (blk_pm_request(rq)) {
1021 struct request_pm_state *pm = rq->end_io_data; 1021 struct request_pm_state *pm = rq->data;
1022#ifdef DEBUG_PM 1022#ifdef DEBUG_PM
1023 printk("%s: start_power_step(step: %d)\n", 1023 printk("%s: start_power_step(step: %d)\n",
1024 drive->name, rq->pm->pm_step); 1024 drive->name, rq->pm->pm_step);
@@ -1264,7 +1264,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
1264 * We count how many times we loop here to make sure we service 1264 * We count how many times we loop here to make sure we service
1265 * all drives in the hwgroup without looping for ever 1265 * all drives in the hwgroup without looping for ever
1266 */ 1266 */
1267 if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) { 1267 if (drive->blocked && !blk_pm_request(rq) && !(rq->cmd_flags & REQ_PREEMPT)) {
1268 drive = drive->next ? drive->next : hwgroup->drive; 1268 drive = drive->next ? drive->next : hwgroup->drive;
1269 if (loops++ < 4 && !blk_queue_plugged(drive->queue)) 1269 if (loops++ < 4 && !blk_queue_plugged(drive->queue))
1270 goto again; 1270 goto again;
@@ -1670,7 +1670,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs)
1670void ide_init_drive_cmd (struct request *rq) 1670void ide_init_drive_cmd (struct request *rq)
1671{ 1671{
1672 memset(rq, 0, sizeof(*rq)); 1672 memset(rq, 0, sizeof(*rq));
1673 rq->flags = REQ_DRIVE_CMD; 1673 rq->cmd_type = REQ_TYPE_ATA_CMD;
1674 rq->ref_count = 1; 1674 rq->ref_count = 1;
1675} 1675}
1676 1676
@@ -1710,7 +1710,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
1710 int must_wait = (action == ide_wait || action == ide_head_wait); 1710 int must_wait = (action == ide_wait || action == ide_head_wait);
1711 1711
1712 rq->errors = 0; 1712 rq->errors = 0;
1713 rq->rq_status = RQ_ACTIVE;
1714 1713
1715 /* 1714 /*
1716 * we need to hold an extra reference to request for safe inspection 1715 * we need to hold an extra reference to request for safe inspection
@@ -1718,7 +1717,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
1718 */ 1717 */
1719 if (must_wait) { 1718 if (must_wait) {
1720 rq->ref_count++; 1719 rq->ref_count++;
1721 rq->waiting = &wait; 1720 rq->end_io_data = &wait;
1722 rq->end_io = blk_end_sync_rq; 1721 rq->end_io = blk_end_sync_rq;
1723 } 1722 }
1724 1723
@@ -1727,7 +1726,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
1727 hwgroup->rq = NULL; 1726 hwgroup->rq = NULL;
1728 if (action == ide_preempt || action == ide_head_wait) { 1727 if (action == ide_preempt || action == ide_head_wait) {
1729 where = ELEVATOR_INSERT_FRONT; 1728 where = ELEVATOR_INSERT_FRONT;
1730 rq->flags |= REQ_PREEMPT; 1729 rq->cmd_flags |= REQ_PREEMPT;
1731 } 1730 }
1732 __elv_add_request(drive->queue, rq, where, 0); 1731 __elv_add_request(drive->queue, rq, where, 0);
1733 ide_do_request(hwgroup, IDE_NO_IRQ); 1732 ide_do_request(hwgroup, IDE_NO_IRQ);
@@ -1736,7 +1735,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
1736 err = 0; 1735 err = 0;
1737 if (must_wait) { 1736 if (must_wait) {
1738 wait_for_completion(&wait); 1737 wait_for_completion(&wait);
1739 rq->waiting = NULL;
1740 if (rq->errors) 1738 if (rq->errors)
1741 err = -EIO; 1739 err = -EIO;
1742 1740
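
With rq->rq_status and the private rq->waiting pointer gone, a synchronous caller parks a completion in rq->end_io_data and lets blk_end_sync_rq() wake it. The sketch below condenses the ide_wait path of ide_do_drive_cmd() as it reads after this hunk; the locking is simplified, the preempt/head-insertion cases are dropped, and the on-stack completion declaration is an assumption:

/* Condensed sketch of the synchronous submit-and-wait path. */
static int example_do_cmd_wait(ide_drive_t *drive, struct request *rq)
{
	DECLARE_COMPLETION(wait);
	unsigned long flags;

	rq->errors = 0;
	rq->ref_count++;		/* keep rq alive for inspection afterwards */
	rq->end_io_data = &wait;	/* blk_end_sync_rq() completes this */
	rq->end_io = blk_end_sync_rq;

	spin_lock_irqsave(&ide_lock, flags);
	__elv_add_request(drive->queue, rq, ELEVATOR_INSERT_BACK, 0);
	ide_do_request(HWGROUP(drive), IDE_NO_IRQ);
	spin_unlock_irqrestore(&ide_lock, flags);

	wait_for_completion(&wait);
	return rq->errors ? -EIO : 0;
}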
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 1feff23487d4..850ef63cc986 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -456,13 +456,14 @@ static void ide_dump_opcode(ide_drive_t *drive)
456 spin_unlock(&ide_lock); 456 spin_unlock(&ide_lock);
457 if (!rq) 457 if (!rq)
458 return; 458 return;
459 if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) { 459 if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
460 rq->cmd_type == REQ_TYPE_ATA_TASK) {
460 char *args = rq->buffer; 461 char *args = rq->buffer;
461 if (args) { 462 if (args) {
462 opcode = args[0]; 463 opcode = args[0];
463 found = 1; 464 found = 1;
464 } 465 }
465 } else if (rq->flags & REQ_DRIVE_TASKFILE) { 466 } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
466 ide_task_t *args = rq->special; 467 ide_task_t *args = rq->special;
467 if (args) { 468 if (args) {
468 task_struct_t *tf = (task_struct_t *) args->tfRegister; 469 task_struct_t *tf = (task_struct_t *) args->tfRegister;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 7067ab997927..2ebc3760f261 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -1776,7 +1776,7 @@ static void idetape_create_request_sense_cmd (idetape_pc_t *pc)
1776static void idetape_init_rq(struct request *rq, u8 cmd) 1776static void idetape_init_rq(struct request *rq, u8 cmd)
1777{ 1777{
1778 memset(rq, 0, sizeof(*rq)); 1778 memset(rq, 0, sizeof(*rq));
1779 rq->flags = REQ_SPECIAL; 1779 rq->cmd_type = REQ_TYPE_SPECIAL;
1780 rq->cmd[0] = cmd; 1780 rq->cmd[0] = cmd;
1781} 1781}
1782 1782
@@ -2423,8 +2423,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
2423#if IDETAPE_DEBUG_LOG 2423#if IDETAPE_DEBUG_LOG
2424#if 0 2424#if 0
2425 if (tape->debug_level >= 5) 2425 if (tape->debug_level >= 5)
2426 printk(KERN_INFO "ide-tape: rq_status: %d, " 2426 printk(KERN_INFO "ide-tape: %d, "
2427 "dev: %s, cmd: %ld, errors: %d\n", rq->rq_status, 2427 "dev: %s, cmd: %ld, errors: %d\n",
2428 rq->rq_disk->disk_name, rq->cmd[0], rq->errors); 2428 rq->rq_disk->disk_name, rq->cmd[0], rq->errors);
2429#endif 2429#endif
2430 if (tape->debug_level >= 2) 2430 if (tape->debug_level >= 2)
@@ -2433,12 +2433,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
2433 rq->sector, rq->nr_sectors, rq->current_nr_sectors); 2433 rq->sector, rq->nr_sectors, rq->current_nr_sectors);
2434#endif /* IDETAPE_DEBUG_LOG */ 2434#endif /* IDETAPE_DEBUG_LOG */
2435 2435
2436 if ((rq->flags & REQ_SPECIAL) == 0) { 2436 if (!blk_special_request(rq)) {
2437 /* 2437 /*
2438 * We do not support buffer cache originated requests. 2438 * We do not support buffer cache originated requests.
2439 */ 2439 */
2440 printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " 2440 printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
2441 "request queue (%ld)\n", drive->name, rq->flags); 2441 "request queue (%d)\n", drive->name, rq->cmd_type);
2442 ide_end_request(drive, 0, 0); 2442 ide_end_request(drive, 0, 0);
2443 return ide_stopped; 2443 return ide_stopped;
2444 } 2444 }
@@ -2768,12 +2768,12 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq)
2768 idetape_tape_t *tape = drive->driver_data; 2768 idetape_tape_t *tape = drive->driver_data;
2769 2769
2770#if IDETAPE_DEBUG_BUGS 2770#if IDETAPE_DEBUG_BUGS
2771 if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) { 2771 if (rq == NULL || !blk_special_request(rq)) {
2772 printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n"); 2772 printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n");
2773 return; 2773 return;
2774 } 2774 }
2775#endif /* IDETAPE_DEBUG_BUGS */ 2775#endif /* IDETAPE_DEBUG_BUGS */
2776 rq->waiting = &wait; 2776 rq->end_io_data = &wait;
2777 rq->end_io = blk_end_sync_rq; 2777 rq->end_io = blk_end_sync_rq;
2778 spin_unlock_irq(&tape->spinlock); 2778 spin_unlock_irq(&tape->spinlock);
2779 wait_for_completion(&wait); 2779 wait_for_completion(&wait);
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 97a9244312fc..1d0470c1f957 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -363,7 +363,7 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq,
363 363
364static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat) 364static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
365{ 365{
366 if (rq->flags & REQ_DRIVE_TASKFILE) { 366 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
367 ide_task_t *task = rq->special; 367 ide_task_t *task = rq->special;
368 368
369 if (task->tf_out_flags.all) { 369 if (task->tf_out_flags.all) {
@@ -474,7 +474,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
474 struct request rq; 474 struct request rq;
475 475
476 memset(&rq, 0, sizeof(rq)); 476 memset(&rq, 0, sizeof(rq));
477 rq.flags = REQ_DRIVE_TASKFILE; 477 rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
478 rq.buffer = buf; 478 rq.buffer = buf;
479 479
480 /* 480 /*
@@ -499,7 +499,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
499 rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors; 499 rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors;
500 500
501 if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE) 501 if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
502 rq.flags |= REQ_RW; 502 rq.cmd_flags |= REQ_RW;
503 } 503 }
504 504
505 rq.special = args; 505 rq.special = args;
@@ -737,7 +737,7 @@ static int ide_wait_cmd_task(ide_drive_t *drive, u8 *buf)
737 struct request rq; 737 struct request rq;
738 738
739 ide_init_drive_cmd(&rq); 739 ide_init_drive_cmd(&rq);
740 rq.flags = REQ_DRIVE_TASK; 740 rq.cmd_type = REQ_TYPE_ATA_TASK;
741 rq.buffer = buf; 741 rq.buffer = buf;
742 return ide_do_drive_cmd(drive, &rq, ide_wait); 742 return ide_do_drive_cmd(drive, &rq, ide_wait);
743} 743}
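
ide-taskfile.c builds its on-stack requests in the same two steps: cmd_type says what kind of command the request carries, cmd_flags carries modifiers such as the write direction. The sketch merges the two call sites above into one illustrative helper; sector counts and data-buffer setup are omitted:

/* Sketch: issuing a taskfile command through an on-stack request. */
static int example_issue_taskfile(ide_drive_t *drive, ide_task_t *args,
				  u8 *buf, int raw_write)
{
	struct request rq;

	memset(&rq, 0, sizeof(rq));
	rq.cmd_type = REQ_TYPE_ATA_TASKFILE;	/* was: rq.flags = REQ_DRIVE_TASKFILE */
	rq.buffer = buf;
	if (raw_write)
		rq.cmd_flags |= REQ_RW;		/* direction is a flag, not a type */
	rq.special = args;

	return ide_do_drive_cmd(drive, &rq, ide_wait);
}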
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 9c8468de1a75..2b1a1389c318 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -1217,9 +1217,9 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
1217 memset(&rq, 0, sizeof(rq)); 1217 memset(&rq, 0, sizeof(rq));
1218 memset(&rqpm, 0, sizeof(rqpm)); 1218 memset(&rqpm, 0, sizeof(rqpm));
1219 memset(&args, 0, sizeof(args)); 1219 memset(&args, 0, sizeof(args));
1220 rq.flags = REQ_PM_SUSPEND; 1220 rq.cmd_type = REQ_TYPE_PM_SUSPEND;
1221 rq.special = &args; 1221 rq.special = &args;
1222 rq.end_io_data = &rqpm; 1222 rq.data = &rqpm;
1223 rqpm.pm_step = ide_pm_state_start_suspend; 1223 rqpm.pm_step = ide_pm_state_start_suspend;
1224 if (mesg.event == PM_EVENT_PRETHAW) 1224 if (mesg.event == PM_EVENT_PRETHAW)
1225 mesg.event = PM_EVENT_FREEZE; 1225 mesg.event = PM_EVENT_FREEZE;
@@ -1238,9 +1238,9 @@ static int generic_ide_resume(struct device *dev)
1238 memset(&rq, 0, sizeof(rq)); 1238 memset(&rq, 0, sizeof(rq));
1239 memset(&rqpm, 0, sizeof(rqpm)); 1239 memset(&rqpm, 0, sizeof(rqpm));
1240 memset(&args, 0, sizeof(args)); 1240 memset(&args, 0, sizeof(args));
1241 rq.flags = REQ_PM_RESUME; 1241 rq.cmd_type = REQ_TYPE_PM_RESUME;
1242 rq.special = &args; 1242 rq.special = &args;
1243 rq.end_io_data = &rqpm; 1243 rq.data = &rqpm;
1244 rqpm.pm_step = ide_pm_state_start_resume; 1244 rqpm.pm_step = ide_pm_state_start_resume;
1245 rqpm.pm_state = PM_EVENT_ON; 1245 rqpm.pm_state = PM_EVENT_ON;
1246 1246
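
generic_ide_suspend()/resume() show the power-management variant: the request type becomes REQ_TYPE_PM_SUSPEND or REQ_TYPE_PM_RESUME, and the PM state block moves from rq->end_io_data to rq->data, freeing end_io_data for the completion used by synchronous waiters. A trimmed sketch of the suspend side; everything beyond the request setup is elided:

/* Sketch: building a PM request in the post-patch layout. */
static int example_ide_suspend(ide_drive_t *drive)
{
	struct request rq;
	struct request_pm_state rqpm;
	ide_task_t args;

	memset(&rq, 0, sizeof(rq));
	memset(&rqpm, 0, sizeof(rqpm));
	memset(&args, 0, sizeof(args));
	rq.cmd_type = REQ_TYPE_PM_SUSPEND;	/* was: rq.flags = REQ_PM_SUSPEND */
	rq.special = &args;			/* taskfile scratch space */
	rq.data = &rqpm;			/* PM state now travels in ->data */
	rqpm.pm_step = ide_pm_state_start_suspend;

	return ide_do_drive_cmd(drive, &rq, ide_wait);
}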
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c
index aebecd8f51cc..4ab931145673 100644
--- a/drivers/ide/legacy/hd.c
+++ b/drivers/ide/legacy/hd.c
@@ -626,7 +626,7 @@ repeat:
626 req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ", 626 req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ",
627 cyl, head, sec, nsect, req->buffer); 627 cyl, head, sec, nsect, req->buffer);
628#endif 628#endif
629 if (req->flags & REQ_CMD) { 629 if (blk_fs_request(req)) {
630 switch (rq_data_dir(req)) { 630 switch (rq_data_dir(req)) {
631 case READ: 631 case READ:
632 hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr); 632 hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr);
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index bf869ed03eed..6dd31a291d84 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -2,6 +2,8 @@
2# Block device driver configuration 2# Block device driver configuration
3# 3#
4 4
5if BLOCK
6
5menu "Multi-device support (RAID and LVM)" 7menu "Multi-device support (RAID and LVM)"
6 8
7config MD 9config MD
@@ -251,3 +253,4 @@ config DM_MULTIPATH_EMC
251 253
252endmenu 254endmenu
253 255
256endif
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 2a374ccb30dd..2b2d45d7baaa 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -126,7 +126,8 @@ static struct request *get_failover_req(struct emc_handler *h,
126 memset(&rq->cmd, 0, BLK_MAX_CDB); 126 memset(&rq->cmd, 0, BLK_MAX_CDB);
127 127
128 rq->timeout = EMC_FAILOVER_TIMEOUT; 128 rq->timeout = EMC_FAILOVER_TIMEOUT;
129 rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE); 129 rq->cmd_type = REQ_TYPE_BLOCK_PC;
130 rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
130 131
131 return rq; 132 return rq;
132} 133}
diff --git a/drivers/message/i2o/Kconfig b/drivers/message/i2o/Kconfig
index fef677103880..6443392bffff 100644
--- a/drivers/message/i2o/Kconfig
+++ b/drivers/message/i2o/Kconfig
@@ -88,7 +88,7 @@ config I2O_BUS
88 88
89config I2O_BLOCK 89config I2O_BLOCK
90 tristate "I2O Block OSM" 90 tristate "I2O Block OSM"
91 depends on I2O 91 depends on I2O && BLOCK
92 ---help--- 92 ---help---
93 Include support for the I2O Block OSM. The Block OSM presents disk 93 Include support for the I2O Block OSM. The Block OSM presents disk
94 and other structured block devices to the operating system. If you 94 and other structured block devices to the operating system. If you
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 1ddc2fb429d5..eaba81bf2eca 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -390,9 +390,9 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
390 } 390 }
391 391
392 /* request is already processed by us, so return */ 392 /* request is already processed by us, so return */
393 if (req->flags & REQ_SPECIAL) { 393 if (blk_special_request(req)) {
394 osm_debug("REQ_SPECIAL already set!\n"); 394 osm_debug("REQ_SPECIAL already set!\n");
395 req->flags |= REQ_DONTPREP; 395 req->cmd_flags |= REQ_DONTPREP;
396 return BLKPREP_OK; 396 return BLKPREP_OK;
397 } 397 }
398 398
@@ -411,7 +411,8 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
411 ireq = req->special; 411 ireq = req->special;
412 412
413 /* do not come back here */ 413 /* do not come back here */
414 req->flags |= REQ_DONTPREP | REQ_SPECIAL; 414 req->cmd_type = REQ_TYPE_SPECIAL;
415 req->cmd_flags |= REQ_DONTPREP;
415 416
416 return BLKPREP_OK; 417 return BLKPREP_OK;
417}; 418};
diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig
index 45bcf098e762..f540bd88dc5a 100644
--- a/drivers/mmc/Kconfig
+++ b/drivers/mmc/Kconfig
@@ -21,7 +21,7 @@ config MMC_DEBUG
21 21
22config MMC_BLOCK 22config MMC_BLOCK
23 tristate "MMC block device driver" 23 tristate "MMC block device driver"
24 depends on MMC 24 depends on MMC && BLOCK
25 default y 25 default y
26 help 26 help
27 Say Y here to enable the MMC block device driver support. 27 Say Y here to enable the MMC block device driver support.
diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index d2957e35cc6f..b1f6e03e7aa9 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -24,7 +24,8 @@ obj-$(CONFIG_MMC_AU1X) += au1xmmc.o
24obj-$(CONFIG_MMC_OMAP) += omap.o 24obj-$(CONFIG_MMC_OMAP) += omap.o
25obj-$(CONFIG_MMC_AT91RM9200) += at91_mci.o 25obj-$(CONFIG_MMC_AT91RM9200) += at91_mci.o
26 26
27mmc_core-y := mmc.o mmc_queue.o mmc_sysfs.o 27mmc_core-y := mmc.o mmc_sysfs.o
28mmc_core-$(CONFIG_BLOCK) += mmc_queue.o
28 29
29ifeq ($(CONFIG_MMC_DEBUG),y) 30ifeq ($(CONFIG_MMC_DEBUG),y)
30EXTRA_CFLAGS += -DDEBUG 31EXTRA_CFLAGS += -DDEBUG
diff --git a/drivers/mmc/mmc_queue.c b/drivers/mmc/mmc_queue.c
index 74f8cdeeff0f..4ccdd82b680f 100644
--- a/drivers/mmc/mmc_queue.c
+++ b/drivers/mmc/mmc_queue.c
@@ -28,7 +28,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
28 struct mmc_queue *mq = q->queuedata; 28 struct mmc_queue *mq = q->queuedata;
29 int ret = BLKPREP_KILL; 29 int ret = BLKPREP_KILL;
30 30
31 if (req->flags & REQ_SPECIAL) { 31 if (blk_special_request(req)) {
32 /* 32 /*
33 * Special commands already have the command 33 * Special commands already have the command
34 * blocks already setup in req->special. 34 * blocks already setup in req->special.
@@ -36,7 +36,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
36 BUG_ON(!req->special); 36 BUG_ON(!req->special);
37 37
38 ret = BLKPREP_OK; 38 ret = BLKPREP_OK;
39 } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { 39 } else if (blk_fs_request(req) || blk_pc_request(req)) {
40 /* 40 /*
41 * Block I/O requests need translating according 41 * Block I/O requests need translating according
42 * to the protocol. 42 * to the protocol.
@@ -50,7 +50,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
50 } 50 }
51 51
52 if (ret == BLKPREP_OK) 52 if (ret == BLKPREP_OK)
53 req->flags |= REQ_DONTPREP; 53 req->cmd_flags |= REQ_DONTPREP;
54 54
55 return ret; 55 return ret;
56} 56}
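
mmc_prep_request() keeps its decision tree but spells it with the type helpers: special requests are already prepared, filesystem and packet requests need translating for the protocol, anything else is killed. A compressed sketch of a prep_rq_fn in this style, with the actual translation reduced to a comment:

/* Sketch of a prep_rq_fn written with the blk_*_request() classifiers. */
static int example_prep_request(struct request_queue *q, struct request *req)
{
	int ret = BLKPREP_KILL;

	if (blk_special_request(req)) {
		/* command blocks already set up in req->special */
		ret = BLKPREP_OK;
	} else if (blk_fs_request(req) || blk_pc_request(req)) {
		/* translate the block request for the protocol here */
		ret = BLKPREP_OK;
	}

	if (ret == BLKPREP_OK)
		req->cmd_flags |= REQ_DONTPREP;	/* skip prep on requeue */

	return ret;
}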
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index a03e862851db..a304b34c2632 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -166,7 +166,7 @@ config MTD_CHAR
166 166
167config MTD_BLOCK 167config MTD_BLOCK
168 tristate "Caching block device access to MTD devices" 168 tristate "Caching block device access to MTD devices"
169 depends on MTD 169 depends on MTD && BLOCK
170 ---help--- 170 ---help---
171 Although most flash chips have an erase size too large to be useful 171 Although most flash chips have an erase size too large to be useful
172 as block devices, it is possible to use MTD devices which are based 172 as block devices, it is possible to use MTD devices which are based
@@ -188,7 +188,7 @@ config MTD_BLOCK
188 188
189config MTD_BLOCK_RO 189config MTD_BLOCK_RO
190 tristate "Readonly block device access to MTD devices" 190 tristate "Readonly block device access to MTD devices"
191 depends on MTD_BLOCK!=y && MTD 191 depends on MTD_BLOCK!=y && MTD && BLOCK
192 help 192 help
193 This allows you to mount read-only file systems (such as cramfs) 193 This allows you to mount read-only file systems (such as cramfs)
194 from an MTD device, without the overhead (and danger) of the caching 194 from an MTD device, without the overhead (and danger) of the caching
@@ -199,7 +199,7 @@ config MTD_BLOCK_RO
199 199
200config FTL 200config FTL
201 tristate "FTL (Flash Translation Layer) support" 201 tristate "FTL (Flash Translation Layer) support"
202 depends on MTD 202 depends on MTD && BLOCK
203 ---help--- 203 ---help---
204 This provides support for the original Flash Translation Layer which 204 This provides support for the original Flash Translation Layer which
205 is part of the PCMCIA specification. It uses a kind of pseudo- 205 is part of the PCMCIA specification. It uses a kind of pseudo-
@@ -215,7 +215,7 @@ config FTL
215 215
216config NFTL 216config NFTL
217 tristate "NFTL (NAND Flash Translation Layer) support" 217 tristate "NFTL (NAND Flash Translation Layer) support"
218 depends on MTD 218 depends on MTD && BLOCK
219 ---help--- 219 ---help---
220 This provides support for the NAND Flash Translation Layer which is 220 This provides support for the NAND Flash Translation Layer which is
221 used on M-Systems' DiskOnChip devices. It uses a kind of pseudo- 221 used on M-Systems' DiskOnChip devices. It uses a kind of pseudo-
@@ -238,7 +238,7 @@ config NFTL_RW
238 238
239config INFTL 239config INFTL
240 tristate "INFTL (Inverse NAND Flash Translation Layer) support" 240 tristate "INFTL (Inverse NAND Flash Translation Layer) support"
241 depends on MTD 241 depends on MTD && BLOCK
242 ---help--- 242 ---help---
243 This provides support for the Inverse NAND Flash Translation 243 This provides support for the Inverse NAND Flash Translation
244 Layer which is used on M-Systems' newer DiskOnChip devices. It 244 Layer which is used on M-Systems' newer DiskOnChip devices. It
@@ -255,7 +255,7 @@ config INFTL
255 255
256config RFD_FTL 256config RFD_FTL
257 tristate "Resident Flash Disk (Flash Translation Layer) support" 257 tristate "Resident Flash Disk (Flash Translation Layer) support"
258 depends on MTD 258 depends on MTD && BLOCK
259 ---help--- 259 ---help---
260 This provides support for the flash translation layer known 260 This provides support for the flash translation layer known
261 as the Resident Flash Disk (RFD), as used by the Embedded BIOS 261 as the Resident Flash Disk (RFD), as used by the Embedded BIOS
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 16c02b5ccf7e..440f6851da69 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -136,7 +136,7 @@ config MTDRAM_ABS_POS
136 136
137config MTD_BLOCK2MTD 137config MTD_BLOCK2MTD
138 tristate "MTD using block device" 138 tristate "MTD using block device"
139 depends on MTD 139 depends on MTD && BLOCK
140 help 140 help
141 This driver allows a block device to appear as an MTD. It would 141 This driver allows a block device to appear as an MTD. It would
142 generally be used in the following cases: 142 generally be used in the following cases:
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 458d3c8ae1ee..6baf5fe14230 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -46,7 +46,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
46 nsect = req->current_nr_sectors; 46 nsect = req->current_nr_sectors;
47 buf = req->buffer; 47 buf = req->buffer;
48 48
49 if (!(req->flags & REQ_CMD)) 49 if (!blk_fs_request(req))
50 return 0; 50 return 0;
51 51
52 if (block + nsect > get_capacity(req->rq_disk)) 52 if (block + nsect > get_capacity(req->rq_disk))
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 929d6fff6152..b250c5354503 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -1,4 +1,4 @@
1if S390 1if S390 && BLOCK
2 2
3comment "S/390 block device drivers" 3comment "S/390 block device drivers"
4 depends on S390 4 depends on S390
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 9d051e5687ea..222a8a71a5e8 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -529,7 +529,7 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req)
529 } 529 }
530 cqr->retries = DIAG_MAX_RETRIES; 530 cqr->retries = DIAG_MAX_RETRIES;
531 cqr->buildclk = get_clock(); 531 cqr->buildclk = get_clock();
532 if (req->flags & REQ_FAILFAST) 532 if (req->cmd_flags & REQ_FAILFAST)
533 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); 533 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
534 cqr->device = device; 534 cqr->device = device;
535 cqr->expires = DIAG_TIMEOUT; 535 cqr->expires = DIAG_TIMEOUT;
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index b7a7fac3f7c3..5ecea3e4fdef 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1266,7 +1266,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req)
1266 recid++; 1266 recid++;
1267 } 1267 }
1268 } 1268 }
1269 if (req->flags & REQ_FAILFAST) 1269 if (req->cmd_flags & REQ_FAILFAST)
1270 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); 1270 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
1271 cqr->device = device; 1271 cqr->device = device;
1272 cqr->expires = 5 * 60 * HZ; /* 5 minutes */ 1272 cqr->expires = 5 * 60 * HZ; /* 5 minutes */
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index e85015be109b..80926c548228 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -344,7 +344,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req)
344 recid++; 344 recid++;
345 } 345 }
346 } 346 }
347 if (req->flags & REQ_FAILFAST) 347 if (req->cmd_flags & REQ_FAILFAST)
348 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); 348 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
349 cqr->device = device; 349 cqr->device = device;
350 cqr->expires = 5 * 60 * HZ; /* 5 minutes */ 350 cqr->expires = 5 * 60 * HZ; /* 5 minutes */
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index c4dfcc91ddda..dab082002e6f 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -3,11 +3,13 @@ menu "SCSI device support"
3config RAID_ATTRS 3config RAID_ATTRS
4 tristate "RAID Transport Class" 4 tristate "RAID Transport Class"
5 default n 5 default n
6 depends on BLOCK
6 ---help--- 7 ---help---
7 Provides RAID 8 Provides RAID
8 9
9config SCSI 10config SCSI
10 tristate "SCSI device support" 11 tristate "SCSI device support"
12 depends on BLOCK
11 ---help--- 13 ---help---
12 If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or 14 If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
13 any other SCSI device under Linux, say Y and make sure that you know 15 any other SCSI device under Linux, say Y and make sure that you know
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index 5dcef48d414f..10353379a074 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -2862,7 +2862,7 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb)
2862 aic_dev->r_total++; 2862 aic_dev->r_total++;
2863 ptr = aic_dev->r_bins; 2863 ptr = aic_dev->r_bins;
2864 } 2864 }
2865 if(cmd->device->simple_tags && cmd->request->flags & REQ_HARDBARRIER) 2865 if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER)
2866 { 2866 {
2867 aic_dev->barrier_total++; 2867 aic_dev->barrier_total++;
2868 if(scb->tag_action == MSG_ORDERED_Q_TAG) 2868 if(scb->tag_action == MSG_ORDERED_Q_TAG)
@@ -10158,7 +10158,7 @@ aic7xxx_buildscb(struct aic7xxx_host *p, Scsi_Cmnd *cmd,
10158 /* We always force TEST_UNIT_READY to untagged */ 10158 /* We always force TEST_UNIT_READY to untagged */
10159 if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags) 10159 if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags)
10160 { 10160 {
10161 if (req->flags & REQ_HARDBARRIER) 10161 if (req->cmd_flags & REQ_HARDBARRIER)
10162 { 10162 {
10163 if(sdptr->ordered_tags) 10163 if(sdptr->ordered_tags)
10164 { 10164 {
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 94d1de55607f..1427a41e8441 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -344,7 +344,7 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co
344 pc->buffer = buf; 344 pc->buffer = buf;
345 pc->c[0] = REQUEST_SENSE; 345 pc->c[0] = REQUEST_SENSE;
346 pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE; 346 pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE;
347 rq->flags = REQ_SENSE; 347 rq->cmd_type = REQ_TYPE_SENSE;
348 pc->timeout = jiffies + WAIT_READY; 348 pc->timeout = jiffies + WAIT_READY;
349 /* NOTE! Save the failed packet command in "rq->buffer" */ 349 /* NOTE! Save the failed packet command in "rq->buffer" */
350 rq->buffer = (void *) failed_command->special; 350 rq->buffer = (void *) failed_command->special;
@@ -398,12 +398,12 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
398 int errors = rq->errors; 398 int errors = rq->errors;
399 unsigned long flags; 399 unsigned long flags;
400 400
401 if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) { 401 if (!blk_special_request(rq) && !blk_sense_request(rq)) {
402 ide_end_request(drive, uptodate, nrsecs); 402 ide_end_request(drive, uptodate, nrsecs);
403 return 0; 403 return 0;
404 } 404 }
405 ide_end_drive_cmd (drive, 0, 0); 405 ide_end_drive_cmd (drive, 0, 0);
406 if (rq->flags & REQ_SENSE) { 406 if (blk_sense_request(rq)) {
407 idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer; 407 idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer;
408 if (log) { 408 if (log) {
409 printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number); 409 printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
@@ -708,11 +708,11 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc)
708static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block) 708static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
709{ 709{
710#if IDESCSI_DEBUG_LOG 710#if IDESCSI_DEBUG_LOG
711 printk (KERN_INFO "rq_status: %d, dev: %s, cmd: %x, errors: %d\n",rq->rq_status, rq->rq_disk->disk_name,rq->cmd[0],rq->errors); 711 printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
712 printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); 712 printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
713#endif /* IDESCSI_DEBUG_LOG */ 713#endif /* IDESCSI_DEBUG_LOG */
714 714
715 if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) { 715 if (blk_sense_request(rq) || blk_special_request(rq)) {
716 return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special); 716 return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special);
717 } 717 }
718 blk_dump_rq_flags(rq, "ide-scsi: unsup command"); 718 blk_dump_rq_flags(rq, "ide-scsi: unsup command");
@@ -938,7 +938,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
938 938
939 ide_init_drive_cmd (rq); 939 ide_init_drive_cmd (rq);
940 rq->special = (char *) pc; 940 rq->special = (char *) pc;
941 rq->flags = REQ_SPECIAL; 941 rq->cmd_type = REQ_TYPE_SPECIAL;
942 spin_unlock_irq(host->host_lock); 942 spin_unlock_irq(host->host_lock);
943 rq->rq_disk = scsi->disk; 943 rq->rq_disk = scsi->disk;
944 (void) ide_do_drive_cmd (drive, rq, ide_end); 944 (void) ide_do_drive_cmd (drive, rq, ide_end);
@@ -992,7 +992,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
992 */ 992 */
993 printk (KERN_ERR "ide-scsi: cmd aborted!\n"); 993 printk (KERN_ERR "ide-scsi: cmd aborted!\n");
994 994
995 if (scsi->pc->rq->flags & REQ_SENSE) 995 if (blk_sense_request(scsi->pc->rq))
996 kfree(scsi->pc->buffer); 996 kfree(scsi->pc->buffer);
997 kfree(scsi->pc->rq); 997 kfree(scsi->pc->rq);
998 kfree(scsi->pc); 998 kfree(scsi->pc);
@@ -1042,7 +1042,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
1042 /* kill current request */ 1042 /* kill current request */
1043 blkdev_dequeue_request(req); 1043 blkdev_dequeue_request(req);
1044 end_that_request_last(req, 0); 1044 end_that_request_last(req, 0);
1045 if (req->flags & REQ_SENSE) 1045 if (blk_sense_request(req))
1046 kfree(scsi->pc->buffer); 1046 kfree(scsi->pc->buffer);
1047 kfree(scsi->pc); 1047 kfree(scsi->pc);
1048 scsi->pc = NULL; 1048 scsi->pc = NULL;
diff --git a/drivers/scsi/pluto.c b/drivers/scsi/pluto.c
index 0bd9c60e6455..aa60a5f1fbc3 100644
--- a/drivers/scsi/pluto.c
+++ b/drivers/scsi/pluto.c
@@ -67,7 +67,6 @@ static void __init pluto_detect_done(Scsi_Cmnd *SCpnt)
67 67
68static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt) 68static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt)
69{ 69{
70 SCpnt->request->rq_status = RQ_SCSI_DONE;
71 PLND(("Detect done %08lx\n", (long)SCpnt)) 70 PLND(("Detect done %08lx\n", (long)SCpnt))
72 if (atomic_dec_and_test (&fcss)) 71 if (atomic_dec_and_test (&fcss))
73 up(&fc_sem); 72 up(&fc_sem);
@@ -166,7 +165,7 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
166 165
167 SCpnt->cmd_len = COMMAND_SIZE(INQUIRY); 166 SCpnt->cmd_len = COMMAND_SIZE(INQUIRY);
168 167
169 SCpnt->request->rq_status = RQ_SCSI_BUSY; 168 SCpnt->request->cmd_flags &= ~REQ_STARTED;
170 169
171 SCpnt->done = pluto_detect_done; 170 SCpnt->done = pluto_detect_done;
172 SCpnt->request_bufflen = 256; 171 SCpnt->request_bufflen = 256;
@@ -178,7 +177,8 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
178 for (retry = 0; retry < 5; retry++) { 177 for (retry = 0; retry < 5; retry++) {
179 for (i = 0; i < fcscount; i++) { 178 for (i = 0; i < fcscount; i++) {
180 if (!fcs[i].fc) break; 179 if (!fcs[i].fc) break;
181 if (fcs[i].cmd.request->rq_status != RQ_SCSI_DONE) { 180 if (!(fcs[i].cmd.request->cmd_flags & REQ_STARTED)) {
181 fcs[i].cmd.request->cmd_flags |= REQ_STARTED;
182 disable_irq(fcs[i].fc->irq); 182 disable_irq(fcs[i].fc->irq);
183 PLND(("queuecommand %d %d\n", retry, i)) 183 PLND(("queuecommand %d %d\n", retry, i))
184 fcp_scsi_queuecommand (&(fcs[i].cmd), 184 fcp_scsi_queuecommand (&(fcs[i].cmd),
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 7a054f9d1ee3..da95bce907dd 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -592,12 +592,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
592 return rtn; 592 return rtn;
593} 593}
594 594
595
596/*
597 * Per-CPU I/O completion queue.
598 */
599static DEFINE_PER_CPU(struct list_head, scsi_done_q);
600
601/** 595/**
602 * scsi_req_abort_cmd -- Request command recovery for the specified command 596 * scsi_req_abort_cmd -- Request command recovery for the specified command
603 * cmd: pointer to the SCSI command of interest 597 * cmd: pointer to the SCSI command of interest
@@ -1065,7 +1059,7 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery)
1065 1059
1066 spin_lock_irqsave(&sdev->list_lock, flags); 1060 spin_lock_irqsave(&sdev->list_lock, flags);
1067 list_for_each_entry(scmd, &sdev->cmd_list, list) { 1061 list_for_each_entry(scmd, &sdev->cmd_list, list) {
1068 if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) { 1062 if (scmd->request) {
1069 /* 1063 /*
1070 * If we are unable to remove the timer, it means 1064 * If we are unable to remove the timer, it means
1071 * that the command has already timed out or 1065 * that the command has already timed out or
@@ -1102,7 +1096,7 @@ MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");
1102 1096
1103static int __init init_scsi(void) 1097static int __init init_scsi(void)
1104{ 1098{
1105 int error, i; 1099 int error;
1106 1100
1107 error = scsi_init_queue(); 1101 error = scsi_init_queue();
1108 if (error) 1102 if (error)
@@ -1123,9 +1117,6 @@ static int __init init_scsi(void)
1123 if (error) 1117 if (error)
1124 goto cleanup_sysctl; 1118 goto cleanup_sysctl;
1125 1119
1126 for_each_possible_cpu(i)
1127 INIT_LIST_HEAD(&per_cpu(scsi_done_q, i));
1128
1129 scsi_netlink_init(); 1120 scsi_netlink_init();
1130 1121
1131 printk(KERN_NOTICE "SCSI subsystem initialized\n"); 1122 printk(KERN_NOTICE "SCSI subsystem initialized\n");
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d6743b959a72..71084728eb42 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -82,7 +82,7 @@ static void scsi_unprep_request(struct request *req)
82{ 82{
83 struct scsi_cmnd *cmd = req->special; 83 struct scsi_cmnd *cmd = req->special;
84 84
85 req->flags &= ~REQ_DONTPREP; 85 req->cmd_flags &= ~REQ_DONTPREP;
86 req->special = NULL; 86 req->special = NULL;
87 87
88 scsi_put_command(cmd); 88 scsi_put_command(cmd);
@@ -196,7 +196,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
196 req->sense_len = 0; 196 req->sense_len = 0;
197 req->retries = retries; 197 req->retries = retries;
198 req->timeout = timeout; 198 req->timeout = timeout;
199 req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET; 199 req->cmd_type = REQ_TYPE_BLOCK_PC;
200 req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
200 201
201 /* 202 /*
202 * head injection *required* here otherwise quiesce won't work 203 * head injection *required* here otherwise quiesce won't work
@@ -397,7 +398,8 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
397 req = blk_get_request(sdev->request_queue, write, gfp); 398 req = blk_get_request(sdev->request_queue, write, gfp);
398 if (!req) 399 if (!req)
399 goto free_sense; 400 goto free_sense;
400 req->flags |= REQ_BLOCK_PC | REQ_QUIET; 401 req->cmd_type = REQ_TYPE_BLOCK_PC;
402 req->cmd_flags |= REQ_QUIET;
401 403
402 if (use_sg) 404 if (use_sg)
403 err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp); 405 err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
@@ -933,7 +935,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
933 break; 935 break;
934 } 936 }
935 } 937 }
936 if (!(req->flags & REQ_QUIET)) { 938 if (!(req->cmd_flags & REQ_QUIET)) {
937 scmd_printk(KERN_INFO, cmd, 939 scmd_printk(KERN_INFO, cmd,
938 "Device not ready: "); 940 "Device not ready: ");
939 scsi_print_sense_hdr("", &sshdr); 941 scsi_print_sense_hdr("", &sshdr);
@@ -941,7 +943,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
941 scsi_end_request(cmd, 0, this_count, 1); 943 scsi_end_request(cmd, 0, this_count, 1);
942 return; 944 return;
943 case VOLUME_OVERFLOW: 945 case VOLUME_OVERFLOW:
944 if (!(req->flags & REQ_QUIET)) { 946 if (!(req->cmd_flags & REQ_QUIET)) {
945 scmd_printk(KERN_INFO, cmd, 947 scmd_printk(KERN_INFO, cmd,
946 "Volume overflow, CDB: "); 948 "Volume overflow, CDB: ");
947 __scsi_print_command(cmd->cmnd); 949 __scsi_print_command(cmd->cmnd);
@@ -963,7 +965,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
963 return; 965 return;
964 } 966 }
965 if (result) { 967 if (result) {
966 if (!(req->flags & REQ_QUIET)) { 968 if (!(req->cmd_flags & REQ_QUIET)) {
967 scmd_printk(KERN_INFO, cmd, 969 scmd_printk(KERN_INFO, cmd,
968 "SCSI error: return code = 0x%08x\n", 970 "SCSI error: return code = 0x%08x\n",
969 result); 971 result);
@@ -995,7 +997,7 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
995 /* 997 /*
996 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer 998 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
997 */ 999 */
998 if ((req->flags & REQ_BLOCK_PC) && !req->bio) { 1000 if (blk_pc_request(req) && !req->bio) {
999 cmd->request_bufflen = req->data_len; 1001 cmd->request_bufflen = req->data_len;
1000 cmd->request_buffer = req->data; 1002 cmd->request_buffer = req->data;
1001 req->buffer = req->data; 1003 req->buffer = req->data;
@@ -1139,13 +1141,12 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
         * these two cases differently. We differentiate by looking
         * at request->cmd, as this tells us the real story.
         */
-       if (req->flags & REQ_SPECIAL && req->special) {
+       if (blk_special_request(req) && req->special)
                cmd = req->special;
-       } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
-
-               if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
-                       if(specials_only == SDEV_QUIESCE ||
-                          specials_only == SDEV_BLOCK)
+       else if (blk_pc_request(req) || blk_fs_request(req)) {
+               if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)){
+                       if (specials_only == SDEV_QUIESCE ||
+                           specials_only == SDEV_BLOCK)
                                goto defer;
 
                        sdev_printk(KERN_ERR, sdev,
@@ -1153,7 +1154,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
1153 goto kill; 1154 goto kill;
1154 } 1155 }
1155 1156
1156
1157 /* 1157 /*
1158 * Now try and find a command block that we can use. 1158 * Now try and find a command block that we can use.
1159 */ 1159 */
@@ -1184,7 +1184,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
1184 * lock. We hope REQ_STARTED prevents anything untoward from 1184 * lock. We hope REQ_STARTED prevents anything untoward from
1185 * happening now. 1185 * happening now.
1186 */ 1186 */
1187 if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { 1187 if (blk_fs_request(req) || blk_pc_request(req)) {
1188 int ret; 1188 int ret;
1189 1189
1190 /* 1190 /*
@@ -1216,7 +1216,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
1216 /* 1216 /*
1217 * Initialize the actual SCSI command for this request. 1217 * Initialize the actual SCSI command for this request.
1218 */ 1218 */
1219 if (req->flags & REQ_BLOCK_PC) { 1219 if (blk_pc_request(req)) {
1220 scsi_setup_blk_pc_cmnd(cmd); 1220 scsi_setup_blk_pc_cmnd(cmd);
1221 } else if (req->rq_disk) { 1221 } else if (req->rq_disk) {
1222 struct scsi_driver *drv; 1222 struct scsi_driver *drv;
@@ -1233,7 +1233,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
1233 /* 1233 /*
1234 * The request is now prepped, no need to come back here 1234 * The request is now prepped, no need to come back here
1235 */ 1235 */
1236 req->flags |= REQ_DONTPREP; 1236 req->cmd_flags |= REQ_DONTPREP;
1237 return BLKPREP_OK; 1237 return BLKPREP_OK;
1238 1238
1239 defer: 1239 defer:
@@ -1454,8 +1454,9 @@ static void scsi_request_fn(struct request_queue *q)
1454 if (unlikely(cmd == NULL)) { 1454 if (unlikely(cmd == NULL)) {
1455 printk(KERN_CRIT "impossible request in %s.\n" 1455 printk(KERN_CRIT "impossible request in %s.\n"
1456 "please mail a stack trace to " 1456 "please mail a stack trace to "
1457 "linux-scsi@vger.kernel.org", 1457 "linux-scsi@vger.kernel.org\n",
1458 __FUNCTION__); 1458 __FUNCTION__);
1459 blk_dump_rq_flags(req, "foo");
1459 BUG(); 1460 BUG();
1460 } 1461 }
1461 spin_lock(shost->host_lock); 1462 spin_lock(shost->host_lock);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 638cff41d436..10bc99c911fa 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -443,8 +443,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
443 SCpnt->cmnd[0] = READ_6; 443 SCpnt->cmnd[0] = READ_6;
444 SCpnt->sc_data_direction = DMA_FROM_DEVICE; 444 SCpnt->sc_data_direction = DMA_FROM_DEVICE;
445 } else { 445 } else {
446 printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags); 446 printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags);
447/* overkill panic("Unknown sd command %lx\n", rq->flags); */
448 return 0; 447 return 0;
449 } 448 }
450 449
@@ -840,7 +839,7 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector)
840static void sd_prepare_flush(request_queue_t *q, struct request *rq) 839static void sd_prepare_flush(request_queue_t *q, struct request *rq)
841{ 840{
842 memset(rq->cmd, 0, sizeof(rq->cmd)); 841 memset(rq->cmd, 0, sizeof(rq->cmd));
843 rq->flags |= REQ_BLOCK_PC; 842 rq->cmd_type = REQ_TYPE_BLOCK_PC;
844 rq->timeout = SD_TIMEOUT; 843 rq->timeout = SD_TIMEOUT;
845 rq->cmd[0] = SYNCHRONIZE_CACHE; 844 rq->cmd[0] = SYNCHRONIZE_CACHE;
846 rq->cmd_len = 10; 845 rq->cmd_len = 10;
diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c
index 2f8073b73bf3..7f9bcef6adfa 100644
--- a/drivers/scsi/sun3_NCR5380.c
+++ b/drivers/scsi/sun3_NCR5380.c
@@ -2017,7 +2017,7 @@ static void NCR5380_information_transfer (struct Scsi_Host *instance)
2017 if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done 2017 if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
2018 != cmd)) 2018 != cmd))
2019 { 2019 {
2020 if(cmd->request->flags & REQ_CMD) { 2020 if(blk_fs_request(cmd->request)) {
2021 sun3scsi_dma_setup(d, count, 2021 sun3scsi_dma_setup(d, count,
2022 rq_data_dir(cmd->request)); 2022 rq_data_dir(cmd->request));
2023 sun3_dma_setup_done = cmd; 2023 sun3_dma_setup_done = cmd;
diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index 837173415d4c..44a99aeb8180 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c
@@ -524,7 +524,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
524static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd, 524static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
525 int write_flag) 525 int write_flag)
526{ 526{
527 if(cmd->request->flags & REQ_CMD) 527 if(blk_fs_request(cmd->request))
528 return wanted; 528 return wanted;
529 else 529 else
530 return 0; 530 return 0;
diff --git a/drivers/scsi/sun3_scsi_vme.c b/drivers/scsi/sun3_scsi_vme.c
index 008a82ab8521..f5742b84b27a 100644
--- a/drivers/scsi/sun3_scsi_vme.c
+++ b/drivers/scsi/sun3_scsi_vme.c
@@ -458,7 +458,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
458static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd, 458static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
459 int write_flag) 459 int write_flag)
460{ 460{
461 if(cmd->request->flags & REQ_CMD) 461 if(blk_fs_request(cmd->request))
462 return wanted; 462 return wanted;
463 else 463 else
464 return 0; 464 return 0;
diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig
index 86e48c42d6af..422a4b288e34 100644
--- a/drivers/usb/storage/Kconfig
+++ b/drivers/usb/storage/Kconfig
@@ -8,8 +8,7 @@ comment "may also be needed; see USB_STORAGE Help for more information"
8 8
9config USB_STORAGE 9config USB_STORAGE
10 tristate "USB Mass Storage support" 10 tristate "USB Mass Storage support"
11 depends on USB 11 depends on USB && SCSI
12 select SCSI
13 ---help--- 12 ---help---
14 Say Y here if you want to connect USB mass storage devices to your 13 Say Y here if you want to connect USB mass storage devices to your
15 computer's USB port. This is the driver you need for USB 14 computer's USB port. This is the driver you need for USB
@@ -18,7 +17,7 @@ config USB_STORAGE
18 similar devices. This driver may also be used for some cameras 17 similar devices. This driver may also be used for some cameras
19 and card readers. 18 and card readers.
20 19
21 This option 'selects' (turns on, enables) 'SCSI', but you 20 This option depends on 'SCSI' support being enabled, but you
22 probably also need 'SCSI device support: SCSI disk support' 21 probably also need 'SCSI device support: SCSI disk support'
23 (BLK_DEV_SD) for most USB storage devices. 22 (BLK_DEV_SD) for most USB storage devices.
24 23
diff --git a/fs/Kconfig b/fs/Kconfig
index 4fd9efac29ab..1453d2d164f7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,8 @@
4 4
5menu "File systems" 5menu "File systems"
6 6
7if BLOCK
8
7config EXT2_FS 9config EXT2_FS
8 tristate "Second extended fs support" 10 tristate "Second extended fs support"
9 help 11 help
@@ -399,6 +401,8 @@ config ROMFS_FS
399 If you don't know whether you need it, then you don't need it: 401 If you don't know whether you need it, then you don't need it:
400 answer N. 402 answer N.
401 403
404endif
405
402config INOTIFY 406config INOTIFY
403 bool "Inotify file change notification support" 407 bool "Inotify file change notification support"
404 default y 408 default y
@@ -530,6 +534,7 @@ config FUSE_FS
530 If you want to develop a userspace FS, or if you want to use 534 If you want to develop a userspace FS, or if you want to use
531 a filesystem based on FUSE, answer Y or M. 535 a filesystem based on FUSE, answer Y or M.
532 536
537if BLOCK
533menu "CD-ROM/DVD Filesystems" 538menu "CD-ROM/DVD Filesystems"
534 539
535config ISO9660_FS 540config ISO9660_FS
@@ -597,7 +602,9 @@ config UDF_NLS
597 depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y) 602 depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y)
598 603
599endmenu 604endmenu
605endif
600 606
607if BLOCK
601menu "DOS/FAT/NT Filesystems" 608menu "DOS/FAT/NT Filesystems"
602 609
603config FAT_FS 610config FAT_FS
@@ -782,6 +789,7 @@ config NTFS_RW
782 It is perfectly safe to say N here. 789 It is perfectly safe to say N here.
783 790
784endmenu 791endmenu
792endif
785 793
786menu "Pseudo filesystems" 794menu "Pseudo filesystems"
787 795
@@ -939,7 +947,7 @@ menu "Miscellaneous filesystems"
939 947
940config ADFS_FS 948config ADFS_FS
941 tristate "ADFS file system support (EXPERIMENTAL)" 949 tristate "ADFS file system support (EXPERIMENTAL)"
942 depends on EXPERIMENTAL 950 depends on BLOCK && EXPERIMENTAL
943 help 951 help
944 The Acorn Disc Filing System is the standard file system of the 952 The Acorn Disc Filing System is the standard file system of the
945 RiscOS operating system which runs on Acorn's ARM-based Risc PC 953 RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -967,7 +975,7 @@ config ADFS_FS_RW
967 975
968config AFFS_FS 976config AFFS_FS
969 tristate "Amiga FFS file system support (EXPERIMENTAL)" 977 tristate "Amiga FFS file system support (EXPERIMENTAL)"
970 depends on EXPERIMENTAL 978 depends on BLOCK && EXPERIMENTAL
971 help 979 help
972 The Fast File System (FFS) is the common file system used on hard 980 The Fast File System (FFS) is the common file system used on hard
973 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y 981 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y
@@ -989,7 +997,7 @@ config AFFS_FS
989 997
990config HFS_FS 998config HFS_FS
991 tristate "Apple Macintosh file system support (EXPERIMENTAL)" 999 tristate "Apple Macintosh file system support (EXPERIMENTAL)"
992 depends on EXPERIMENTAL 1000 depends on BLOCK && EXPERIMENTAL
993 select NLS 1001 select NLS
994 help 1002 help
995 If you say Y here, you will be able to mount Macintosh-formatted 1003 If you say Y here, you will be able to mount Macintosh-formatted
@@ -1002,6 +1010,7 @@ config HFS_FS
1002 1010
1003config HFSPLUS_FS 1011config HFSPLUS_FS
1004 tristate "Apple Extended HFS file system support" 1012 tristate "Apple Extended HFS file system support"
1013 depends on BLOCK
1005 select NLS 1014 select NLS
1006 select NLS_UTF8 1015 select NLS_UTF8
1007 help 1016 help
@@ -1015,7 +1024,7 @@ config HFSPLUS_FS
1015 1024
1016config BEFS_FS 1025config BEFS_FS
1017 tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" 1026 tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)"
1018 depends on EXPERIMENTAL 1027 depends on BLOCK && EXPERIMENTAL
1019 select NLS 1028 select NLS
1020 help 1029 help
1021 The BeOS File System (BeFS) is the native file system of Be, Inc's 1030 The BeOS File System (BeFS) is the native file system of Be, Inc's
@@ -1042,7 +1051,7 @@ config BEFS_DEBUG
1042 1051
1043config BFS_FS 1052config BFS_FS
1044 tristate "BFS file system support (EXPERIMENTAL)" 1053 tristate "BFS file system support (EXPERIMENTAL)"
1045 depends on EXPERIMENTAL 1054 depends on BLOCK && EXPERIMENTAL
1046 help 1055 help
1047 Boot File System (BFS) is a file system used under SCO UnixWare to 1056 Boot File System (BFS) is a file system used under SCO UnixWare to
1048 allow the bootloader access to the kernel image and other important 1057 allow the bootloader access to the kernel image and other important
@@ -1064,7 +1073,7 @@ config BFS_FS
1064 1073
1065config EFS_FS 1074config EFS_FS
1066 tristate "EFS file system support (read only) (EXPERIMENTAL)" 1075 tristate "EFS file system support (read only) (EXPERIMENTAL)"
1067 depends on EXPERIMENTAL 1076 depends on BLOCK && EXPERIMENTAL
1068 help 1077 help
1069 EFS is an older file system used for non-ISO9660 CD-ROMs and hard 1078 EFS is an older file system used for non-ISO9660 CD-ROMs and hard
1070 disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer 1079 disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
@@ -1079,7 +1088,7 @@ config EFS_FS
1079 1088
1080config JFFS_FS 1089config JFFS_FS
1081 tristate "Journalling Flash File System (JFFS) support" 1090 tristate "Journalling Flash File System (JFFS) support"
1082 depends on MTD 1091 depends on MTD && BLOCK
1083 help 1092 help
1084 JFFS is the Journaling Flash File System developed by Axis 1093 JFFS is the Journaling Flash File System developed by Axis
1085 Communications in Sweden, aimed at providing a crash/powerdown-safe 1094 Communications in Sweden, aimed at providing a crash/powerdown-safe
@@ -1264,6 +1273,7 @@ endchoice
1264 1273
1265config CRAMFS 1274config CRAMFS
1266 tristate "Compressed ROM file system support (cramfs)" 1275 tristate "Compressed ROM file system support (cramfs)"
1276 depends on BLOCK
1267 select ZLIB_INFLATE 1277 select ZLIB_INFLATE
1268 help 1278 help
1269 Saying Y here includes support for CramFs (Compressed ROM File 1279 Saying Y here includes support for CramFs (Compressed ROM File
@@ -1283,6 +1293,7 @@ config CRAMFS
1283 1293
1284config VXFS_FS 1294config VXFS_FS
1285 tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" 1295 tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
1296 depends on BLOCK
1286 help 1297 help
1287 FreeVxFS is a file system driver that support the VERITAS VxFS(TM) 1298 FreeVxFS is a file system driver that support the VERITAS VxFS(TM)
1288 file system format. VERITAS VxFS(TM) is the standard file system 1299 file system format. VERITAS VxFS(TM) is the standard file system
@@ -1300,6 +1311,7 @@ config VXFS_FS
1300 1311
1301config HPFS_FS 1312config HPFS_FS
1302 tristate "OS/2 HPFS file system support" 1313 tristate "OS/2 HPFS file system support"
1314 depends on BLOCK
1303 help 1315 help
1304 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS 1316 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
1305 is the file system used for organizing files on OS/2 hard disk 1317 is the file system used for organizing files on OS/2 hard disk
@@ -1316,6 +1328,7 @@ config HPFS_FS
1316 1328
1317config QNX4FS_FS 1329config QNX4FS_FS
1318 tristate "QNX4 file system support (read only)" 1330 tristate "QNX4 file system support (read only)"
1331 depends on BLOCK
1319 help 1332 help
1320 This is the file system used by the real-time operating systems 1333 This is the file system used by the real-time operating systems
1321 QNX 4 and QNX 6 (the latter is also called QNX RTP). 1334 QNX 4 and QNX 6 (the latter is also called QNX RTP).
@@ -1343,6 +1356,7 @@ config QNX4FS_RW
1343 1356
1344config SYSV_FS 1357config SYSV_FS
1345 tristate "System V/Xenix/V7/Coherent file system support" 1358 tristate "System V/Xenix/V7/Coherent file system support"
1359 depends on BLOCK
1346 help 1360 help
1347 SCO, Xenix and Coherent are commercial Unix systems for Intel 1361 SCO, Xenix and Coherent are commercial Unix systems for Intel
1348 machines, and Version 7 was used on the DEC PDP-11. Saying Y 1362 machines, and Version 7 was used on the DEC PDP-11. Saying Y
@@ -1381,6 +1395,7 @@ config SYSV_FS
1381 1395
1382config UFS_FS 1396config UFS_FS
1383 tristate "UFS file system support (read only)" 1397 tristate "UFS file system support (read only)"
1398 depends on BLOCK
1384 help 1399 help
1385 BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, 1400 BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
1386 OpenBSD and NeXTstep) use a file system called UFS. Some System V 1401 OpenBSD and NeXTstep) use a file system called UFS. Some System V
@@ -1959,11 +1974,13 @@ config GENERIC_ACL
1959 1974
1960endmenu 1975endmenu
1961 1976
1977if BLOCK
1962menu "Partition Types" 1978menu "Partition Types"
1963 1979
1964source "fs/partitions/Kconfig" 1980source "fs/partitions/Kconfig"
1965 1981
1966endmenu 1982endmenu
1983endif
1967 1984
1968source "fs/nls/Kconfig" 1985source "fs/nls/Kconfig"
1969 1986
diff --git a/fs/Makefile b/fs/Makefile
index 46b8cfe497b2..a503e6ce0f32 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -5,12 +5,18 @@
 # Rewritten to use lists instead of if-statements.
 #
 
-obj-y :=       open.o read_write.o file_table.o buffer.o bio.o super.o \
-               block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
+obj-y :=       open.o read_write.o file_table.o super.o \
+               char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
                ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
                attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
-               seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-               ioprio.o pnode.o drop_caches.o splice.o sync.o
+               seq_file.o xattr.o libfs.o fs-writeback.o \
+               pnode.o drop_caches.o splice.o sync.o
+
+ifeq ($(CONFIG_BLOCK),y)
+obj-y +=       buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+else
+obj-y +=       no-block.o
+endif
 
 obj-$(CONFIG_INOTIFY)          += inotify.o
 obj-$(CONFIG_INOTIFY_USER)     += inotify_user.o
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 67d6634101fd..2e8c42639eaa 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -16,7 +16,6 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/buffer_head.h>
20#include "volume.h" 19#include "volume.h"
21#include "vnode.h" 20#include "vnode.h"
22#include <rxrpc/call.h> 21#include <rxrpc/call.h>
@@ -37,7 +36,6 @@ struct inode_operations afs_file_inode_operations = {
37 36
38const struct address_space_operations afs_fs_aops = { 37const struct address_space_operations afs_fs_aops = {
39 .readpage = afs_file_readpage, 38 .readpage = afs_file_readpage,
40 .sync_page = block_sync_page,
41 .set_page_dirty = __set_page_dirty_nobuffers, 39 .set_page_dirty = __set_page_dirty_nobuffers,
42 .releasepage = afs_file_releasepage, 40 .releasepage = afs_file_releasepage,
43 .invalidatepage = afs_file_invalidatepage, 41 .invalidatepage = afs_file_invalidatepage,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6eb48e1446ec..bad52433de69 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -46,7 +46,6 @@
46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); 46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47static int load_elf_library(struct file *); 47static int load_elf_library(struct file *);
48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); 48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
49extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
50 49
51#ifndef elf_addr_t 50#ifndef elf_addr_t
52#define elf_addr_t unsigned long 51#define elf_addr_t unsigned long
diff --git a/fs/bio.c b/fs/bio.c
index 6a0b9ad8f8c9..8f93e939f213 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2001 Jens Axboe <axboe@suse.de> 2 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -1142,7 +1142,7 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale)
1142 struct biovec_slab *bp = bvec_slabs + i; 1142 struct biovec_slab *bp = bvec_slabs + i;
1143 mempool_t **bvp = bs->bvec_pools + i; 1143 mempool_t **bvp = bs->bvec_pools + i;
1144 1144
1145 if (i >= scale) 1145 if (pool_entries > 1 && i >= scale)
1146 pool_entries >>= 1; 1146 pool_entries >>= 1;
1147 1147
1148 *bvp = mempool_create_slab_pool(pool_entries, bp->slab); 1148 *bvp = mempool_create_slab_pool(pool_entries, bp->slab);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4346468139e8..0c361ea7e5a6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -17,11 +17,13 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/blkpg.h> 18#include <linux/blkpg.h>
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include <linux/writeback.h>
20#include <linux/mpage.h> 21#include <linux/mpage.h>
21#include <linux/mount.h> 22#include <linux/mount.h>
22#include <linux/uio.h> 23#include <linux/uio.h>
23#include <linux/namei.h> 24#include <linux/namei.h>
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include "internal.h"
25 27
26struct bdev_inode { 28struct bdev_inode {
27 struct block_device bdev; 29 struct block_device bdev;
@@ -1313,3 +1315,24 @@ void close_bdev_excl(struct block_device *bdev)
1313} 1315}
1314 1316
1315EXPORT_SYMBOL(close_bdev_excl); 1317EXPORT_SYMBOL(close_bdev_excl);
1318
1319int __invalidate_device(struct block_device *bdev)
1320{
1321 struct super_block *sb = get_super(bdev);
1322 int res = 0;
1323
1324 if (sb) {
1325 /*
1326 * no need to lock the super, get_super holds the
1327 * read mutex so the filesystem cannot go away
1328 * under us (->put_super runs with the write lock
1329 * hold).
1330 */
1331 shrink_dcache_sb(sb);
1332 res = invalidate_inodes(sb);
1333 drop_super(sb);
1334 }
1335 invalidate_bdev(bdev, 0);
1336 return res;
1337}
1338EXPORT_SYMBOL(__invalidate_device);
diff --git a/fs/buffer.c b/fs/buffer.c
index 3b6d701073e7..16cfbcd254f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -159,31 +159,6 @@ int sync_blockdev(struct block_device *bdev)
159} 159}
160EXPORT_SYMBOL(sync_blockdev); 160EXPORT_SYMBOL(sync_blockdev);
161 161
162static void __fsync_super(struct super_block *sb)
163{
164 sync_inodes_sb(sb, 0);
165 DQUOT_SYNC(sb);
166 lock_super(sb);
167 if (sb->s_dirt && sb->s_op->write_super)
168 sb->s_op->write_super(sb);
169 unlock_super(sb);
170 if (sb->s_op->sync_fs)
171 sb->s_op->sync_fs(sb, 1);
172 sync_blockdev(sb->s_bdev);
173 sync_inodes_sb(sb, 1);
174}
175
176/*
177 * Write out and wait upon all dirty data associated with this
178 * superblock. Filesystem data as well as the underlying block
179 * device. Takes the superblock lock.
180 */
181int fsync_super(struct super_block *sb)
182{
183 __fsync_super(sb);
184 return sync_blockdev(sb->s_bdev);
185}
186
187/* 162/*
188 * Write out and wait upon all dirty data associated with this 163 * Write out and wait upon all dirty data associated with this
189 * device. Filesystem data as well as the underlying block 164 * device. Filesystem data as well as the underlying block
@@ -260,118 +235,6 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
260EXPORT_SYMBOL(thaw_bdev); 235EXPORT_SYMBOL(thaw_bdev);
261 236
262/* 237/*
263 * sync everything. Start out by waking pdflush, because that writes back
264 * all queues in parallel.
265 */
266static void do_sync(unsigned long wait)
267{
268 wakeup_pdflush(0);
269 sync_inodes(0); /* All mappings, inodes and their blockdevs */
270 DQUOT_SYNC(NULL);
271 sync_supers(); /* Write the superblocks */
272 sync_filesystems(0); /* Start syncing the filesystems */
273 sync_filesystems(wait); /* Waitingly sync the filesystems */
274 sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */
275 if (!wait)
276 printk("Emergency Sync complete\n");
277 if (unlikely(laptop_mode))
278 laptop_sync_completion();
279}
280
281asmlinkage long sys_sync(void)
282{
283 do_sync(1);
284 return 0;
285}
286
287void emergency_sync(void)
288{
289 pdflush_operation(do_sync, 0);
290}
291
292/*
293 * Generic function to fsync a file.
294 *
295 * filp may be NULL if called via the msync of a vma.
296 */
297
298int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
299{
300 struct inode * inode = dentry->d_inode;
301 struct super_block * sb;
302 int ret, err;
303
304 /* sync the inode to buffers */
305 ret = write_inode_now(inode, 0);
306
307 /* sync the superblock to buffers */
308 sb = inode->i_sb;
309 lock_super(sb);
310 if (sb->s_op->write_super)
311 sb->s_op->write_super(sb);
312 unlock_super(sb);
313
314 /* .. finally sync the buffers to disk */
315 err = sync_blockdev(sb->s_bdev);
316 if (!ret)
317 ret = err;
318 return ret;
319}
320
321long do_fsync(struct file *file, int datasync)
322{
323 int ret;
324 int err;
325 struct address_space *mapping = file->f_mapping;
326
327 if (!file->f_op || !file->f_op->fsync) {
328 /* Why? We can still call filemap_fdatawrite */
329 ret = -EINVAL;
330 goto out;
331 }
332
333 ret = filemap_fdatawrite(mapping);
334
335 /*
336 * We need to protect against concurrent writers, which could cause
337 * livelocks in fsync_buffers_list().
338 */
339 mutex_lock(&mapping->host->i_mutex);
340 err = file->f_op->fsync(file, file->f_dentry, datasync);
341 if (!ret)
342 ret = err;
343 mutex_unlock(&mapping->host->i_mutex);
344 err = filemap_fdatawait(mapping);
345 if (!ret)
346 ret = err;
347out:
348 return ret;
349}
350
351static long __do_fsync(unsigned int fd, int datasync)
352{
353 struct file *file;
354 int ret = -EBADF;
355
356 file = fget(fd);
357 if (file) {
358 ret = do_fsync(file, datasync);
359 fput(file);
360 }
361 return ret;
362}
363
364asmlinkage long sys_fsync(unsigned int fd)
365{
366 return __do_fsync(fd, 0);
367}
368
369asmlinkage long sys_fdatasync(unsigned int fd)
370{
371 return __do_fsync(fd, 1);
372}
373
374/*
375 * Various filesystems appear to want __find_get_block to be non-blocking. 238 * Various filesystems appear to want __find_get_block to be non-blocking.
376 * But it's the page lock which protects the buffers. To get around this, 239 * But it's the page lock which protects the buffers. To get around this,
377 * we get exclusion from try_to_free_buffers with the blockdev mapping's 240 * we get exclusion from try_to_free_buffers with the blockdev mapping's
@@ -1551,35 +1414,6 @@ static void discard_buffer(struct buffer_head * bh)
1551} 1414}
1552 1415
1553/** 1416/**
1554 * try_to_release_page() - release old fs-specific metadata on a page
1555 *
1556 * @page: the page which the kernel is trying to free
1557 * @gfp_mask: memory allocation flags (and I/O mode)
1558 *
1559 * The address_space is to try to release any data against the page
1560 * (presumably at page->private). If the release was successful, return `1'.
1561 * Otherwise return zero.
1562 *
1563 * The @gfp_mask argument specifies whether I/O may be performed to release
1564 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
1565 *
1566 * NOTE: @gfp_mask may go away, and this function may become non-blocking.
1567 */
1568int try_to_release_page(struct page *page, gfp_t gfp_mask)
1569{
1570 struct address_space * const mapping = page->mapping;
1571
1572 BUG_ON(!PageLocked(page));
1573 if (PageWriteback(page))
1574 return 0;
1575
1576 if (mapping && mapping->a_ops->releasepage)
1577 return mapping->a_ops->releasepage(page, gfp_mask);
1578 return try_to_free_buffers(page);
1579}
1580EXPORT_SYMBOL(try_to_release_page);
1581
1582/**
1583 * block_invalidatepage - invalidate part of all of a buffer-backed page 1417 * block_invalidatepage - invalidate part of all of a buffer-backed page
1584 * 1418 *
1585 * @page: the page which is affected 1419 * @page: the page which is affected
@@ -1630,14 +1464,6 @@ out:
1630} 1464}
1631EXPORT_SYMBOL(block_invalidatepage); 1465EXPORT_SYMBOL(block_invalidatepage);
1632 1466
1633void do_invalidatepage(struct page *page, unsigned long offset)
1634{
1635 void (*invalidatepage)(struct page *, unsigned long);
1636 invalidatepage = page->mapping->a_ops->invalidatepage ? :
1637 block_invalidatepage;
1638 (*invalidatepage)(page, offset);
1639}
1640
1641/* 1467/*
1642 * We attach and possibly dirty the buffers atomically wrt 1468 * We attach and possibly dirty the buffers atomically wrt
1643 * __set_page_dirty_buffers() via private_lock. try_to_free_buffers 1469 * __set_page_dirty_buffers() via private_lock. try_to_free_buffers
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 1f3285affa39..a885f46ca001 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -24,6 +24,7 @@
24#ifdef CONFIG_KMOD 24#ifdef CONFIG_KMOD
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#endif 26#endif
27#include "internal.h"
27 28
28/* 29/*
29 * capabilities for /dev/mem, /dev/kmem and similar directly mappable character 30 * capabilities for /dev/mem, /dev/kmem and similar directly mappable character
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ddb012a68023..976a691c5a68 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -25,7 +25,6 @@
25#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
26#include <linux/stat.h> 26#include <linux/stat.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28#include <linux/mpage.h>
29#include <linux/pagemap.h> 28#include <linux/pagemap.h>
30#include <linux/pagevec.h> 29#include <linux/pagevec.h>
31#include <linux/smp_lock.h> 30#include <linux/smp_lock.h>
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b88147c1dc27..05f874c7441b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -19,7 +19,6 @@
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/buffer_head.h>
23#include <linux/stat.h> 22#include <linux/stat.h>
24#include <linux/pagemap.h> 23#include <linux/pagemap.h>
25#include <asm/div64.h> 24#include <asm/div64.h>
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index b0ea6687ab55..e34c7db00f6f 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -22,7 +22,6 @@
22 */ 22 */
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/ext2_fs.h>
26#include "cifspdu.h" 25#include "cifspdu.h"
27#include "cifsglob.h" 26#include "cifsglob.h"
28#include "cifsproto.h" 27#include "cifsproto.h"
@@ -74,7 +73,7 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
74 } 73 }
75 break; 74 break;
76#ifdef CONFIG_CIFS_POSIX 75#ifdef CONFIG_CIFS_POSIX
77 case EXT2_IOC_GETFLAGS: 76 case FS_IOC_GETFLAGS:
78 if(CIFS_UNIX_EXTATTR_CAP & caps) { 77 if(CIFS_UNIX_EXTATTR_CAP & caps) {
79 if (pSMBFile == NULL) 78 if (pSMBFile == NULL)
80 break; 79 break;
@@ -82,12 +81,12 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
82 &ExtAttrBits, &ExtAttrMask); 81 &ExtAttrBits, &ExtAttrMask);
83 if(rc == 0) 82 if(rc == 0)
84 rc = put_user(ExtAttrBits & 83 rc = put_user(ExtAttrBits &
85 EXT2_FL_USER_VISIBLE, 84 FS_FL_USER_VISIBLE,
86 (int __user *)arg); 85 (int __user *)arg);
87 } 86 }
88 break; 87 break;
89 88
90 case EXT2_IOC_SETFLAGS: 89 case FS_IOC_SETFLAGS:
91 if(CIFS_UNIX_EXTATTR_CAP & caps) { 90 if(CIFS_UNIX_EXTATTR_CAP & caps) {
92 if(get_user(ExtAttrBits,(int __user *)arg)) { 91 if(get_user(ExtAttrBits,(int __user *)arg)) {
93 rc = -EFAULT; 92 rc = -EFAULT;
diff --git a/fs/compat.c b/fs/compat.c
index ce982f6e8c80..122b4e3992b5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -52,11 +52,12 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/ioctls.h>
-
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+#include "internal.h"
 
 int compat_log = 1;
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 int compat_printk(const char *fmt, ...)
 {
         va_list ap;
@@ -313,9 +314,6 @@ out:
313#define IOCTL_HASHSIZE 256 314#define IOCTL_HASHSIZE 256
314static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; 315static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE];
315 316
316extern struct ioctl_trans ioctl_start[];
317extern int ioctl_table_size;
318
319static inline unsigned long ioctl32_hash(unsigned long cmd) 317static inline unsigned long ioctl32_hash(unsigned long cmd)
320{ 318{
321 return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; 319 return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE;
@@ -838,8 +836,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
838 return 0; 836 return 0;
839} 837}
840 838
841extern int copy_mount_options (const void __user *, unsigned long *);
842
843#define SMBFS_NAME "smbfs" 839#define SMBFS_NAME "smbfs"
844#define NCPFS_NAME "ncpfs" 840#define NCPFS_NAME "ncpfs"
845#define NFS4_NAME "nfs4" 841#define NFS4_NAME "nfs4"
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 4063a9396977..64b34533edea 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -40,15 +40,11 @@
40#include <linux/if_pppox.h> 40#include <linux/if_pppox.h>
41#include <linux/mtio.h> 41#include <linux/mtio.h>
42#include <linux/cdrom.h> 42#include <linux/cdrom.h>
43#include <linux/loop.h>
44#include <linux/auto_fs.h> 43#include <linux/auto_fs.h>
45#include <linux/auto_fs4.h> 44#include <linux/auto_fs4.h>
46#include <linux/tty.h> 45#include <linux/tty.h>
47#include <linux/vt_kern.h> 46#include <linux/vt_kern.h>
48#include <linux/fb.h> 47#include <linux/fb.h>
49#include <linux/ext2_fs.h>
50#include <linux/ext3_jbd.h>
51#include <linux/ext3_fs.h>
52#include <linux/videodev.h> 48#include <linux/videodev.h>
53#include <linux/netdevice.h> 49#include <linux/netdevice.h>
54#include <linux/raw.h> 50#include <linux/raw.h>
@@ -60,7 +56,6 @@
60#include <linux/pci.h> 56#include <linux/pci.h>
61#include <linux/module.h> 57#include <linux/module.h>
62#include <linux/serial.h> 58#include <linux/serial.h>
63#include <linux/reiserfs_fs.h>
64#include <linux/if_tun.h> 59#include <linux/if_tun.h>
65#include <linux/ctype.h> 60#include <linux/ctype.h>
66#include <linux/ioctl32.h> 61#include <linux/ioctl32.h>
@@ -113,7 +108,6 @@
113#include <linux/nbd.h> 108#include <linux/nbd.h>
114#include <linux/random.h> 109#include <linux/random.h>
115#include <linux/filter.h> 110#include <linux/filter.h>
116#include <linux/msdos_fs.h>
117#include <linux/pktcdvd.h> 111#include <linux/pktcdvd.h>
118 112
119#include <linux/hiddev.h> 113#include <linux/hiddev.h>
@@ -124,21 +118,6 @@
124#include <linux/dvb/video.h> 118#include <linux/dvb/video.h>
125#include <linux/lp.h> 119#include <linux/lp.h>
126 120
127/* Aiee. Someone does not find a difference between int and long */
128#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int)
129#define EXT2_IOC32_SETFLAGS _IOW('f', 2, int)
130#define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
131#define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
132#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
133#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
134#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
135#ifdef CONFIG_JBD_DEBUG
136#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
137#endif
138
139#define EXT2_IOC32_GETVERSION _IOR('v', 1, int)
140#define EXT2_IOC32_SETVERSION _IOW('v', 2, int)
141
142static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, 121static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
143 unsigned long arg, struct file *f) 122 unsigned long arg, struct file *f)
144{ 123{
@@ -176,34 +155,6 @@ static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg)
176 return err; 155 return err;
177} 156}
178 157
179static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
180{
181 /* These are just misnamed, they actually get/put from/to user an int */
182 switch (cmd) {
183 case EXT2_IOC32_GETFLAGS: cmd = EXT2_IOC_GETFLAGS; break;
184 case EXT2_IOC32_SETFLAGS: cmd = EXT2_IOC_SETFLAGS; break;
185 case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break;
186 case EXT2_IOC32_SETVERSION: cmd = EXT2_IOC_SETVERSION; break;
187 }
188 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
189}
190
191static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
192{
193 /* These are just misnamed, they actually get/put from/to user an int */
194 switch (cmd) {
195 case EXT3_IOC32_GETVERSION: cmd = EXT3_IOC_GETVERSION; break;
196 case EXT3_IOC32_SETVERSION: cmd = EXT3_IOC_SETVERSION; break;
197 case EXT3_IOC32_GETRSVSZ: cmd = EXT3_IOC_GETRSVSZ; break;
198 case EXT3_IOC32_SETRSVSZ: cmd = EXT3_IOC_SETRSVSZ; break;
199 case EXT3_IOC32_GROUP_EXTEND: cmd = EXT3_IOC_GROUP_EXTEND; break;
200#ifdef CONFIG_JBD_DEBUG
201 case EXT3_IOC32_WAIT_FOR_READONLY: cmd = EXT3_IOC_WAIT_FOR_READONLY; break;
202#endif
203 }
204 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
205}
206
207struct compat_video_event { 158struct compat_video_event {
208 int32_t type; 159 int32_t type;
209 compat_time_t timestamp; 160 compat_time_t timestamp;
@@ -694,6 +645,7 @@ out:
694} 645}
695#endif 646#endif
696 647
648#ifdef CONFIG_BLOCK
697struct hd_geometry32 { 649struct hd_geometry32 {
698 unsigned char heads; 650 unsigned char heads;
699 unsigned char sectors; 651 unsigned char sectors;
@@ -918,6 +870,7 @@ static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
918 } 870 }
919 return err; 871 return err;
920} 872}
873#endif /* CONFIG_BLOCK */
921 874
922struct sock_fprog32 { 875struct sock_fprog32 {
923 unsigned short len; 876 unsigned short len;
@@ -1041,6 +994,7 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1041} 994}
1042 995
1043 996
997#ifdef CONFIG_BLOCK
1044struct mtget32 { 998struct mtget32 {
1045 compat_long_t mt_type; 999 compat_long_t mt_type;
1046 compat_long_t mt_resid; 1000 compat_long_t mt_resid;
@@ -1213,73 +1167,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
1213 1167
1214 return err; 1168 return err;
1215} 1169}
1216 1170#endif /* CONFIG_BLOCK */
1217struct loop_info32 {
1218 compat_int_t lo_number; /* ioctl r/o */
1219 compat_dev_t lo_device; /* ioctl r/o */
1220 compat_ulong_t lo_inode; /* ioctl r/o */
1221 compat_dev_t lo_rdevice; /* ioctl r/o */
1222 compat_int_t lo_offset;
1223 compat_int_t lo_encrypt_type;
1224 compat_int_t lo_encrypt_key_size; /* ioctl w/o */
1225 compat_int_t lo_flags; /* ioctl r/o */
1226 char lo_name[LO_NAME_SIZE];
1227 unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
1228 compat_ulong_t lo_init[2];
1229 char reserved[4];
1230};
1231
1232static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg)
1233{
1234 mm_segment_t old_fs = get_fs();
1235 struct loop_info l;
1236 struct loop_info32 __user *ul;
1237 int err = -EINVAL;
1238
1239 ul = compat_ptr(arg);
1240 switch(cmd) {
1241 case LOOP_SET_STATUS:
1242 err = get_user(l.lo_number, &ul->lo_number);
1243 err |= __get_user(l.lo_device, &ul->lo_device);
1244 err |= __get_user(l.lo_inode, &ul->lo_inode);
1245 err |= __get_user(l.lo_rdevice, &ul->lo_rdevice);
1246 err |= __copy_from_user(&l.lo_offset, &ul->lo_offset,
1247 8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
1248 if (err) {
1249 err = -EFAULT;
1250 } else {
1251 set_fs (KERNEL_DS);
1252 err = sys_ioctl (fd, cmd, (unsigned long)&l);
1253 set_fs (old_fs);
1254 }
1255 break;
1256 case LOOP_GET_STATUS:
1257 set_fs (KERNEL_DS);
1258 err = sys_ioctl (fd, cmd, (unsigned long)&l);
1259 set_fs (old_fs);
1260 if (!err) {
1261 err = put_user(l.lo_number, &ul->lo_number);
1262 err |= __put_user(l.lo_device, &ul->lo_device);
1263 err |= __put_user(l.lo_inode, &ul->lo_inode);
1264 err |= __put_user(l.lo_rdevice, &ul->lo_rdevice);
1265 err |= __copy_to_user(&ul->lo_offset, &l.lo_offset,
1266 (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
1267 if (err)
1268 err = -EFAULT;
1269 }
1270 break;
1271 default: {
1272 static int count;
1273 if (++count <= 20)
1274 printk("%s: Unknown loop ioctl cmd, fd(%d) "
1275 "cmd(%08x) arg(%08lx)\n",
1276 __FUNCTION__, fd, cmd, arg);
1277 }
1278 }
1279 return err;
1280}
1281
1282extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg);
1283 1171
1284#ifdef CONFIG_VT 1172#ifdef CONFIG_VT
1285 1173
@@ -1607,6 +1495,7 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
1607 return -EINVAL; 1495 return -EINVAL;
1608} 1496}
1609 1497
1498#ifdef CONFIG_BLOCK
1610static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg) 1499static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
1611{ 1500{
1612 /* The mkswap binary hard codes it to Intel value :-((( */ 1501 /* The mkswap binary hard codes it to Intel value :-((( */
@@ -1641,12 +1530,14 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
1641 1530
1642 return sys_ioctl(fd, cmd, (unsigned long)a); 1531 return sys_ioctl(fd, cmd, (unsigned long)a);
1643} 1532}
1533#endif
1644 1534
1645static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg) 1535static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
1646{ 1536{
1647 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg); 1537 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
1648} 1538}
1649 1539
1540#ifdef CONFIG_BLOCK
1650/* Fix sizeof(sizeof()) breakage */ 1541/* Fix sizeof(sizeof()) breakage */
1651#define BLKBSZGET_32 _IOR(0x12,112,int) 1542#define BLKBSZGET_32 _IOR(0x12,112,int)
1652#define BLKBSZSET_32 _IOW(0x12,113,int) 1543#define BLKBSZSET_32 _IOW(0x12,113,int)
@@ -1667,6 +1558,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
1667{ 1558{
1668 return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg)); 1559 return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
1669} 1560}
1561#endif
1670 1562
1671/* Bluetooth ioctls */ 1563/* Bluetooth ioctls */
1672#define HCIUARTSETPROTO _IOW('U', 200, int) 1564#define HCIUARTSETPROTO _IOW('U', 200, int)
@@ -1687,6 +1579,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
1687#define HIDPGETCONNLIST _IOR('H', 210, int) 1579#define HIDPGETCONNLIST _IOR('H', 210, int)
1688#define HIDPGETCONNINFO _IOR('H', 211, int) 1580#define HIDPGETCONNINFO _IOR('H', 211, int)
1689 1581
1582#ifdef CONFIG_BLOCK
1690struct floppy_struct32 { 1583struct floppy_struct32 {
1691 compat_uint_t size; 1584 compat_uint_t size;
1692 compat_uint_t sect; 1585 compat_uint_t sect;
@@ -2011,6 +1904,7 @@ out:
2011 kfree(karg); 1904 kfree(karg);
2012 return err; 1905 return err;
2013} 1906}
1907#endif
2014 1908
2015struct mtd_oob_buf32 { 1909struct mtd_oob_buf32 {
2016 u_int32_t start; 1910 u_int32_t start;
@@ -2052,61 +1946,7 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg)
2052 return err; 1946 return err;
2053} 1947}
2054 1948
2055#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2]) 1949#ifdef CONFIG_BLOCK
2056#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2])
2057
2058static long
2059put_dirent32 (struct dirent *d, struct compat_dirent __user *d32)
2060{
2061 if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
2062 return -EFAULT;
2063
2064 __put_user(d->d_ino, &d32->d_ino);
2065 __put_user(d->d_off, &d32->d_off);
2066 __put_user(d->d_reclen, &d32->d_reclen);
2067 if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
2068 return -EFAULT;
2069
2070 return 0;
2071}
2072
2073static int vfat_ioctl32(unsigned fd, unsigned cmd, unsigned long arg)
2074{
2075 struct compat_dirent __user *p = compat_ptr(arg);
2076 int ret;
2077 mm_segment_t oldfs = get_fs();
2078 struct dirent d[2];
2079
2080 switch(cmd)
2081 {
2082 case VFAT_IOCTL_READDIR_BOTH32:
2083 cmd = VFAT_IOCTL_READDIR_BOTH;
2084 break;
2085 case VFAT_IOCTL_READDIR_SHORT32:
2086 cmd = VFAT_IOCTL_READDIR_SHORT;
2087 break;
2088 }
2089
2090 set_fs(KERNEL_DS);
2091 ret = sys_ioctl(fd,cmd,(unsigned long)&d);
2092 set_fs(oldfs);
2093 if (ret >= 0) {
2094 ret |= put_dirent32(&d[0], p);
2095 ret |= put_dirent32(&d[1], p + 1);
2096 }
2097 return ret;
2098}
2099
2100#define REISERFS_IOC_UNPACK32 _IOW(0xCD,1,int)
2101
2102static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr)
2103{
2104 if (cmd == REISERFS_IOC_UNPACK32)
2105 cmd = REISERFS_IOC_UNPACK;
2106
2107 return sys_ioctl(fd,cmd,ptr);
2108}
2109
2110struct raw32_config_request 1950struct raw32_config_request
2111{ 1951{
2112 compat_int_t raw_minor; 1952 compat_int_t raw_minor;
@@ -2171,6 +2011,7 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
2171 } 2011 }
2172 return ret; 2012 return ret;
2173} 2013}
2014#endif /* CONFIG_BLOCK */
2174 2015
2175struct serial_struct32 { 2016struct serial_struct32 {
2176 compat_int_t type; 2017 compat_int_t type;
@@ -2777,6 +2618,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
2777HANDLE_IOCTL(SIOCRTMSG, ret_einval) 2618HANDLE_IOCTL(SIOCRTMSG, ret_einval)
2778HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp) 2619HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
2779#endif 2620#endif
2621#ifdef CONFIG_BLOCK
2780HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo) 2622HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
2781HANDLE_IOCTL(BLKRAGET, w_long) 2623HANDLE_IOCTL(BLKRAGET, w_long)
2782HANDLE_IOCTL(BLKGETSIZE, w_long) 2624HANDLE_IOCTL(BLKGETSIZE, w_long)
@@ -2802,16 +2644,17 @@ HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
2802HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans) 2644HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
2803HANDLE_IOCTL(SG_IO,sg_ioctl_trans) 2645HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
2804HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans) 2646HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
2647#endif
2805HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans) 2648HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
2806HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans) 2649HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
2807HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans) 2650HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
2808HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans) 2651HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
2652#ifdef CONFIG_BLOCK
2809HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans) 2653HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
2810HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans) 2654HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
2811HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans) 2655HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
2812HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans) 2656HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
2813HANDLE_IOCTL(LOOP_SET_STATUS, loop_status) 2657#endif
2814HANDLE_IOCTL(LOOP_GET_STATUS, loop_status)
2815#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int) 2658#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
2816HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout) 2659HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
2817#ifdef CONFIG_VT 2660#ifdef CONFIG_VT
@@ -2821,19 +2664,6 @@ HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
2821HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl) 2664HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
2822HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl) 2665HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
2823#endif 2666#endif
2824HANDLE_IOCTL(EXT2_IOC32_GETFLAGS, do_ext2_ioctl)
2825HANDLE_IOCTL(EXT2_IOC32_SETFLAGS, do_ext2_ioctl)
2826HANDLE_IOCTL(EXT2_IOC32_GETVERSION, do_ext2_ioctl)
2827HANDLE_IOCTL(EXT2_IOC32_SETVERSION, do_ext2_ioctl)
2828HANDLE_IOCTL(EXT3_IOC32_GETVERSION, do_ext3_ioctl)
2829HANDLE_IOCTL(EXT3_IOC32_SETVERSION, do_ext3_ioctl)
2830HANDLE_IOCTL(EXT3_IOC32_GETRSVSZ, do_ext3_ioctl)
2831HANDLE_IOCTL(EXT3_IOC32_SETRSVSZ, do_ext3_ioctl)
2832HANDLE_IOCTL(EXT3_IOC32_GROUP_EXTEND, do_ext3_ioctl)
2833COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD)
2834#ifdef CONFIG_JBD_DEBUG
2835HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl)
2836#endif
2837/* One SMB ioctl needs translations. */ 2667/* One SMB ioctl needs translations. */
2838#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) 2668#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
2839HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid) 2669HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
@@ -2863,16 +2693,14 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
2863HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) 2693HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
2864HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) 2694HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
2865/* block stuff */ 2695/* block stuff */
2696#ifdef CONFIG_BLOCK
2866HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget) 2697HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
2867HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset) 2698HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
2868HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64) 2699HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
2869/* vfat */
2870HANDLE_IOCTL(VFAT_IOCTL_READDIR_BOTH32, vfat_ioctl32)
2871HANDLE_IOCTL(VFAT_IOCTL_READDIR_SHORT32, vfat_ioctl32)
2872HANDLE_IOCTL(REISERFS_IOC_UNPACK32, reiserfs_ioctl32)
2873/* Raw devices */ 2700/* Raw devices */
2874HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) 2701HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
2875HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) 2702HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
2703#endif
2876/* Serial */ 2704/* Serial */
2877HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl) 2705HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
2878HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl) 2706HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
diff --git a/fs/dcache.c b/fs/dcache.c
index 17b392a2049e..fc2faa44f8d1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@
32#include <linux/seqlock.h> 32#include <linux/seqlock.h>
33#include <linux/swap.h> 33#include <linux/swap.h>
34#include <linux/bootmem.h> 34#include <linux/bootmem.h>
35#include "internal.h"
35 36
36 37
37int sysctl_vfs_cache_pressure __read_mostly = 100; 38int sysctl_vfs_cache_pressure __read_mostly = 100;
@@ -1877,9 +1878,6 @@ kmem_cache_t *filp_cachep __read_mostly;
1877 1878
1878EXPORT_SYMBOL(d_genocide); 1879EXPORT_SYMBOL(d_genocide);
1879 1880
1880extern void bdev_cache_init(void);
1881extern void chrdev_init(void);
1882
1883void __init vfs_caches_init_early(void) 1881void __init vfs_caches_init_early(void)
1884{ 1882{
1885 dcache_init_early(); 1883 dcache_init_early();
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 92ea8265d7d5..3e7a84a1e509 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -661,5 +661,8 @@ const struct file_operations ext2_dir_operations = {
661 .read = generic_read_dir, 661 .read = generic_read_dir,
662 .readdir = ext2_readdir, 662 .readdir = ext2_readdir,
663 .ioctl = ext2_ioctl, 663 .ioctl = ext2_ioctl,
664#ifdef CONFIG_COMPAT
665 .compat_ioctl = ext2_compat_ioctl,
666#endif
664 .fsync = ext2_sync_file, 667 .fsync = ext2_sync_file,
665}; 668};
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index e65a019fc7a5..c19ac153f56b 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -137,6 +137,7 @@ extern void ext2_set_inode_flags(struct inode *inode);
137/* ioctl.c */ 137/* ioctl.c */
138extern int ext2_ioctl (struct inode *, struct file *, unsigned int, 138extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
139 unsigned long); 139 unsigned long);
140extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long);
140 141
141/* namei.c */ 142/* namei.c */
142struct dentry *ext2_get_parent(struct dentry *child); 143struct dentry *ext2_get_parent(struct dentry *child);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 23e2c7ccec1d..e8bbed9dd268 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -46,6 +46,9 @@ const struct file_operations ext2_file_operations = {
46 .aio_read = generic_file_aio_read, 46 .aio_read = generic_file_aio_read,
47 .aio_write = generic_file_aio_write, 47 .aio_write = generic_file_aio_write,
48 .ioctl = ext2_ioctl, 48 .ioctl = ext2_ioctl,
49#ifdef CONFIG_COMPAT
50 .compat_ioctl = ext2_compat_ioctl,
51#endif
49 .mmap = generic_file_mmap, 52 .mmap = generic_file_mmap,
50 .open = generic_file_open, 53 .open = generic_file_open,
51 .release = ext2_release_file, 54 .release = ext2_release_file,
@@ -63,6 +66,9 @@ const struct file_operations ext2_xip_file_operations = {
63 .read = xip_file_read, 66 .read = xip_file_read,
64 .write = xip_file_write, 67 .write = xip_file_write,
65 .ioctl = ext2_ioctl, 68 .ioctl = ext2_ioctl,
69#ifdef CONFIG_COMPAT
70 .compat_ioctl = ext2_compat_ioctl,
71#endif
66 .mmap = xip_file_mmap, 72 .mmap = xip_file_mmap,
67 .open = generic_file_open, 73 .open = generic_file_open,
68 .release = ext2_release_file, 74 .release = ext2_release_file,
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 3ca9afdf713d..1dfba77eab10 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -11,6 +11,8 @@
11#include <linux/capability.h> 11#include <linux/capability.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/compat.h>
15#include <linux/smp_lock.h>
14#include <asm/current.h> 16#include <asm/current.h>
15#include <asm/uaccess.h> 17#include <asm/uaccess.h>
16 18
@@ -80,3 +82,33 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
80 return -ENOTTY; 82 return -ENOTTY;
81 } 83 }
82} 84}
85
86#ifdef CONFIG_COMPAT
87long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
88{
89 struct inode *inode = file->f_dentry->d_inode;
90 int ret;
91
92 /* These are just misnamed, they actually get/put from/to user an int */
93 switch (cmd) {
94 case EXT2_IOC32_GETFLAGS:
95 cmd = EXT2_IOC_GETFLAGS;
96 break;
97 case EXT2_IOC32_SETFLAGS:
98 cmd = EXT2_IOC_SETFLAGS;
99 break;
100 case EXT2_IOC32_GETVERSION:
101 cmd = EXT2_IOC_GETVERSION;
102 break;
103 case EXT2_IOC32_SETVERSION:
104 cmd = EXT2_IOC_SETVERSION;
105 break;
106 default:
107 return -ENOIOCTLCMD;
108 }
109 lock_kernel();
110 ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
111 unlock_kernel();
112 return ret;
113}
114#endif
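
The cmd rewriting in ext2_compat_ioctl() above is needed because an ioctl number encodes the size of its argument type, so the "same" flags request has a different value when issued by a 32-bit process on a 64-bit kernel. A minimal userspace sketch of that encoding difference, assuming the usual _IOR definition from linux/ioctl.h and macro values mirroring linux/ext2_fs.h (the printed numbers are illustrative, not taken from this patch):

#include <stdio.h>
#include <linux/ioctl.h>

/* mirrors linux/ext2_fs.h: native uses long, the 32-bit ABI uses int */
#define EXT2_IOC_GETFLAGS   _IOR('f', 1, long)
#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int)

int main(void)
{
	/* the two values differ only in the size field of the encoding,
	 * which is why the compat handler just rewrites cmd and forwards */
	printf("native 0x%08lx, compat 0x%08lx\n",
	       (unsigned long)EXT2_IOC_GETFLAGS,
	       (unsigned long)EXT2_IOC32_GETFLAGS);
	return 0;
}

On a 64-bit build the two constants differ (sizeof(long) is 8), which is exactly the case the new ->compat_ioctl methods handle.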
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 429acbb4e064..d0b54f30b914 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -44,6 +44,9 @@ const struct file_operations ext3_dir_operations = {
44 .read = generic_read_dir, 44 .read = generic_read_dir,
45 .readdir = ext3_readdir, /* we take BKL. needed?*/ 45 .readdir = ext3_readdir, /* we take BKL. needed?*/
46 .ioctl = ext3_ioctl, /* BKL held */ 46 .ioctl = ext3_ioctl, /* BKL held */
47#ifdef CONFIG_COMPAT
48 .compat_ioctl = ext3_compat_ioctl,
49#endif
47 .fsync = ext3_sync_file, /* BKL held */ 50 .fsync = ext3_sync_file, /* BKL held */
48#ifdef CONFIG_EXT3_INDEX 51#ifdef CONFIG_EXT3_INDEX
49 .release = ext3_release_dir, 52 .release = ext3_release_dir,
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 994efd189f4e..74ff20f9d09b 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -114,6 +114,9 @@ const struct file_operations ext3_file_operations = {
114 .readv = generic_file_readv, 114 .readv = generic_file_readv,
115 .writev = generic_file_writev, 115 .writev = generic_file_writev,
116 .ioctl = ext3_ioctl, 116 .ioctl = ext3_ioctl,
117#ifdef CONFIG_COMPAT
118 .compat_ioctl = ext3_compat_ioctl,
119#endif
117 .mmap = generic_file_mmap, 120 .mmap = generic_file_mmap,
118 .open = generic_file_open, 121 .open = generic_file_open,
119 .release = ext3_release_file, 122 .release = ext3_release_file,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index dcf4f1dd108b..03ba5bcab186 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -36,6 +36,7 @@
36#include <linux/writeback.h> 36#include <linux/writeback.h>
37#include <linux/mpage.h> 37#include <linux/mpage.h>
38#include <linux/uio.h> 38#include <linux/uio.h>
39#include <linux/bio.h>
39#include "xattr.h" 40#include "xattr.h"
40#include "acl.h" 41#include "acl.h"
41 42
@@ -1073,7 +1074,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
1073 return bh; 1074 return bh;
1074 if (buffer_uptodate(bh)) 1075 if (buffer_uptodate(bh))
1075 return bh; 1076 return bh;
1076 ll_rw_block(READ, 1, &bh); 1077 ll_rw_block(READ_META, 1, &bh);
1077 wait_on_buffer(bh); 1078 wait_on_buffer(bh);
1078 if (buffer_uptodate(bh)) 1079 if (buffer_uptodate(bh))
1079 return bh; 1080 return bh;
@@ -2540,7 +2541,7 @@ make_io:
2540 */ 2541 */
2541 get_bh(bh); 2542 get_bh(bh);
2542 bh->b_end_io = end_buffer_read_sync; 2543 bh->b_end_io = end_buffer_read_sync;
2543 submit_bh(READ, bh); 2544 submit_bh(READ_META, bh);
2544 wait_on_buffer(bh); 2545 wait_on_buffer(bh);
2545 if (!buffer_uptodate(bh)) { 2546 if (!buffer_uptodate(bh)) {
2546 ext3_error(inode->i_sb, "ext3_get_inode_loc", 2547 ext3_error(inode->i_sb, "ext3_get_inode_loc",
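
The switch from READ to READ_META above tags ext3 metadata reads (inode blocks, directory blocks) so the I/O scheduler can tell them apart from ordinary data reads. A standalone sketch of how that tag composes and is tested, assuming READ_META is built from the BIO_RW_META bit added to include/linux/bio.h later in this merge (the real definition lives in linux/fs.h and is not shown in this section):

#include <stdio.h>

/* bit positions mirroring include/linux/bio.h */
#define BIO_RW_META  5                              /* new in this series */

#define READ         0
#define READ_META    (READ | (1 << BIO_RW_META))    /* assumed linux/fs.h form */

/* what the new bio_rw_meta() test effectively checks */
static int rw_is_meta(unsigned long rw)
{
	return (rw & (1 << BIO_RW_META)) != 0;
}

int main(void)
{
	printf("READ: meta=%d, READ_META: meta=%d\n",
	       rw_is_meta(READ), rw_is_meta(READ_META));
	return 0;
}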
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 3a6b012d120c..12daa6869572 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -13,9 +13,10 @@
13#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
14#include <linux/ext3_jbd.h> 14#include <linux/ext3_jbd.h>
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/compat.h>
17#include <linux/smp_lock.h>
16#include <asm/uaccess.h> 18#include <asm/uaccess.h>
17 19
18
19int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 20int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
20 unsigned long arg) 21 unsigned long arg)
21{ 22{
@@ -252,3 +253,55 @@ flags_err:
252 return -ENOTTY; 253 return -ENOTTY;
253 } 254 }
254} 255}
256
257#ifdef CONFIG_COMPAT
258long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
259{
260 struct inode *inode = file->f_dentry->d_inode;
261 int ret;
262
263 /* These are just misnamed, they actually get/put from/to user an int */
264 switch (cmd) {
265 case EXT3_IOC32_GETFLAGS:
266 cmd = EXT3_IOC_GETFLAGS;
267 break;
268 case EXT3_IOC32_SETFLAGS:
269 cmd = EXT3_IOC_SETFLAGS;
270 break;
271 case EXT3_IOC32_GETVERSION:
272 cmd = EXT3_IOC_GETVERSION;
273 break;
274 case EXT3_IOC32_SETVERSION:
275 cmd = EXT3_IOC_SETVERSION;
276 break;
277 case EXT3_IOC32_GROUP_EXTEND:
278 cmd = EXT3_IOC_GROUP_EXTEND;
279 break;
280 case EXT3_IOC32_GETVERSION_OLD:
281 cmd = EXT3_IOC_GETVERSION_OLD;
282 break;
283 case EXT3_IOC32_SETVERSION_OLD:
284 cmd = EXT3_IOC_SETVERSION_OLD;
285 break;
286#ifdef CONFIG_JBD_DEBUG
287 case EXT3_IOC32_WAIT_FOR_READONLY:
288 cmd = EXT3_IOC_WAIT_FOR_READONLY;
289 break;
290#endif
291 case EXT3_IOC32_GETRSVSZ:
292 cmd = EXT3_IOC_GETRSVSZ;
293 break;
294 case EXT3_IOC32_SETRSVSZ:
295 cmd = EXT3_IOC_SETRSVSZ;
296 break;
297 case EXT3_IOC_GROUP_ADD:
298 break;
299 default:
300 return -ENOIOCTLCMD;
301 }
302 lock_kernel();
303 ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
304 unlock_kernel();
305 return ret;
306}
307#endif
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 85d132c37ee0..235e77b52ea5 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -35,6 +35,7 @@
35#include <linux/string.h> 35#include <linux/string.h>
36#include <linux/quotaops.h> 36#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
38#include <linux/bio.h>
38#include <linux/smp_lock.h> 39#include <linux/smp_lock.h>
39 40
40#include "namei.h" 41#include "namei.h"
@@ -870,7 +871,7 @@ restart:
870 bh = ext3_getblk(NULL, dir, b++, 0, &err); 871 bh = ext3_getblk(NULL, dir, b++, 0, &err);
871 bh_use[ra_max] = bh; 872 bh_use[ra_max] = bh;
872 if (bh) 873 if (bh)
873 ll_rw_block(READ, 1, &bh); 874 ll_rw_block(READ_META, 1, &bh);
874 } 875 }
875 } 876 }
876 if ((bh = bh_use[ra_ptr++]) == NULL) 877 if ((bh = bh_use[ra_ptr++]) == NULL)
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 698b85bb1dd4..3e50a4166283 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -20,6 +20,7 @@
20#include <linux/dirent.h> 20#include <linux/dirent.h>
21#include <linux/smp_lock.h> 21#include <linux/smp_lock.h>
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/compat.h>
23#include <asm/uaccess.h> 24#include <asm/uaccess.h>
24 25
25static inline loff_t fat_make_i_pos(struct super_block *sb, 26static inline loff_t fat_make_i_pos(struct super_block *sb,
@@ -741,10 +742,65 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
741 return ret; 742 return ret;
742} 743}
743 744
745#ifdef CONFIG_COMPAT
746#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2])
747#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2])
748
749static long fat_compat_put_dirent32(struct dirent *d,
750 struct compat_dirent __user *d32)
751{
752 if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent)))
753 return -EFAULT;
754
755 __put_user(d->d_ino, &d32->d_ino);
756 __put_user(d->d_off, &d32->d_off);
757 __put_user(d->d_reclen, &d32->d_reclen);
758 if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen))
759 return -EFAULT;
760
761 return 0;
762}
763
764static long fat_compat_dir_ioctl(struct file *file, unsigned cmd,
765 unsigned long arg)
766{
767 struct compat_dirent __user *p = compat_ptr(arg);
768 int ret;
769 mm_segment_t oldfs = get_fs();
770 struct dirent d[2];
771
772 switch (cmd) {
773 case VFAT_IOCTL_READDIR_BOTH32:
774 cmd = VFAT_IOCTL_READDIR_BOTH;
775 break;
776 case VFAT_IOCTL_READDIR_SHORT32:
777 cmd = VFAT_IOCTL_READDIR_SHORT;
778 break;
779 default:
780 return -ENOIOCTLCMD;
781 }
782
783 set_fs(KERNEL_DS);
784 lock_kernel();
785 ret = fat_dir_ioctl(file->f_dentry->d_inode, file,
786 cmd, (unsigned long) &d);
787 unlock_kernel();
788 set_fs(oldfs);
789 if (ret >= 0) {
790 ret |= fat_compat_put_dirent32(&d[0], p);
791 ret |= fat_compat_put_dirent32(&d[1], p + 1);
792 }
793 return ret;
794}
795#endif /* CONFIG_COMPAT */
796
744const struct file_operations fat_dir_operations = { 797const struct file_operations fat_dir_operations = {
745 .read = generic_read_dir, 798 .read = generic_read_dir,
746 .readdir = fat_readdir, 799 .readdir = fat_readdir,
747 .ioctl = fat_dir_ioctl, 800 .ioctl = fat_dir_ioctl,
801#ifdef CONFIG_COMPAT
802 .compat_ioctl = fat_compat_dir_ioctl,
803#endif
748 .fsync = file_fsync, 804 .fsync = file_fsync,
749}; 805};
750 806
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 892643dc9af1..c403b66ec83c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -22,8 +22,7 @@
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/backing-dev.h> 23#include <linux/backing-dev.h>
24#include <linux/buffer_head.h> 24#include <linux/buffer_head.h>
25 25#include "internal.h"
26extern struct super_block *blockdev_superblock;
27 26
28/** 27/**
29 * __mark_inode_dirty - internal function 28 * __mark_inode_dirty - internal function
@@ -320,7 +319,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
320 319
321 if (!bdi_cap_writeback_dirty(bdi)) { 320 if (!bdi_cap_writeback_dirty(bdi)) {
322 list_move(&inode->i_list, &sb->s_dirty); 321 list_move(&inode->i_list, &sb->s_dirty);
323 if (sb == blockdev_superblock) { 322 if (sb_is_blkdev_sb(sb)) {
324 /* 323 /*
325 * Dirty memory-backed blockdev: the ramdisk 324 * Dirty memory-backed blockdev: the ramdisk
326 * driver does this. Skip just this inode 325 * driver does this. Skip just this inode
@@ -337,14 +336,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
337 336
338 if (wbc->nonblocking && bdi_write_congested(bdi)) { 337 if (wbc->nonblocking && bdi_write_congested(bdi)) {
339 wbc->encountered_congestion = 1; 338 wbc->encountered_congestion = 1;
340 if (sb != blockdev_superblock) 339 if (!sb_is_blkdev_sb(sb))
341 break; /* Skip a congested fs */ 340 break; /* Skip a congested fs */
342 list_move(&inode->i_list, &sb->s_dirty); 341 list_move(&inode->i_list, &sb->s_dirty);
343 continue; /* Skip a congested blockdev */ 342 continue; /* Skip a congested blockdev */
344 } 343 }
345 344
346 if (wbc->bdi && bdi != wbc->bdi) { 345 if (wbc->bdi && bdi != wbc->bdi) {
347 if (sb != blockdev_superblock) 346 if (!sb_is_blkdev_sb(sb))
348 break; /* fs has the wrong queue */ 347 break; /* fs has the wrong queue */
349 list_move(&inode->i_list, &sb->s_dirty); 348 list_move(&inode->i_list, &sb->s_dirty);
350 continue; /* blockdev has wrong queue */ 349 continue; /* blockdev has wrong queue */
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 8a1ca5ef7ada..3915635b4470 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -246,12 +246,8 @@ struct hfsplus_readdir_data {
246 246
247/* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support 247/* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support
248 * chattr/lsattr */ 248 * chattr/lsattr */
249#define HFSPLUS_IOC_EXT2_GETFLAGS _IOR('f', 1, long) 249#define HFSPLUS_IOC_EXT2_GETFLAGS FS_IOC_GETFLAGS
250#define HFSPLUS_IOC_EXT2_SETFLAGS _IOW('f', 2, long) 250#define HFSPLUS_IOC_EXT2_SETFLAGS FS_IOC_SETFLAGS
251
252#define EXT2_FLAG_IMMUTABLE 0x00000010 /* Immutable file */
253#define EXT2_FLAG_APPEND 0x00000020 /* writes to file may only append */
254#define EXT2_FLAG_NODUMP 0x00000040 /* do not dump file */
255 251
256 252
257/* 253/*
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 13cf848ac833..79fd10402ea3 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -28,11 +28,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
28 case HFSPLUS_IOC_EXT2_GETFLAGS: 28 case HFSPLUS_IOC_EXT2_GETFLAGS:
29 flags = 0; 29 flags = 0;
30 if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) 30 if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE)
31 flags |= EXT2_FLAG_IMMUTABLE; /* EXT2_IMMUTABLE_FL */ 31 flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */
32 if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) 32 if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND)
33 flags |= EXT2_FLAG_APPEND; /* EXT2_APPEND_FL */ 33 flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */
34 if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) 34 if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP)
35 flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */ 35 flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
36 return put_user(flags, (int __user *)arg); 36 return put_user(flags, (int __user *)arg);
37 case HFSPLUS_IOC_EXT2_SETFLAGS: { 37 case HFSPLUS_IOC_EXT2_SETFLAGS: {
38 if (IS_RDONLY(inode)) 38 if (IS_RDONLY(inode))
@@ -44,32 +44,31 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
44 if (get_user(flags, (int __user *)arg)) 44 if (get_user(flags, (int __user *)arg))
45 return -EFAULT; 45 return -EFAULT;
46 46
47 if (flags & (EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND) || 47 if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
48 HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { 48 HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
49 if (!capable(CAP_LINUX_IMMUTABLE)) 49 if (!capable(CAP_LINUX_IMMUTABLE))
50 return -EPERM; 50 return -EPERM;
51 } 51 }
52 52
53 /* don't silently ignore unsupported ext2 flags */ 53 /* don't silently ignore unsupported ext2 flags */
54 if (flags & ~(EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND| 54 if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
55 EXT2_FLAG_NODUMP))
56 return -EOPNOTSUPP; 55 return -EOPNOTSUPP;
57 56
58 if (flags & EXT2_FLAG_IMMUTABLE) { /* EXT2_IMMUTABLE_FL */ 57 if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
59 inode->i_flags |= S_IMMUTABLE; 58 inode->i_flags |= S_IMMUTABLE;
60 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; 59 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
61 } else { 60 } else {
62 inode->i_flags &= ~S_IMMUTABLE; 61 inode->i_flags &= ~S_IMMUTABLE;
63 HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; 62 HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE;
64 } 63 }
65 if (flags & EXT2_FLAG_APPEND) { /* EXT2_APPEND_FL */ 64 if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */
66 inode->i_flags |= S_APPEND; 65 inode->i_flags |= S_APPEND;
67 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; 66 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND;
68 } else { 67 } else {
69 inode->i_flags &= ~S_APPEND; 68 inode->i_flags &= ~S_APPEND;
70 HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; 69 HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND;
71 } 70 }
72 if (flags & EXT2_FLAG_NODUMP) /* EXT2_NODUMP_FL */ 71 if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */
73 HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; 72 HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP;
74 else 73 else
75 HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; 74 HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP;
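
With the private EXT2_FLAG_* constants removed, hfsplus now answers the shared FS_IOC_GETFLAGS/FS_IOC_SETFLAGS requests with the common FS_*_FL values, so a chattr-style tool works the same way across filesystems. A small userspace sketch, assuming a linux/fs.h new enough to export these names (error handling trimmed; the kernel side reads and writes an int, as the "misnamed" comments in the ioctl handlers note):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>   /* FS_IOC_GETFLAGS, FS_IOC_SETFLAGS, FS_NODUMP_FL */

int main(int argc, char **argv)
{
	int fd, flags;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
		perror("FS_IOC_GETFLAGS");
		return 1;
	}
	flags |= FS_NODUMP_FL;                   /* chattr +d, in e2fsprogs terms */
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
		perror("FS_IOC_SETFLAGS");
	close(fd);
	return 0;
}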
diff --git a/fs/inode.c b/fs/inode.c
index abf77471e6c4..ada7643104e1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -362,27 +362,6 @@ int invalidate_inodes(struct super_block * sb)
362} 362}
363 363
364EXPORT_SYMBOL(invalidate_inodes); 364EXPORT_SYMBOL(invalidate_inodes);
365
366int __invalidate_device(struct block_device *bdev)
367{
368 struct super_block *sb = get_super(bdev);
369 int res = 0;
370
371 if (sb) {
372 /*
373 * no need to lock the super, get_super holds the
374 * read mutex so the filesystem cannot go away
375 * under us (->put_super runs with the write lock
376 * hold).
377 */
378 shrink_dcache_sb(sb);
379 res = invalidate_inodes(sb);
380 drop_super(sb);
381 }
382 invalidate_bdev(bdev, 0);
383 return res;
384}
385EXPORT_SYMBOL(__invalidate_device);
386 365
387static int can_unuse(struct inode *inode) 366static int can_unuse(struct inode *inode)
388{ 367{
diff --git a/fs/internal.h b/fs/internal.h
new file mode 100644
index 000000000000..ea00126c9a59
--- /dev/null
+++ b/fs/internal.h
@@ -0,0 +1,55 @@
1/* fs/ internal definitions
2 *
3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/ioctl32.h>
13
14struct super_block;
15
16/*
17 * block_dev.c
18 */
19#ifdef CONFIG_BLOCK
20extern struct super_block *blockdev_superblock;
21extern void __init bdev_cache_init(void);
22
23static inline int sb_is_blkdev_sb(struct super_block *sb)
24{
25 return sb == blockdev_superblock;
26}
27
28#else
29static inline void bdev_cache_init(void)
30{
31}
32
33static inline int sb_is_blkdev_sb(struct super_block *sb)
34{
35 return 0;
36}
37#endif
38
39/*
40 * char_dev.c
41 */
42extern void __init chrdev_init(void);
43
44/*
45 * compat_ioctl.c
46 */
47#ifdef CONFIG_COMPAT
48extern struct ioctl_trans ioctl_start[];
49extern int ioctl_table_size;
50#endif
51
52/*
53 * namespace.c
54 */
55extern int copy_mount_options(const void __user *, unsigned long *);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 78b1deae3fa2..6dc6721d9e82 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/ioprio.c 2 * fs/ioprio.c
3 * 3 *
4 * Copyright (C) 2004 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
5 * 5 *
6 * Helper functions for setting/querying io priorities of processes. The 6 * Helper functions for setting/querying io priorities of processes. The
7 * system calls closely mimmick getpriority/setpriority, see the man page for 7 * system calls closely mimmick getpriority/setpriority, see the man page for
@@ -47,8 +47,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
47 /* see wmb() in current_io_context() */ 47 /* see wmb() in current_io_context() */
48 smp_read_barrier_depends(); 48 smp_read_barrier_depends();
49 49
50 if (ioc && ioc->set_ioprio) 50 if (ioc)
51 ioc->set_ioprio(ioc, ioprio); 51 ioc->ioprio_changed = 1;
52 52
53 task_unlock(task); 53 task_unlock(task);
54 return 0; 54 return 0;
@@ -81,7 +81,12 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
81 } 81 }
82 82
83 ret = -ESRCH; 83 ret = -ESRCH;
84 read_lock_irq(&tasklist_lock); 84 /*
85 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
86 * so we can't use rcu_read_lock(). See re-copy of ->ioprio
87 * in copy_process().
88 */
89 read_lock(&tasklist_lock);
85 switch (which) { 90 switch (which) {
86 case IOPRIO_WHO_PROCESS: 91 case IOPRIO_WHO_PROCESS:
87 if (!who) 92 if (!who)
@@ -124,7 +129,7 @@ free_uid:
124 ret = -EINVAL; 129 ret = -EINVAL;
125 } 130 }
126 131
127 read_unlock_irq(&tasklist_lock); 132 read_unlock(&tasklist_lock);
128 return ret; 133 return ret;
129} 134}
130 135
@@ -170,7 +175,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
170 int ret = -ESRCH; 175 int ret = -ESRCH;
171 int tmpio; 176 int tmpio;
172 177
173 read_lock_irq(&tasklist_lock); 178 read_lock(&tasklist_lock);
174 switch (which) { 179 switch (which) {
175 case IOPRIO_WHO_PROCESS: 180 case IOPRIO_WHO_PROCESS:
176 if (!who) 181 if (!who)
@@ -221,7 +226,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
221 ret = -EINVAL; 226 ret = -EINVAL;
222 } 227 }
223 228
224 read_unlock_irq(&tasklist_lock); 229 read_unlock(&tasklist_lock);
225 return ret; 230 return ret;
226} 231}
227 232
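
The ioprio_changed handover above is driven from sys_ioprio_set(); glibc of this era has no wrapper for it, so applications reach it through syscall(2). A hedged userspace sketch, where the packing macros below are reproduced from include/linux/ioprio.h as assumptions rather than taken from this patch:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* mirrors include/linux/ioprio.h */
#define IOPRIO_WHO_PROCESS  1
#define IOPRIO_CLASS_BE     2
#define IOPRIO_CLASS_SHIFT  13
#define IOPRIO_PRIO_VALUE(class, data)  (((class) << IOPRIO_CLASS_SHIFT) | (data))

int main(void)
{
	/* best-effort class, lowest priority level, for the calling process (who == 0) */
	int prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 7);

	if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, prio) < 0)
		perror("ioprio_set");
	else
		printf("io priority now 0x%x\n", prio);
	return 0;
}

The actual priority change is then picked up by the elevator the next time it sees the task's io_context, via the ioprio_changed flag introduced in blkdev.h below.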
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 67b3774820eb..37db52488262 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -6,7 +6,6 @@
6 */ 6 */
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ext2_fs.h>
10#include <linux/ctype.h> 9#include <linux/ctype.h>
11#include <linux/capability.h> 10#include <linux/capability.h>
12#include <linux/time.h> 11#include <linux/time.h>
@@ -22,13 +21,13 @@ static struct {
22 long jfs_flag; 21 long jfs_flag;
23 long ext2_flag; 22 long ext2_flag;
24} jfs_map[] = { 23} jfs_map[] = {
25 {JFS_NOATIME_FL, EXT2_NOATIME_FL}, 24 {JFS_NOATIME_FL, FS_NOATIME_FL},
26 {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL}, 25 {JFS_DIRSYNC_FL, FS_DIRSYNC_FL},
27 {JFS_SYNC_FL, EXT2_SYNC_FL}, 26 {JFS_SYNC_FL, FS_SYNC_FL},
28 {JFS_SECRM_FL, EXT2_SECRM_FL}, 27 {JFS_SECRM_FL, FS_SECRM_FL},
29 {JFS_UNRM_FL, EXT2_UNRM_FL}, 28 {JFS_UNRM_FL, FS_UNRM_FL},
30 {JFS_APPEND_FL, EXT2_APPEND_FL}, 29 {JFS_APPEND_FL, FS_APPEND_FL},
31 {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL}, 30 {JFS_IMMUTABLE_FL, FS_IMMUTABLE_FL},
32 {0, 0}, 31 {0, 0},
33}; 32};
34 33
diff --git a/fs/mpage.c b/fs/mpage.c
index 1e4598247d0b..692a3e578fc8 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -693,6 +693,8 @@ out:
693 * the call was made get new I/O started against them. If wbc->sync_mode is 693 * the call was made get new I/O started against them. If wbc->sync_mode is
694 * WB_SYNC_ALL then we were called for data integrity and we must wait for 694 * WB_SYNC_ALL then we were called for data integrity and we must wait for
695 * existing IO to complete. 695 * existing IO to complete.
696 *
697 * If you fix this you should check generic_writepages() also!
696 */ 698 */
697int 699int
698mpage_writepages(struct address_space *mapping, 700mpage_writepages(struct address_space *mapping,
diff --git a/fs/namespace.c b/fs/namespace.c
index 6ede3a539ed8..66d921e14fee 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -24,12 +24,11 @@
24#include <linux/namei.h> 24#include <linux/namei.h>
25#include <linux/security.h> 25#include <linux/security.h>
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/ramfs.h>
27#include <asm/uaccess.h> 28#include <asm/uaccess.h>
28#include <asm/unistd.h> 29#include <asm/unistd.h>
29#include "pnode.h" 30#include "pnode.h"
30 31
31extern int __init init_rootfs(void);
32
33/* spinlock for vfsmount related operations, inplace of dcache_lock */ 32/* spinlock for vfsmount related operations, inplace of dcache_lock */
34__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 33__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
35 34
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b674462793d3..f6675d2c386c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -51,7 +51,6 @@
51#include <linux/mm.h> 51#include <linux/mm.h>
52#include <linux/pagemap.h> 52#include <linux/pagemap.h>
53#include <linux/file.h> 53#include <linux/file.h>
54#include <linux/mpage.h>
55#include <linux/writeback.h> 54#include <linux/writeback.h>
56 55
57#include <linux/sunrpc/clnt.h> 56#include <linux/sunrpc/clnt.h>
diff --git a/fs/no-block.c b/fs/no-block.c
new file mode 100644
index 000000000000..d269a93d3467
--- /dev/null
+++ b/fs/no-block.c
@@ -0,0 +1,22 @@
1/* no-block.c: implementation of routines required for non-BLOCK configuration
2 *
3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/fs.h>
14
15static int no_blkdev_open(struct inode * inode, struct file * filp)
16{
17 return -ENODEV;
18}
19
20const struct file_operations def_blk_fops = {
21 .open = no_blkdev_open,
22};
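
With CONFIG_BLOCK=n, the stub def_blk_fops above makes any open of a block special file fail cleanly instead of pulling in block-layer code. Seen from userspace it would look like this minimal sketch (the device path is only an example):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

int main(void)
{
	/* on a CONFIG_BLOCK=n kernel the open itself is expected to fail
	 * with ENODEV, courtesy of no_blkdev_open() */
	int fd = open("/dev/sda", O_RDONLY);

	if (fd < 0)
		printf("open: %s\n", strerror(errno));
	else
		close(fd);
	return 0;
}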
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index d713ce6b3e12..67e665fdb7fc 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5obj-y := check.o 5obj-$(CONFIG_BLOCK) := check.o
6 6
7obj-$(CONFIG_ACORN_PARTITION) += acorn.o 7obj-$(CONFIG_ACORN_PARTITION) += acorn.o
8obj-$(CONFIG_AMIGA_PARTITION) += amiga.o 8obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5bbd60896050..66bc425f2f3d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -277,12 +277,15 @@ static int devinfo_show(struct seq_file *f, void *v)
277 if (i == 0) 277 if (i == 0)
278 seq_printf(f, "Character devices:\n"); 278 seq_printf(f, "Character devices:\n");
279 chrdev_show(f, i); 279 chrdev_show(f, i);
280 } else { 280 }
281#ifdef CONFIG_BLOCK
282 else {
281 i -= CHRDEV_MAJOR_HASH_SIZE; 283 i -= CHRDEV_MAJOR_HASH_SIZE;
282 if (i == 0) 284 if (i == 0)
283 seq_printf(f, "\nBlock devices:\n"); 285 seq_printf(f, "\nBlock devices:\n");
284 blkdev_show(f, i); 286 blkdev_show(f, i);
285 } 287 }
288#endif
286 return 0; 289 return 0;
287} 290}
288 291
@@ -355,6 +358,7 @@ static int stram_read_proc(char *page, char **start, off_t off,
355} 358}
356#endif 359#endif
357 360
361#ifdef CONFIG_BLOCK
358extern struct seq_operations partitions_op; 362extern struct seq_operations partitions_op;
359static int partitions_open(struct inode *inode, struct file *file) 363static int partitions_open(struct inode *inode, struct file *file)
360{ 364{
@@ -378,6 +382,7 @@ static struct file_operations proc_diskstats_operations = {
378 .llseek = seq_lseek, 382 .llseek = seq_lseek,
379 .release = seq_release, 383 .release = seq_release,
380}; 384};
385#endif
381 386
382#ifdef CONFIG_MODULES 387#ifdef CONFIG_MODULES
383extern struct seq_operations modules_op; 388extern struct seq_operations modules_op;
@@ -695,7 +700,9 @@ void __init proc_misc_init(void)
695 entry->proc_fops = &proc_kmsg_operations; 700 entry->proc_fops = &proc_kmsg_operations;
696 create_seq_entry("devices", 0, &proc_devinfo_operations); 701 create_seq_entry("devices", 0, &proc_devinfo_operations);
697 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 702 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
703#ifdef CONFIG_BLOCK
698 create_seq_entry("partitions", 0, &proc_partitions_operations); 704 create_seq_entry("partitions", 0, &proc_partitions_operations);
705#endif
699 create_seq_entry("stat", 0, &proc_stat_operations); 706 create_seq_entry("stat", 0, &proc_stat_operations);
700 create_seq_entry("interrupts", 0, &proc_interrupts_operations); 707 create_seq_entry("interrupts", 0, &proc_interrupts_operations);
701#ifdef CONFIG_SLAB 708#ifdef CONFIG_SLAB
@@ -707,7 +714,9 @@ void __init proc_misc_init(void)
707 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 714 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
708 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 715 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
709 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 716 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
717#ifdef CONFIG_BLOCK
710 create_seq_entry("diskstats", 0, &proc_diskstats_operations); 718 create_seq_entry("diskstats", 0, &proc_diskstats_operations);
719#endif
711#ifdef CONFIG_MODULES 720#ifdef CONFIG_MODULES
712 create_seq_entry("modules", 0, &proc_modules_operations); 721 create_seq_entry("modules", 0, &proc_modules_operations);
713#endif 722#endif
diff --git a/fs/quota.c b/fs/quota.c
index d6a2be826e29..b9dae76a0b6e 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -338,6 +338,34 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
338} 338}
339 339
340/* 340/*
341 * look up a superblock on which quota ops will be performed
342 * - use the name of a block device to find the superblock thereon
343 */
344static inline struct super_block *quotactl_block(const char __user *special)
345{
346#ifdef CONFIG_BLOCK
347 struct block_device *bdev;
348 struct super_block *sb;
349 char *tmp = getname(special);
350
351 if (IS_ERR(tmp))
352 return ERR_PTR(PTR_ERR(tmp));
353 bdev = lookup_bdev(tmp);
354 putname(tmp);
355 if (IS_ERR(bdev))
356 return ERR_PTR(PTR_ERR(bdev));
357 sb = get_super(bdev);
358 bdput(bdev);
359 if (!sb)
360 return ERR_PTR(-ENODEV);
361
362 return sb;
363#else
364 return ERR_PTR(-ENODEV);
365#endif
366}
367
368/*
341 * This is the system call interface. This communicates with 369 * This is the system call interface. This communicates with
342 * the user-level programs. Currently this only supports diskquota 370 * the user-level programs. Currently this only supports diskquota
343 * calls. Maybe we need to add the process quotas etc. in the future, 371 * calls. Maybe we need to add the process quotas etc. in the future,
@@ -347,25 +375,15 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t
347{ 375{
348 uint cmds, type; 376 uint cmds, type;
349 struct super_block *sb = NULL; 377 struct super_block *sb = NULL;
350 struct block_device *bdev;
351 char *tmp;
352 int ret; 378 int ret;
353 379
354 cmds = cmd >> SUBCMDSHIFT; 380 cmds = cmd >> SUBCMDSHIFT;
355 type = cmd & SUBCMDMASK; 381 type = cmd & SUBCMDMASK;
356 382
357 if (cmds != Q_SYNC || special) { 383 if (cmds != Q_SYNC || special) {
358 tmp = getname(special); 384 sb = quotactl_block(special);
359 if (IS_ERR(tmp)) 385 if (IS_ERR(sb))
360 return PTR_ERR(tmp); 386 return PTR_ERR(sb);
361 bdev = lookup_bdev(tmp);
362 putname(tmp);
363 if (IS_ERR(bdev))
364 return PTR_ERR(bdev);
365 sb = get_super(bdev);
366 bdput(bdev);
367 if (!sb)
368 return -ENODEV;
369 } 387 }
370 388
371 ret = check_quotactl_valid(sb, type, cmds, id); 389 ret = check_quotactl_valid(sb, type, cmds, id);
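
quotactl_block() above is what turns the "special" block-device path into a superblock; from userspace the same entry point is reached through quotactl(2). A minimal sketch using the Q_SYNC subcommand, which is the one case that may legally pass a NULL special (names from sys/quota.h; the device path is hypothetical):

#include <stdio.h>
#include <sys/quota.h>

int main(int argc, char **argv)
{
	const char *dev = argc > 1 ? argv[1] : "/dev/sda1";   /* example device */

	/* write out dirty quota information for the filesystem on 'dev';
	 * passing NULL instead of 'dev' syncs quotas on all filesystems */
	if (quotactl(QCMD(Q_SYNC, USRQUOTA), dev, 0, NULL) < 0) {
		perror("quotactl(Q_SYNC)");
		return 1;
	}
	return 0;
}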
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 9aabcc0ccd2d..657050ad7430 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -22,6 +22,9 @@ const struct file_operations reiserfs_dir_operations = {
22 .readdir = reiserfs_readdir, 22 .readdir = reiserfs_readdir,
23 .fsync = reiserfs_dir_fsync, 23 .fsync = reiserfs_dir_fsync,
24 .ioctl = reiserfs_ioctl, 24 .ioctl = reiserfs_ioctl,
25#ifdef CONFIG_COMPAT
26 .compat_ioctl = reiserfs_compat_ioctl,
27#endif
25}; 28};
26 29
27static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, 30static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 1cfbe857ba27..3e08f7161a3d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -2,6 +2,7 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <linux/config.h>
5#include <linux/time.h> 6#include <linux/time.h>
6#include <linux/reiserfs_fs.h> 7#include <linux/reiserfs_fs.h>
7#include <linux/reiserfs_acl.h> 8#include <linux/reiserfs_acl.h>
@@ -1568,6 +1569,9 @@ const struct file_operations reiserfs_file_operations = {
1568 .read = generic_file_read, 1569 .read = generic_file_read,
1569 .write = reiserfs_file_write, 1570 .write = reiserfs_file_write,
1570 .ioctl = reiserfs_ioctl, 1571 .ioctl = reiserfs_ioctl,
1572#ifdef CONFIG_COMPAT
1573 .compat_ioctl = reiserfs_compat_ioctl,
1574#endif
1571 .mmap = generic_file_mmap, 1575 .mmap = generic_file_mmap,
1572 .release = reiserfs_file_release, 1576 .release = reiserfs_file_release,
1573 .fsync = reiserfs_sync_file, 1577 .fsync = reiserfs_sync_file,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index a986b5e1e288..9c57578cb831 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,6 +9,7 @@
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/smp_lock.h> 11#include <linux/smp_lock.h>
12#include <linux/compat.h>
12 13
13static int reiserfs_unpack(struct inode *inode, struct file *filp); 14static int reiserfs_unpack(struct inode *inode, struct file *filp);
14 15
@@ -94,6 +95,40 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
94 } 95 }
95} 96}
96 97
98#ifdef CONFIG_COMPAT
99long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
100 unsigned long arg)
101{
102 struct inode *inode = file->f_dentry->d_inode;
103 int ret;
104
105 /* These are just misnamed, they actually get/put from/to user an int */
106 switch (cmd) {
107 case REISERFS_IOC32_UNPACK:
108 cmd = REISERFS_IOC_UNPACK;
109 break;
110 case REISERFS_IOC32_GETFLAGS:
111 cmd = REISERFS_IOC_GETFLAGS;
112 break;
113 case REISERFS_IOC32_SETFLAGS:
114 cmd = REISERFS_IOC_SETFLAGS;
115 break;
116 case REISERFS_IOC32_GETVERSION:
117 cmd = REISERFS_IOC_GETVERSION;
118 break;
119 case REISERFS_IOC32_SETVERSION:
120 cmd = REISERFS_IOC_SETVERSION;
121 break;
122 default:
123 return -ENOIOCTLCMD;
124 }
125 lock_kernel();
126 ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
127 unlock_kernel();
128 return ret;
129}
130#endif
131
97/* 132/*
98** reiserfs_unpack 133** reiserfs_unpack
99** Function try to convert tail from direct item into indirect. 134** Function try to convert tail from direct item into indirect.
diff --git a/fs/splice.c b/fs/splice.c
index 684bca3d3a10..13e92dd19fbb 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -12,7 +12,7 @@
12 * Jens to support splicing to files, network, direct splicing, etc and 12 * Jens to support splicing to files, network, direct splicing, etc and
13 * fixing lots of bugs. 13 * fixing lots of bugs.
14 * 14 *
15 * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de> 15 * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
16 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> 16 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
17 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> 17 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
18 * 18 *
diff --git a/fs/super.c b/fs/super.c
index 6987824d0dce..aec99ddbe53f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -220,6 +220,37 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
220 return 0; 220 return 0;
221} 221}
222 222
223/*
224 * Write out and wait upon all dirty data associated with this
225 * superblock. Filesystem data as well as the underlying block
226 * device. Takes the superblock lock. Requires a second blkdev
227 * flush by the caller to complete the operation.
228 */
229void __fsync_super(struct super_block *sb)
230{
231 sync_inodes_sb(sb, 0);
232 DQUOT_SYNC(sb);
233 lock_super(sb);
234 if (sb->s_dirt && sb->s_op->write_super)
235 sb->s_op->write_super(sb);
236 unlock_super(sb);
237 if (sb->s_op->sync_fs)
238 sb->s_op->sync_fs(sb, 1);
239 sync_blockdev(sb->s_bdev);
240 sync_inodes_sb(sb, 1);
241}
242
243/*
244 * Write out and wait upon all dirty data associated with this
245 * superblock. Filesystem data as well as the underlying block
246 * device. Takes the superblock lock.
247 */
248int fsync_super(struct super_block *sb)
249{
250 __fsync_super(sb);
251 return sync_blockdev(sb->s_bdev);
252}
253
223/** 254/**
224 * generic_shutdown_super - common helper for ->kill_sb() 255 * generic_shutdown_super - common helper for ->kill_sb()
225 * @sb: superblock to kill 256 * @sb: superblock to kill
@@ -540,8 +571,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
540{ 571{
541 int retval; 572 int retval;
542 573
574#ifdef CONFIG_BLOCK
543 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) 575 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
544 return -EACCES; 576 return -EACCES;
577#endif
545 if (flags & MS_RDONLY) 578 if (flags & MS_RDONLY)
546 acct_auto_close(sb); 579 acct_auto_close(sb);
547 shrink_dcache_sb(sb); 580 shrink_dcache_sb(sb);
@@ -661,6 +694,7 @@ void kill_litter_super(struct super_block *sb)
661 694
662EXPORT_SYMBOL(kill_litter_super); 695EXPORT_SYMBOL(kill_litter_super);
663 696
697#ifdef CONFIG_BLOCK
664static int set_bdev_super(struct super_block *s, void *data) 698static int set_bdev_super(struct super_block *s, void *data)
665{ 699{
666 s->s_bdev = data; 700 s->s_bdev = data;
@@ -756,6 +790,7 @@ void kill_block_super(struct super_block *sb)
756} 790}
757 791
758EXPORT_SYMBOL(kill_block_super); 792EXPORT_SYMBOL(kill_block_super);
793#endif
759 794
760int get_sb_nodev(struct file_system_type *fs_type, 795int get_sb_nodev(struct file_system_type *fs_type,
761 int flags, void *data, 796 int flags, void *data,
diff --git a/fs/sync.c b/fs/sync.c
index 955aef04da28..1de747b5ddb9 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -10,11 +10,124 @@
10#include <linux/syscalls.h> 10#include <linux/syscalls.h>
11#include <linux/linkage.h> 11#include <linux/linkage.h>
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/quotaops.h>
14#include <linux/buffer_head.h>
13 15
14#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ 16#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
15 SYNC_FILE_RANGE_WAIT_AFTER) 17 SYNC_FILE_RANGE_WAIT_AFTER)
16 18
17/* 19/*
20 * sync everything. Start out by waking pdflush, because that writes back
21 * all queues in parallel.
22 */
23static void do_sync(unsigned long wait)
24{
25 wakeup_pdflush(0);
26 sync_inodes(0); /* All mappings, inodes and their blockdevs */
27 DQUOT_SYNC(NULL);
28 sync_supers(); /* Write the superblocks */
29 sync_filesystems(0); /* Start syncing the filesystems */
30 sync_filesystems(wait); /* Waitingly sync the filesystems */
31 sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */
32 if (!wait)
33 printk("Emergency Sync complete\n");
34 if (unlikely(laptop_mode))
35 laptop_sync_completion();
36}
37
38asmlinkage long sys_sync(void)
39{
40 do_sync(1);
41 return 0;
42}
43
44void emergency_sync(void)
45{
46 pdflush_operation(do_sync, 0);
47}
48
49/*
50 * Generic function to fsync a file.
51 *
52 * filp may be NULL if called via the msync of a vma.
53 */
54int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
55{
56 struct inode * inode = dentry->d_inode;
57 struct super_block * sb;
58 int ret, err;
59
60 /* sync the inode to buffers */
61 ret = write_inode_now(inode, 0);
62
63 /* sync the superblock to buffers */
64 sb = inode->i_sb;
65 lock_super(sb);
66 if (sb->s_op->write_super)
67 sb->s_op->write_super(sb);
68 unlock_super(sb);
69
70 /* .. finally sync the buffers to disk */
71 err = sync_blockdev(sb->s_bdev);
72 if (!ret)
73 ret = err;
74 return ret;
75}
76
77long do_fsync(struct file *file, int datasync)
78{
79 int ret;
80 int err;
81 struct address_space *mapping = file->f_mapping;
82
83 if (!file->f_op || !file->f_op->fsync) {
84 /* Why? We can still call filemap_fdatawrite */
85 ret = -EINVAL;
86 goto out;
87 }
88
89 ret = filemap_fdatawrite(mapping);
90
91 /*
92 * We need to protect against concurrent writers, which could cause
93 * livelocks in fsync_buffers_list().
94 */
95 mutex_lock(&mapping->host->i_mutex);
96 err = file->f_op->fsync(file, file->f_dentry, datasync);
97 if (!ret)
98 ret = err;
99 mutex_unlock(&mapping->host->i_mutex);
100 err = filemap_fdatawait(mapping);
101 if (!ret)
102 ret = err;
103out:
104 return ret;
105}
106
107static long __do_fsync(unsigned int fd, int datasync)
108{
109 struct file *file;
110 int ret = -EBADF;
111
112 file = fget(fd);
113 if (file) {
114 ret = do_fsync(file, datasync);
115 fput(file);
116 }
117 return ret;
118}
119
120asmlinkage long sys_fsync(unsigned int fd)
121{
122 return __do_fsync(fd, 0);
123}
124
125asmlinkage long sys_fdatasync(unsigned int fd)
126{
127 return __do_fsync(fd, 1);
128}
129
130/*
18 * sys_sync_file_range() permits finely controlled syncing over a segment of 131 * sys_sync_file_range() permits finely controlled syncing over a segment of
19 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is 132 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
20 * zero then sys_sync_file_range() will operate from offset out to EOF. 133 * zero then sys_sync_file_range() will operate from offset out to EOF.
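
do_fsync() above is shared by sys_fsync() and sys_fdatasync(); the only difference is the datasync flag forwarded to the filesystem's ->fsync method. From an application the pairing looks like this minimal sketch (the temporary file name is arbitrary):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("fsync-demo.tmp", O_CREAT | O_WRONLY | O_TRUNC, 0600);

	if (fd < 0 || write(fd, "hello\n", 6) != 6) {
		perror("write");
		return 1;
	}
	if (fsync(fd) < 0)          /* data + metadata: do_fsync(file, 0) */
		perror("fsync");
	if (fdatasync(fd) < 0)      /* data (and size changes) only: do_fsync(file, 1) */
		perror("fdatasync");
	close(fd);
	return 0;
}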
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 26b364c9d62c..35115bca036e 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,5 +1,6 @@
1config XFS_FS 1config XFS_FS
2 tristate "XFS filesystem support" 2 tristate "XFS filesystem support"
3 depends on BLOCK
3 help 4 help
4 XFS is a high performance journaling filesystem which originated 5 XFS is a high performance journaling filesystem which originated
5 on the SGI IRIX platform. It is completely multi-threaded, can 6 on the SGI IRIX platform. It is completely multi-threaded, can
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 76bdaeab6f62..711c321a7011 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -148,6 +148,7 @@ struct bio {
148#define BIO_RW_BARRIER 2 148#define BIO_RW_BARRIER 2
149#define BIO_RW_FAILFAST 3 149#define BIO_RW_FAILFAST 3
150#define BIO_RW_SYNC 4 150#define BIO_RW_SYNC 4
151#define BIO_RW_META 5
151 152
152/* 153/*
153 * upper 16 bits of bi_rw define the io priority of this bio 154 * upper 16 bits of bi_rw define the io priority of this bio
@@ -178,6 +179,7 @@ struct bio {
178#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) 179#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
179#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) 180#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
180#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) 181#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
182#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META))
181 183
182/* 184/*
183 * will die 185 * will die
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cfde8b3ee919..1d79b8d4ca6d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1,6 +1,7 @@
1#ifndef _LINUX_BLKDEV_H 1#ifndef _LINUX_BLKDEV_H
2#define _LINUX_BLKDEV_H 2#define _LINUX_BLKDEV_H
3 3
4#include <linux/sched.h>
4#include <linux/major.h> 5#include <linux/major.h>
5#include <linux/genhd.h> 6#include <linux/genhd.h>
6#include <linux/list.h> 7#include <linux/list.h>
@@ -16,6 +17,22 @@
16 17
17#include <asm/scatterlist.h> 18#include <asm/scatterlist.h>
18 19
20#ifdef CONFIG_LBD
21# include <asm/div64.h>
22# define sector_div(a, b) do_div(a, b)
23#else
24# define sector_div(n, b)( \
25{ \
26 int _res; \
27 _res = (n) % (b); \
28 (n) /= (b); \
29 _res; \
30} \
31)
32#endif
33
34#ifdef CONFIG_BLOCK
35
19struct scsi_ioctl_command; 36struct scsi_ioctl_command;
20 37
21struct request_queue; 38struct request_queue;
@@ -90,7 +107,7 @@ struct io_context {
90 atomic_t refcount; 107 atomic_t refcount;
91 struct task_struct *task; 108 struct task_struct *task;
92 109
93 int (*set_ioprio)(struct io_context *, unsigned int); 110 unsigned int ioprio_changed;
94 111
95 /* 112 /*
96 * For request batching 113 * For request batching
@@ -104,8 +121,7 @@ struct io_context {
104 121
105void put_io_context(struct io_context *ioc); 122void put_io_context(struct io_context *ioc);
106void exit_io_context(void); 123void exit_io_context(void);
107struct io_context *current_io_context(gfp_t gfp_flags); 124struct io_context *get_io_context(gfp_t gfp_flags, int node);
108struct io_context *get_io_context(gfp_t gfp_flags);
109void copy_io_context(struct io_context **pdst, struct io_context **psrc); 125void copy_io_context(struct io_context **pdst, struct io_context **psrc);
110void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); 126void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
111 127
@@ -120,6 +136,90 @@ struct request_list {
120 wait_queue_head_t wait[2]; 136 wait_queue_head_t wait[2];
121}; 137};
122 138
139/*
140 * request command types
141 */
142enum rq_cmd_type_bits {
143 REQ_TYPE_FS = 1, /* fs request */
144 REQ_TYPE_BLOCK_PC, /* scsi command */
145 REQ_TYPE_SENSE, /* sense request */
146 REQ_TYPE_PM_SUSPEND, /* suspend request */
147 REQ_TYPE_PM_RESUME, /* resume request */
148 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
149 REQ_TYPE_FLUSH, /* flush request */
150 REQ_TYPE_SPECIAL, /* driver defined type */
151 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
152 /*
153 * for ATA/ATAPI devices. this really doesn't belong here, ide should
154 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
155 * private REQ_LB opcodes to differentiate what type of request this is
156 */
157 REQ_TYPE_ATA_CMD,
158 REQ_TYPE_ATA_TASK,
159 REQ_TYPE_ATA_TASKFILE,
160};
161
162/*
163 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
164 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
165 * SCSI cdb.
166 *
167 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
168 * typically to differentiate REQ_TYPE_SPECIAL requests.
169 *
170 */
171enum {
172 /*
173 * just examples for now
174 */
175 REQ_LB_OP_EJECT = 0x40, /* eject request */
176 REQ_LB_OP_FLUSH = 0x41, /* flush device */
177};
178
179/*
180 * request type modified bits. first three bits match BIO_RW* bits, important
181 */
182enum rq_flag_bits {
183 __REQ_RW, /* not set, read. set, write */
184 __REQ_FAILFAST, /* no low level driver retries */
185 __REQ_SORTED, /* elevator knows about this request */
186 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
187 __REQ_HARDBARRIER, /* may not be passed by drive either */
188 __REQ_FUA, /* forced unit access */
189 __REQ_NOMERGE, /* don't touch this for merging */
190 __REQ_STARTED, /* drive already may have started this one */
191 __REQ_DONTPREP, /* don't call prep for this one */
192 __REQ_QUEUED, /* uses queueing */
193 __REQ_ELVPRIV, /* elevator private data attached */
194 __REQ_FAILED, /* set if the request failed */
195 __REQ_QUIET, /* don't worry about errors */
196 __REQ_PREEMPT, /* set for "ide_preempt" requests */
197 __REQ_ORDERED_COLOR, /* is before or after barrier */
198 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */
199 __REQ_ALLOCED, /* request came from our alloc pool */
200 __REQ_RW_META, /* metadata io request */
201 __REQ_NR_BITS, /* stops here */
202};
203
204#define REQ_RW (1 << __REQ_RW)
205#define REQ_FAILFAST (1 << __REQ_FAILFAST)
206#define REQ_SORTED (1 << __REQ_SORTED)
207#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
208#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
209#define REQ_FUA (1 << __REQ_FUA)
210#define REQ_NOMERGE (1 << __REQ_NOMERGE)
211#define REQ_STARTED (1 << __REQ_STARTED)
212#define REQ_DONTPREP (1 << __REQ_DONTPREP)
213#define REQ_QUEUED (1 << __REQ_QUEUED)
214#define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
215#define REQ_FAILED (1 << __REQ_FAILED)
216#define REQ_QUIET (1 << __REQ_QUIET)
217#define REQ_PREEMPT (1 << __REQ_PREEMPT)
218#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
219#define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
220#define REQ_ALLOCED (1 << __REQ_ALLOCED)
221#define REQ_RW_META (1 << __REQ_RW_META)
222
123#define BLK_MAX_CDB 16 223#define BLK_MAX_CDB 16
124 224
125/* 225/*
@@ -129,30 +229,46 @@ struct request {
129 struct list_head queuelist; 229 struct list_head queuelist;
130 struct list_head donelist; 230 struct list_head donelist;
131 231
132 unsigned long flags; /* see REQ_ bits below */ 232 request_queue_t *q;
233
234 unsigned int cmd_flags;
235 enum rq_cmd_type_bits cmd_type;
133 236
134 /* Maintain bio traversal state for part by part I/O submission. 237 /* Maintain bio traversal state for part by part I/O submission.
135 * hard_* are block layer internals, no driver should touch them! 238 * hard_* are block layer internals, no driver should touch them!
136 */ 239 */
137 240
138 sector_t sector; /* next sector to submit */ 241 sector_t sector; /* next sector to submit */
242 sector_t hard_sector; /* next sector to complete */
139 unsigned long nr_sectors; /* no. of sectors left to submit */ 243 unsigned long nr_sectors; /* no. of sectors left to submit */
244 unsigned long hard_nr_sectors; /* no. of sectors left to complete */
140 /* no. of sectors left to submit in the current segment */ 245 /* no. of sectors left to submit in the current segment */
141 unsigned int current_nr_sectors; 246 unsigned int current_nr_sectors;
142 247
143 sector_t hard_sector; /* next sector to complete */
144 unsigned long hard_nr_sectors; /* no. of sectors left to complete */
145 /* no. of sectors left to complete in the current segment */ 248 /* no. of sectors left to complete in the current segment */
146 unsigned int hard_cur_sectors; 249 unsigned int hard_cur_sectors;
147 250
148 struct bio *bio; 251 struct bio *bio;
149 struct bio *biotail; 252 struct bio *biotail;
150 253
254 struct hlist_node hash; /* merge hash */
255 /*
256 * The rb_node is only used inside the io scheduler, requests
257 * are pruned when moved to the dispatch queue. So let the
258 * completion_data share space with the rb_node.
259 */
260 union {
261 struct rb_node rb_node; /* sort/lookup */
262 void *completion_data;
263 };
264
265 /*
266 * two pointers are available for the IO schedulers, if they need
267 * more they have to dynamically allocate it.
268 */
151 void *elevator_private; 269 void *elevator_private;
152 void *completion_data; 270 void *elevator_private2;
153 271
154 int rq_status; /* should split this into a few status bits */
155 int errors;
156 struct gendisk *rq_disk; 272 struct gendisk *rq_disk;
157 unsigned long start_time; 273 unsigned long start_time;
158 274
@@ -170,15 +286,13 @@ struct request {
170 286
171 unsigned short ioprio; 287 unsigned short ioprio;
172 288
289 void *special;
290 char *buffer;
291
173 int tag; 292 int tag;
293 int errors;
174 294
175 int ref_count; 295 int ref_count;
176 request_queue_t *q;
177 struct request_list *rl;
178
179 struct completion *waiting;
180 void *special;
181 char *buffer;
182 296
183 /* 297 /*
184 * when request is used as a packet command carrier 298 * when request is used as a packet command carrier
@@ -195,80 +309,14 @@ struct request {
195 int retries; 309 int retries;
196 310
197 /* 311 /*
198 * completion callback. end_io_data should be folded in with waiting 312 * completion callback.
199 */ 313 */
200 rq_end_io_fn *end_io; 314 rq_end_io_fn *end_io;
201 void *end_io_data; 315 void *end_io_data;
202}; 316};
203 317
204/* 318/*
205 * first three bits match BIO_RW* bits, important 319 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
206 */
207enum rq_flag_bits {
208 __REQ_RW, /* not set, read. set, write */
209 __REQ_FAILFAST, /* no low level driver retries */
210 __REQ_SORTED, /* elevator knows about this request */
211 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
212 __REQ_HARDBARRIER, /* may not be passed by drive either */
213 __REQ_FUA, /* forced unit access */
214 __REQ_CMD, /* is a regular fs rw request */
215 __REQ_NOMERGE, /* don't touch this for merging */
216 __REQ_STARTED, /* drive already may have started this one */
217 __REQ_DONTPREP, /* don't call prep for this one */
218 __REQ_QUEUED, /* uses queueing */
219 __REQ_ELVPRIV, /* elevator private data attached */
220 /*
221 * for ATA/ATAPI devices
222 */
223 __REQ_PC, /* packet command (special) */
224 __REQ_BLOCK_PC, /* queued down pc from block layer */
225 __REQ_SENSE, /* sense retrieval */
226
227 __REQ_FAILED, /* set if the request failed */
228 __REQ_QUIET, /* don't worry about errors */
229 __REQ_SPECIAL, /* driver supplied command */
230 __REQ_DRIVE_CMD,
231 __REQ_DRIVE_TASK,
232 __REQ_DRIVE_TASKFILE,
233 __REQ_PREEMPT, /* set for "ide_preempt" requests */
234 __REQ_PM_SUSPEND, /* suspend request */
235 __REQ_PM_RESUME, /* resume request */
236 __REQ_PM_SHUTDOWN, /* shutdown request */
237 __REQ_ORDERED_COLOR, /* is before or after barrier */
238 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */
239 __REQ_NR_BITS, /* stops here */
240};
241
242#define REQ_RW (1 << __REQ_RW)
243#define REQ_FAILFAST (1 << __REQ_FAILFAST)
244#define REQ_SORTED (1 << __REQ_SORTED)
245#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
246#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
247#define REQ_FUA (1 << __REQ_FUA)
248#define REQ_CMD (1 << __REQ_CMD)
249#define REQ_NOMERGE (1 << __REQ_NOMERGE)
250#define REQ_STARTED (1 << __REQ_STARTED)
251#define REQ_DONTPREP (1 << __REQ_DONTPREP)
252#define REQ_QUEUED (1 << __REQ_QUEUED)
253#define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
254#define REQ_PC (1 << __REQ_PC)
255#define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC)
256#define REQ_SENSE (1 << __REQ_SENSE)
257#define REQ_FAILED (1 << __REQ_FAILED)
258#define REQ_QUIET (1 << __REQ_QUIET)
259#define REQ_SPECIAL (1 << __REQ_SPECIAL)
260#define REQ_DRIVE_CMD (1 << __REQ_DRIVE_CMD)
261#define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK)
262#define REQ_DRIVE_TASKFILE (1 << __REQ_DRIVE_TASKFILE)
263#define REQ_PREEMPT (1 << __REQ_PREEMPT)
264#define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND)
265#define REQ_PM_RESUME (1 << __REQ_PM_RESUME)
266#define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN)
267#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
268#define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
269
270/*
271 * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
272 * requests. Some step values could eventually be made generic. 320 * requests. Some step values could eventually be made generic.
273 */ 321 */
274struct request_pm_state 322struct request_pm_state
@@ -432,9 +480,6 @@ struct request_queue
432 struct mutex sysfs_lock; 480 struct mutex sysfs_lock;
433}; 481};
434 482
435#define RQ_INACTIVE (-1)
436#define RQ_ACTIVE 1
437
438#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ 483#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
439#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 484#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
440#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ 485#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
@@ -490,25 +535,34 @@ enum {
490#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 535#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
491#define blk_queue_flushing(q) ((q)->ordseq) 536#define blk_queue_flushing(q) ((q)->ordseq)
492 537
493#define blk_fs_request(rq) ((rq)->flags & REQ_CMD) 538#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
494#define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC) 539#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
495#define blk_noretry_request(rq) ((rq)->flags & REQ_FAILFAST) 540#define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL)
496#define blk_rq_started(rq) ((rq)->flags & REQ_STARTED) 541#define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE)
542
543#define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST)
544#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
497 545
498#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) 546#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq))
499 547
500#define blk_pm_suspend_request(rq) ((rq)->flags & REQ_PM_SUSPEND) 548#define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
501#define blk_pm_resume_request(rq) ((rq)->flags & REQ_PM_RESUME) 549#define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
502#define blk_pm_request(rq) \ 550#define blk_pm_request(rq) \
503 ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME)) 551 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
504 552
505#define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED) 553#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
506#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) 554#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
507#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA) 555#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
508 556
509#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 557#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
510 558
511#define rq_data_dir(rq) ((rq)->flags & 1) 559#define rq_data_dir(rq) ((rq)->cmd_flags & 1)
560
561/*
562 * We regard a request as sync, if it's a READ or a SYNC write.
563 */
564#define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
565#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)
512 566
513static inline int blk_queue_full(struct request_queue *q, int rw) 567static inline int blk_queue_full(struct request_queue *q, int rw)
514{ 568{
@@ -541,13 +595,7 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw)
541#define RQ_NOMERGE_FLAGS \ 595#define RQ_NOMERGE_FLAGS \
542 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 596 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
543#define rq_mergeable(rq) \ 597#define rq_mergeable(rq) \
544 (!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) 598 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
545
546/*
547 * noop, requests are automagically marked as active/inactive by I/O
548 * scheduler -- see elv_next_request
549 */
550#define blk_queue_headactive(q, head_active)
551 599
552/* 600/*
553 * q->prep_rq_fn return values 601 * q->prep_rq_fn return values
@@ -586,11 +634,6 @@ static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio)
586 if ((rq->bio)) \ 634 if ((rq->bio)) \
587 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) 635 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
588 636
589struct sec_size {
590 unsigned block_size;
591 unsigned block_size_bits;
592};
593
594extern int blk_register_queue(struct gendisk *disk); 637extern int blk_register_queue(struct gendisk *disk);
595extern void blk_unregister_queue(struct gendisk *disk); 638extern void blk_unregister_queue(struct gendisk *disk);
596extern void register_disk(struct gendisk *dev); 639extern void register_disk(struct gendisk *dev);
@@ -612,6 +655,7 @@ extern void blk_stop_queue(request_queue_t *q);
612extern void blk_sync_queue(struct request_queue *q); 655extern void blk_sync_queue(struct request_queue *q);
613extern void __blk_stop_queue(request_queue_t *q); 656extern void __blk_stop_queue(request_queue_t *q);
614extern void blk_run_queue(request_queue_t *); 657extern void blk_run_queue(request_queue_t *);
658extern void blk_start_queueing(request_queue_t *);
615extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); 659extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
616extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); 660extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
617extern int blk_rq_unmap_user(struct bio *, unsigned int); 661extern int blk_rq_unmap_user(struct bio *, unsigned int);
@@ -655,16 +699,6 @@ extern void end_that_request_last(struct request *, int);
655extern void end_request(struct request *req, int uptodate); 699extern void end_request(struct request *req, int uptodate);
656extern void blk_complete_request(struct request *); 700extern void blk_complete_request(struct request *);
657 701
658static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
659{
660 if (blk_fs_request(rq))
661 return (nr_bytes >= (rq->hard_nr_sectors << 9));
662 else if (blk_pc_request(rq))
663 return nr_bytes >= rq->data_len;
664
665 return 0;
666}
667
668/* 702/*
669 * end_that_request_first/chunk() takes an uptodate argument. we account 703 * end_that_request_first/chunk() takes an uptodate argument. we account
670 * any value <= as an io error. 0 means -EIO for compatibility reasons, 704 * any value <= as an io error. 0 means -EIO for compatibility reasons,
@@ -679,21 +713,6 @@ static inline void blkdev_dequeue_request(struct request *req)
679} 713}
680 714
681/* 715/*
682 * This should be in elevator.h, but that requires pulling in rq and q
683 */
684static inline void elv_dispatch_add_tail(struct request_queue *q,
685 struct request *rq)
686{
687 if (q->last_merge == rq)
688 q->last_merge = NULL;
689 q->nr_sorted--;
690
691 q->end_sector = rq_end_sector(rq);
692 q->boundary_rq = rq;
693 list_add_tail(&rq->queuelist, &q->queue_head);
694}
695
696/*
697 * Access functions for manipulating queue properties 716 * Access functions for manipulating queue properties
698 */ 717 */
699extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn, 718extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn,
@@ -737,7 +756,7 @@ extern void blk_put_queue(request_queue_t *);
737 */ 756 */
738#define blk_queue_tag_depth(q) ((q)->queue_tags->busy) 757#define blk_queue_tag_depth(q) ((q)->queue_tags->busy)
739#define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) 758#define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth)
740#define blk_rq_tagged(rq) ((rq)->flags & REQ_QUEUED) 759#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
741extern int blk_queue_start_tag(request_queue_t *, struct request *); 760extern int blk_queue_start_tag(request_queue_t *, struct request *);
742extern struct request *blk_queue_find_tag(request_queue_t *, int); 761extern struct request *blk_queue_find_tag(request_queue_t *, int);
743extern void blk_queue_end_tag(request_queue_t *, struct request *); 762extern void blk_queue_end_tag(request_queue_t *, struct request *);
@@ -787,14 +806,6 @@ static inline int queue_dma_alignment(request_queue_t *q)
787 return retval; 806 return retval;
788} 807}
789 808
790static inline int bdev_dma_aligment(struct block_device *bdev)
791{
792 return queue_dma_alignment(bdev_get_queue(bdev));
793}
794
795#define blk_finished_io(nsects) do { } while (0)
796#define blk_started_io(nsects) do { } while (0)
797
798/* assumes size > 256 */ 809/* assumes size > 256 */
799static inline unsigned int blksize_bits(unsigned int size) 810static inline unsigned int blksize_bits(unsigned int size)
800{ 811{
@@ -824,24 +835,32 @@ struct work_struct;
824int kblockd_schedule_work(struct work_struct *work); 835int kblockd_schedule_work(struct work_struct *work);
825void kblockd_flush(void); 836void kblockd_flush(void);
826 837
827#ifdef CONFIG_LBD
828# include <asm/div64.h>
829# define sector_div(a, b) do_div(a, b)
830#else
831# define sector_div(n, b)( \
832{ \
833 int _res; \
834 _res = (n) % (b); \
835 (n) /= (b); \
836 _res; \
837} \
838)
839#endif
840
841#define MODULE_ALIAS_BLOCKDEV(major,minor) \ 838#define MODULE_ALIAS_BLOCKDEV(major,minor) \
842 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 839 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
843#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 840#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
844 MODULE_ALIAS("block-major-" __stringify(major) "-*") 841 MODULE_ALIAS("block-major-" __stringify(major) "-*")
845 842
846 843
844#else /* CONFIG_BLOCK */
845/*
846 * stubs for when the block layer is configured out
847 */
848#define buffer_heads_over_limit 0
849
850static inline long blk_congestion_wait(int rw, long timeout)
851{
852 return io_schedule_timeout(timeout);
853}
854
855static inline long nr_blockdev_pages(void)
856{
857 return 0;
858}
859
860static inline void exit_io_context(void)
861{
862}
863
864#endif /* CONFIG_BLOCK */
865
847#endif 866#endif
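
For illustration only (not part of this commit): the blkdev.h hunks above split the old rq->flags word into rq->cmd_type (what kind of request this is) and rq->cmd_flags (attribute bits), and add helpers such as blk_fs_request(), blk_pc_request() and rq_is_sync(). The sketch below shows how a low-level driver test might look after the split; the function name and return codes are hypothetical.

#include <linux/blkdev.h>

/* hypothetical classifier, for illustration only */
static int example_classify_rq(struct request *rq)
{
	/* before the split this was: if (rq->flags & REQ_CMD) ... */
	if (blk_fs_request(rq))			/* rq->cmd_type == REQ_TYPE_FS */
		return rq_is_sync(rq) ? 1 : 0;	/* READ, or a write marked REQ_RW_SYNC */

	/* before: if (rq->flags & REQ_BLOCK_PC) ... */
	if (blk_pc_request(rq))			/* rq->cmd_type == REQ_TYPE_BLOCK_PC */
		return 2;			/* rq->cmd[] carries a SCSI CDB */

	/* attribute bits (barrier, FUA, ...) now live in rq->cmd_flags */
	if (rq->cmd_flags & (REQ_HARDBARRIER | REQ_FUA))
		return 3;

	return -1;
}
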
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7520cc1ff9e2..b99a714fcac6 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -20,6 +20,7 @@ enum blktrace_cat {
20 BLK_TC_PC = 1 << 9, /* pc requests */ 20 BLK_TC_PC = 1 << 9, /* pc requests */
21 BLK_TC_NOTIFY = 1 << 10, /* special message */ 21 BLK_TC_NOTIFY = 1 << 10, /* special message */
22 BLK_TC_AHEAD = 1 << 11, /* readahead */ 22 BLK_TC_AHEAD = 1 << 11, /* readahead */
23 BLK_TC_META = 1 << 12, /* metadata */
23 24
24 BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ 25 BLK_TC_END = 1 << 15, /* only 16-bits, reminder */
25}; 26};
@@ -148,7 +149,7 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
148 u32 what) 149 u32 what)
149{ 150{
150 struct blk_trace *bt = q->blk_trace; 151 struct blk_trace *bt = q->blk_trace;
151 int rw = rq->flags & 0x03; 152 int rw = rq->cmd_flags & 0x03;
152 153
153 if (likely(!bt)) 154 if (likely(!bt))
154 return; 155 return;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 737e407d0cd1..131ffd37e716 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -14,6 +14,8 @@
14#include <linux/wait.h> 14#include <linux/wait.h>
15#include <asm/atomic.h> 15#include <asm/atomic.h>
16 16
17#ifdef CONFIG_BLOCK
18
17enum bh_state_bits { 19enum bh_state_bits {
18 BH_Uptodate, /* Contains valid data */ 20 BH_Uptodate, /* Contains valid data */
19 BH_Dirty, /* Is dirty */ 21 BH_Dirty, /* Is dirty */
@@ -190,9 +192,7 @@ extern int buffer_heads_over_limit;
190 * Generic address_space_operations implementations for buffer_head-backed 192 * Generic address_space_operations implementations for buffer_head-backed
191 * address_spaces. 193 * address_spaces.
192 */ 194 */
193int try_to_release_page(struct page * page, gfp_t gfp_mask);
194void block_invalidatepage(struct page *page, unsigned long offset); 195void block_invalidatepage(struct page *page, unsigned long offset);
195void do_invalidatepage(struct page *page, unsigned long offset);
196int block_write_full_page(struct page *page, get_block_t *get_block, 196int block_write_full_page(struct page *page, get_block_t *get_block,
197 struct writeback_control *wbc); 197 struct writeback_control *wbc);
198int block_read_full_page(struct page*, get_block_t*); 198int block_read_full_page(struct page*, get_block_t*);
@@ -302,4 +302,19 @@ static inline void lock_buffer(struct buffer_head *bh)
302 __lock_buffer(bh); 302 __lock_buffer(bh);
303} 303}
304 304
305extern int __set_page_dirty_buffers(struct page *page);
306
307#else /* CONFIG_BLOCK */
308
309static inline void buffer_init(void) {}
310static inline int try_to_free_buffers(struct page *page) { return 1; }
311static inline int sync_blockdev(struct block_device *bdev) { return 0; }
312static inline int inode_has_buffers(struct inode *inode) { return 0; }
313static inline void invalidate_inode_buffers(struct inode *inode) {}
314static inline int remove_inode_buffers(struct inode *inode) { return 1; }
315static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
316static inline void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) {}
317
318
319#endif /* CONFIG_BLOCK */
305#endif /* _LINUX_BUFFER_HEAD_H */ 320#endif /* _LINUX_BUFFER_HEAD_H */
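
For illustration only (not part of this commit): the !CONFIG_BLOCK stubs added to buffer_head.h above return harmless defaults, so generic code can keep calling the buffer helpers without wrapping every call in #ifdef CONFIG_BLOCK. A hypothetical caller:

#include <linux/fs.h>
#include <linux/buffer_head.h>

/* hypothetical helper, illustration only */
static int example_drop_buffers(struct inode *inode)
{
	if (inode_has_buffers(inode))			/* stub returns 0 when !CONFIG_BLOCK */
		invalidate_inode_buffers(inode);	/* stub is a no-op */
	return remove_inode_buffers(inode);		/* stub reports success (returns 1) */
}
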
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index bea0255196c4..d61ef5951538 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -90,6 +90,7 @@ COMPATIBLE_IOCTL(FDTWADDLE)
90COMPATIBLE_IOCTL(FDFMTTRK) 90COMPATIBLE_IOCTL(FDFMTTRK)
91COMPATIBLE_IOCTL(FDRAWCMD) 91COMPATIBLE_IOCTL(FDRAWCMD)
92/* 0x12 */ 92/* 0x12 */
93#ifdef CONFIG_BLOCK
93COMPATIBLE_IOCTL(BLKRASET) 94COMPATIBLE_IOCTL(BLKRASET)
94COMPATIBLE_IOCTL(BLKROSET) 95COMPATIBLE_IOCTL(BLKROSET)
95COMPATIBLE_IOCTL(BLKROGET) 96COMPATIBLE_IOCTL(BLKROGET)
@@ -103,6 +104,7 @@ COMPATIBLE_IOCTL(BLKTRACESETUP)
103COMPATIBLE_IOCTL(BLKTRACETEARDOWN) 104COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
104ULONG_IOCTL(BLKRASET) 105ULONG_IOCTL(BLKRASET)
105ULONG_IOCTL(BLKFRASET) 106ULONG_IOCTL(BLKFRASET)
107#endif
106/* RAID */ 108/* RAID */
107COMPATIBLE_IOCTL(RAID_VERSION) 109COMPATIBLE_IOCTL(RAID_VERSION)
108COMPATIBLE_IOCTL(GET_ARRAY_INFO) 110COMPATIBLE_IOCTL(GET_ARRAY_INFO)
@@ -395,12 +397,6 @@ COMPATIBLE_IOCTL(DVD_WRITE_STRUCT)
395COMPATIBLE_IOCTL(DVD_AUTH) 397COMPATIBLE_IOCTL(DVD_AUTH)
396/* pktcdvd */ 398/* pktcdvd */
397COMPATIBLE_IOCTL(PACKET_CTRL_CMD) 399COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
398/* Big L */
399ULONG_IOCTL(LOOP_SET_FD)
400ULONG_IOCTL(LOOP_CHANGE_FD)
401COMPATIBLE_IOCTL(LOOP_CLR_FD)
402COMPATIBLE_IOCTL(LOOP_GET_STATUS64)
403COMPATIBLE_IOCTL(LOOP_SET_STATUS64)
404/* Big A */ 400/* Big A */
405/* sparc only */ 401/* sparc only */
406/* Big Q for sound/OSS */ 402/* Big Q for sound/OSS */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1713ace808bf..b3370ef5164d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -1,12 +1,16 @@
1#ifndef _LINUX_ELEVATOR_H 1#ifndef _LINUX_ELEVATOR_H
2#define _LINUX_ELEVATOR_H 2#define _LINUX_ELEVATOR_H
3 3
4#include <linux/percpu.h>
5
6#ifdef CONFIG_BLOCK
7
4typedef int (elevator_merge_fn) (request_queue_t *, struct request **, 8typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
5 struct bio *); 9 struct bio *);
6 10
7typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *); 11typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *);
8 12
9typedef void (elevator_merged_fn) (request_queue_t *, struct request *); 13typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int);
10 14
11typedef int (elevator_dispatch_fn) (request_queue_t *, int); 15typedef int (elevator_dispatch_fn) (request_queue_t *, int);
12 16
@@ -14,9 +18,9 @@ typedef void (elevator_add_req_fn) (request_queue_t *, struct request *);
14typedef int (elevator_queue_empty_fn) (request_queue_t *); 18typedef int (elevator_queue_empty_fn) (request_queue_t *);
15typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); 19typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
16typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); 20typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
17typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); 21typedef int (elevator_may_queue_fn) (request_queue_t *, int);
18 22
19typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); 23typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, gfp_t);
20typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); 24typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
21typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); 25typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
22typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); 26typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
@@ -82,19 +86,21 @@ struct elevator_queue
82 struct kobject kobj; 86 struct kobject kobj;
83 struct elevator_type *elevator_type; 87 struct elevator_type *elevator_type;
84 struct mutex sysfs_lock; 88 struct mutex sysfs_lock;
89 struct hlist_head *hash;
85}; 90};
86 91
87/* 92/*
88 * block elevator interface 93 * block elevator interface
89 */ 94 */
90extern void elv_dispatch_sort(request_queue_t *, struct request *); 95extern void elv_dispatch_sort(request_queue_t *, struct request *);
96extern void elv_dispatch_add_tail(request_queue_t *, struct request *);
91extern void elv_add_request(request_queue_t *, struct request *, int, int); 97extern void elv_add_request(request_queue_t *, struct request *, int, int);
92extern void __elv_add_request(request_queue_t *, struct request *, int, int); 98extern void __elv_add_request(request_queue_t *, struct request *, int, int);
93extern void elv_insert(request_queue_t *, struct request *, int); 99extern void elv_insert(request_queue_t *, struct request *, int);
94extern int elv_merge(request_queue_t *, struct request **, struct bio *); 100extern int elv_merge(request_queue_t *, struct request **, struct bio *);
95extern void elv_merge_requests(request_queue_t *, struct request *, 101extern void elv_merge_requests(request_queue_t *, struct request *,
96 struct request *); 102 struct request *);
97extern void elv_merged_request(request_queue_t *, struct request *); 103extern void elv_merged_request(request_queue_t *, struct request *, int);
98extern void elv_dequeue_request(request_queue_t *, struct request *); 104extern void elv_dequeue_request(request_queue_t *, struct request *);
99extern void elv_requeue_request(request_queue_t *, struct request *); 105extern void elv_requeue_request(request_queue_t *, struct request *);
100extern int elv_queue_empty(request_queue_t *); 106extern int elv_queue_empty(request_queue_t *);
@@ -103,9 +109,9 @@ extern struct request *elv_former_request(request_queue_t *, struct request *);
103extern struct request *elv_latter_request(request_queue_t *, struct request *); 109extern struct request *elv_latter_request(request_queue_t *, struct request *);
104extern int elv_register_queue(request_queue_t *q); 110extern int elv_register_queue(request_queue_t *q);
105extern void elv_unregister_queue(request_queue_t *q); 111extern void elv_unregister_queue(request_queue_t *q);
106extern int elv_may_queue(request_queue_t *, int, struct bio *); 112extern int elv_may_queue(request_queue_t *, int);
107extern void elv_completed_request(request_queue_t *, struct request *); 113extern void elv_completed_request(request_queue_t *, struct request *);
108extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t); 114extern int elv_set_request(request_queue_t *, struct request *, gfp_t);
109extern void elv_put_request(request_queue_t *, struct request *); 115extern void elv_put_request(request_queue_t *, struct request *);
110 116
111/* 117/*
@@ -125,6 +131,19 @@ extern void elevator_exit(elevator_t *);
125extern int elv_rq_merge_ok(struct request *, struct bio *); 131extern int elv_rq_merge_ok(struct request *, struct bio *);
126 132
127/* 133/*
134 * Helper functions.
135 */
136extern struct request *elv_rb_former_request(request_queue_t *, struct request *);
137extern struct request *elv_rb_latter_request(request_queue_t *, struct request *);
138
139/*
140 * rb support functions.
141 */
142extern struct request *elv_rb_add(struct rb_root *, struct request *);
143extern void elv_rb_del(struct rb_root *, struct request *);
144extern struct request *elv_rb_find(struct rb_root *, sector_t);
145
146/*
128 * Return values from elevator merger 147 * Return values from elevator merger
129 */ 148 */
130#define ELEVATOR_NO_MERGE 0 149#define ELEVATOR_NO_MERGE 0
@@ -149,5 +168,42 @@ enum {
149}; 168};
150 169
151#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) 170#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors)
171#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
172
173/*
174 * Hack to reuse the donelist list_head as the fifo time holder while
175 * the request is in the io scheduler. Saves an unsigned long in rq.
176 */
177#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next)
178#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp))
179#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
180#define rq_fifo_clear(rq) do { \
181 list_del_init(&(rq)->queuelist); \
182 INIT_LIST_HEAD(&(rq)->donelist); \
183 } while (0)
152 184
185/*
186 * io context count accounting
187 */
188#define elv_ioc_count_mod(name, __val) \
189 do { \
190 preempt_disable(); \
191 __get_cpu_var(name) += (__val); \
192 preempt_enable(); \
193 } while (0)
194
195#define elv_ioc_count_inc(name) elv_ioc_count_mod(name, 1)
196#define elv_ioc_count_dec(name) elv_ioc_count_mod(name, -1)
197
198#define elv_ioc_count_read(name) \
199({ \
200 unsigned long __val = 0; \
201 int __cpu; \
202 smp_wmb(); \
203 for_each_possible_cpu(__cpu) \
204 __val += per_cpu(name, __cpu); \
205 __val; \
206})
207
208#endif /* CONFIG_BLOCK */
153#endif 209#endif
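
For illustration only (not part of this commit): the new elevator.h helpers give io schedulers a shared red-black-tree sort (elv_rb_add/elv_rb_del/elv_rb_find) and reuse rq->donelist to carry a fifo deadline while the request sits in the scheduler. A hypothetical add_request hook might combine them like this; the struct and field names are made up.

#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/rbtree.h>
#include <linux/jiffies.h>

/* hypothetical per-queue scheduler data, illustration only */
struct example_sched_data {
	struct rb_root sort_list;
	unsigned long fifo_expire;
};

static void example_add_request(request_queue_t *q, struct request *rq)
{
	struct example_sched_data *ed = q->elevator->elevator_data;

	/* sector-sorted lookup via the new rb helpers */
	elv_rb_add(&ed->sort_list, rq);

	/* stamp the fifo deadline in the reused donelist pointer */
	rq_set_fifo_time(rq, jiffies + ed->fifo_expire);
}
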
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 33a1aa107329..153d755376a4 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -165,41 +165,49 @@ struct ext2_group_desc
165#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) 165#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1)
166 166
167/* 167/*
168 * Inode flags 168 * Inode flags (GETFLAGS/SETFLAGS)
169 */ 169 */
170#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */ 170#define EXT2_SECRM_FL FS_SECRM_FL /* Secure deletion */
171#define EXT2_UNRM_FL 0x00000002 /* Undelete */ 171#define EXT2_UNRM_FL FS_UNRM_FL /* Undelete */
172#define EXT2_COMPR_FL 0x00000004 /* Compress file */ 172#define EXT2_COMPR_FL FS_COMPR_FL /* Compress file */
173#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ 173#define EXT2_SYNC_FL FS_SYNC_FL /* Synchronous updates */
174#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */ 174#define EXT2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */
175#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ 175#define EXT2_APPEND_FL FS_APPEND_FL /* writes to file may only append */
176#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ 176#define EXT2_NODUMP_FL FS_NODUMP_FL /* do not dump file */
177#define EXT2_NOATIME_FL 0x00000080 /* do not update atime */ 177#define EXT2_NOATIME_FL FS_NOATIME_FL /* do not update atime */
178/* Reserved for compression usage... */ 178/* Reserved for compression usage... */
179#define EXT2_DIRTY_FL 0x00000100 179#define EXT2_DIRTY_FL FS_DIRTY_FL
180#define EXT2_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ 180#define EXT2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */
181#define EXT2_NOCOMP_FL 0x00000400 /* Don't compress */ 181#define EXT2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */
182#define EXT2_ECOMPR_FL 0x00000800 /* Compression error */ 182#define EXT2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */
183/* End compression flags --- maybe not all used */ 183/* End compression flags --- maybe not all used */
184#define EXT2_BTREE_FL 0x00001000 /* btree format dir */ 184#define EXT2_BTREE_FL FS_BTREE_FL /* btree format dir */
185#define EXT2_INDEX_FL 0x00001000 /* hash-indexed directory */ 185#define EXT2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */
186#define EXT2_IMAGIC_FL 0x00002000 /* AFS directory */ 186#define EXT2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */
187#define EXT2_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ 187#define EXT2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */
188#define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ 188#define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
189#define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ 189#define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
190#define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 190#define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
191#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ 191#define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
192 192
193#define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ 193#define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
194#define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ 194#define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
195 195
196/* 196/*
197 * ioctl commands 197 * ioctl commands
198 */ 198 */
199#define EXT2_IOC_GETFLAGS _IOR('f', 1, long) 199#define EXT2_IOC_GETFLAGS FS_IOC_GETFLAGS
200#define EXT2_IOC_SETFLAGS _IOW('f', 2, long) 200#define EXT2_IOC_SETFLAGS FS_IOC_SETFLAGS
201#define EXT2_IOC_GETVERSION _IOR('v', 1, long) 201#define EXT2_IOC_GETVERSION FS_IOC_GETVERSION
202#define EXT2_IOC_SETVERSION _IOW('v', 2, long) 202#define EXT2_IOC_SETVERSION FS_IOC_SETVERSION
203
204/*
205 * ioctl commands in 32 bit emulation
206 */
207#define EXT2_IOC32_GETFLAGS FS_IOC32_GETFLAGS
208#define EXT2_IOC32_SETFLAGS FS_IOC32_SETFLAGS
209#define EXT2_IOC32_GETVERSION FS_IOC32_GETVERSION
210#define EXT2_IOC32_SETVERSION FS_IOC32_SETVERSION
203 211
204/* 212/*
205 * Structure of an inode on the disk 213 * Structure of an inode on the disk
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index cc08f56750da..11cca1bdc0c7 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -216,14 +216,14 @@ struct ext3_new_group_data {
216/* 216/*
217 * ioctl commands 217 * ioctl commands
218 */ 218 */
219#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) 219#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS
220#define EXT3_IOC_SETFLAGS _IOW('f', 2, long) 220#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS
221#define EXT3_IOC_GETVERSION _IOR('f', 3, long) 221#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
222#define EXT3_IOC_SETVERSION _IOW('f', 4, long) 222#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
223#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) 223#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
224#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input) 224#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
225#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) 225#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION
226#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) 226#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION
227#ifdef CONFIG_JBD_DEBUG 227#ifdef CONFIG_JBD_DEBUG
228#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) 228#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
229#endif 229#endif
@@ -231,6 +231,23 @@ struct ext3_new_group_data {
231#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) 231#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
232 232
233/* 233/*
234 * ioctl commands in 32 bit emulation
235 */
236#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS
237#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS
238#define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
239#define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
240#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
241#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
242#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
243#ifdef CONFIG_JBD_DEBUG
244#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
245#endif
246#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
247#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
248
249
250/*
234 * Mount options 251 * Mount options
235 */ 252 */
236struct ext3_mount_options { 253struct ext3_mount_options {
@@ -812,6 +829,7 @@ extern void ext3_set_aops(struct inode *inode);
812/* ioctl.c */ 829/* ioctl.c */
813extern int ext3_ioctl (struct inode *, struct file *, unsigned int, 830extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
814 unsigned long); 831 unsigned long);
832extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long);
815 833
816/* namei.c */ 834/* namei.c */
817extern int ext3_orphan_add(handle_t *, struct inode *); 835extern int ext3_orphan_add(handle_t *, struct inode *);
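
For illustration only (not part of this commit): with the EXT3_IOC32_* aliases defined above, a filesystem's compat_ioctl method can translate the 32-bit commands and fall through to its native ioctl handler. A simplified, hypothetical sketch of that pattern (only two commands shown):

#include <linux/fs.h>
#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/ext3_fs.h>

/* hypothetical compat dispatcher, illustration only */
static long example_ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case EXT3_IOC32_GETFLAGS:
		cmd = EXT3_IOC_GETFLAGS;
		break;
	case EXT3_IOC32_SETFLAGS:
		cmd = EXT3_IOC_SETFLAGS;
		break;
	default:
		return -ENOIOCTLCMD;
	}
	/* hand the translated command to the regular 64-bit ioctl path */
	return ext3_ioctl(file->f_dentry->d_inode, file, cmd,
			  (unsigned long) compat_ptr(arg));
}
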
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6eafbe309483..5baf3a153403 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -79,8 +79,8 @@ extern int dir_notify_enable;
79#define WRITE 1 79#define WRITE 1
80#define READA 2 /* read-ahead - don't block if no resources */ 80#define READA 2 /* read-ahead - don't block if no resources */
81#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ 81#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */
82#define SPECIAL 4 /* For non-blockdevice requests in request queue */
83#define READ_SYNC (READ | (1 << BIO_RW_SYNC)) 82#define READ_SYNC (READ | (1 << BIO_RW_SYNC))
83#define READ_META (READ | (1 << BIO_RW_META))
84#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) 84#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC))
85#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) 85#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
86 86
@@ -217,6 +217,45 @@ extern int dir_notify_enable;
217#define FIBMAP _IO(0x00,1) /* bmap access */ 217#define FIBMAP _IO(0x00,1) /* bmap access */
218#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ 218#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
219 219
220#define FS_IOC_GETFLAGS _IOR('f', 1, long)
221#define FS_IOC_SETFLAGS _IOW('f', 2, long)
222#define FS_IOC_GETVERSION _IOR('v', 1, long)
223#define FS_IOC_SETVERSION _IOW('v', 2, long)
224#define FS_IOC32_GETFLAGS _IOR('f', 1, int)
225#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
226#define FS_IOC32_GETVERSION _IOR('v', 1, int)
227#define FS_IOC32_SETVERSION _IOW('v', 2, int)
228
229/*
230 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
231 */
232#define FS_SECRM_FL 0x00000001 /* Secure deletion */
233#define FS_UNRM_FL 0x00000002 /* Undelete */
234#define FS_COMPR_FL 0x00000004 /* Compress file */
235#define FS_SYNC_FL 0x00000008 /* Synchronous updates */
236#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
237#define FS_APPEND_FL 0x00000020 /* writes to file may only append */
238#define FS_NODUMP_FL 0x00000040 /* do not dump file */
239#define FS_NOATIME_FL 0x00000080 /* do not update atime */
240/* Reserved for compression usage... */
241#define FS_DIRTY_FL 0x00000100
242#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
243#define FS_NOCOMP_FL 0x00000400 /* Don't compress */
244#define FS_ECOMPR_FL 0x00000800 /* Compression error */
245/* End compression flags --- maybe not all used */
246#define FS_BTREE_FL 0x00001000 /* btree format dir */
247#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
248#define FS_IMAGIC_FL 0x00002000 /* AFS directory */
249#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
250#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
251#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
252#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
253#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
254
255#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
256#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
257
258
220#define SYNC_FILE_RANGE_WAIT_BEFORE 1 259#define SYNC_FILE_RANGE_WAIT_BEFORE 1
221#define SYNC_FILE_RANGE_WRITE 2 260#define SYNC_FILE_RANGE_WRITE 2
222#define SYNC_FILE_RANGE_WAIT_AFTER 4 261#define SYNC_FILE_RANGE_WAIT_AFTER 4
@@ -1443,6 +1482,7 @@ extern void __init vfs_caches_init(unsigned long);
1443extern void putname(const char *name); 1482extern void putname(const char *name);
1444#endif 1483#endif
1445 1484
1485#ifdef CONFIG_BLOCK
1446extern int register_blkdev(unsigned int, const char *); 1486extern int register_blkdev(unsigned int, const char *);
1447extern int unregister_blkdev(unsigned int, const char *); 1487extern int unregister_blkdev(unsigned int, const char *);
1448extern struct block_device *bdget(dev_t); 1488extern struct block_device *bdget(dev_t);
@@ -1451,11 +1491,15 @@ extern void bd_forget(struct inode *inode);
1451extern void bdput(struct block_device *); 1491extern void bdput(struct block_device *);
1452extern struct block_device *open_by_devnum(dev_t, unsigned); 1492extern struct block_device *open_by_devnum(dev_t, unsigned);
1453extern struct block_device *open_partition_by_devnum(dev_t, unsigned); 1493extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
1454extern const struct file_operations def_blk_fops;
1455extern const struct address_space_operations def_blk_aops; 1494extern const struct address_space_operations def_blk_aops;
1495#else
1496static inline void bd_forget(struct inode *inode) {}
1497#endif
1498extern const struct file_operations def_blk_fops;
1456extern const struct file_operations def_chr_fops; 1499extern const struct file_operations def_chr_fops;
1457extern const struct file_operations bad_sock_fops; 1500extern const struct file_operations bad_sock_fops;
1458extern const struct file_operations def_fifo_fops; 1501extern const struct file_operations def_fifo_fops;
1502#ifdef CONFIG_BLOCK
1459extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); 1503extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
1460extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); 1504extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
1461extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 1505extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
@@ -1471,6 +1515,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *);
1471#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) 1515#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder)
1472#define bd_release_from_disk(bdev, disk) bd_release(bdev) 1516#define bd_release_from_disk(bdev, disk) bd_release(bdev)
1473#endif 1517#endif
1518#endif
1474 1519
1475/* fs/char_dev.c */ 1520/* fs/char_dev.c */
1476#define CHRDEV_MAJOR_HASH_SIZE 255 1521#define CHRDEV_MAJOR_HASH_SIZE 255
@@ -1484,14 +1529,19 @@ extern int chrdev_open(struct inode *, struct file *);
1484extern void chrdev_show(struct seq_file *,off_t); 1529extern void chrdev_show(struct seq_file *,off_t);
1485 1530
1486/* fs/block_dev.c */ 1531/* fs/block_dev.c */
1487#define BLKDEV_MAJOR_HASH_SIZE 255
1488#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ 1532#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
1533
1534#ifdef CONFIG_BLOCK
1535#define BLKDEV_MAJOR_HASH_SIZE 255
1489extern const char *__bdevname(dev_t, char *buffer); 1536extern const char *__bdevname(dev_t, char *buffer);
1490extern const char *bdevname(struct block_device *bdev, char *buffer); 1537extern const char *bdevname(struct block_device *bdev, char *buffer);
1491extern struct block_device *lookup_bdev(const char *); 1538extern struct block_device *lookup_bdev(const char *);
1492extern struct block_device *open_bdev_excl(const char *, int, void *); 1539extern struct block_device *open_bdev_excl(const char *, int, void *);
1493extern void close_bdev_excl(struct block_device *); 1540extern void close_bdev_excl(struct block_device *);
1494extern void blkdev_show(struct seq_file *,off_t); 1541extern void blkdev_show(struct seq_file *,off_t);
1542#else
1543#define BLKDEV_MAJOR_HASH_SIZE 0
1544#endif
1495 1545
1496extern void init_special_inode(struct inode *, umode_t, dev_t); 1546extern void init_special_inode(struct inode *, umode_t, dev_t);
1497 1547
@@ -1505,6 +1555,7 @@ extern const struct file_operations rdwr_fifo_fops;
1505 1555
1506extern int fs_may_remount_ro(struct super_block *); 1556extern int fs_may_remount_ro(struct super_block *);
1507 1557
1558#ifdef CONFIG_BLOCK
1508/* 1559/*
1509 * return READ, READA, or WRITE 1560 * return READ, READA, or WRITE
1510 */ 1561 */
@@ -1516,9 +1567,10 @@ extern int fs_may_remount_ro(struct super_block *);
1516#define bio_data_dir(bio) ((bio)->bi_rw & 1) 1567#define bio_data_dir(bio) ((bio)->bi_rw & 1)
1517 1568
1518extern int check_disk_change(struct block_device *); 1569extern int check_disk_change(struct block_device *);
1519extern int invalidate_inodes(struct super_block *);
1520extern int __invalidate_device(struct block_device *); 1570extern int __invalidate_device(struct block_device *);
1521extern int invalidate_partition(struct gendisk *, int); 1571extern int invalidate_partition(struct gendisk *, int);
1572#endif
1573extern int invalidate_inodes(struct super_block *);
1522unsigned long invalidate_mapping_pages(struct address_space *mapping, 1574unsigned long invalidate_mapping_pages(struct address_space *mapping,
1523 pgoff_t start, pgoff_t end); 1575 pgoff_t start, pgoff_t end);
1524unsigned long invalidate_inode_pages(struct address_space *mapping); 1576unsigned long invalidate_inode_pages(struct address_space *mapping);
@@ -1546,11 +1598,14 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
1546extern long do_fsync(struct file *file, int datasync); 1598extern long do_fsync(struct file *file, int datasync);
1547extern void sync_supers(void); 1599extern void sync_supers(void);
1548extern void sync_filesystems(int wait); 1600extern void sync_filesystems(int wait);
1601extern void __fsync_super(struct super_block *sb);
1549extern void emergency_sync(void); 1602extern void emergency_sync(void);
1550extern void emergency_remount(void); 1603extern void emergency_remount(void);
1551extern int do_remount_sb(struct super_block *sb, int flags, 1604extern int do_remount_sb(struct super_block *sb, int flags,
1552 void *data, int force); 1605 void *data, int force);
1606#ifdef CONFIG_BLOCK
1553extern sector_t bmap(struct inode *, sector_t); 1607extern sector_t bmap(struct inode *, sector_t);
1608#endif
1554extern int notify_change(struct dentry *, struct iattr *); 1609extern int notify_change(struct dentry *, struct iattr *);
1555extern int permission(struct inode *, int, struct nameidata *); 1610extern int permission(struct inode *, int, struct nameidata *);
1556extern int generic_permission(struct inode *, int, 1611extern int generic_permission(struct inode *, int,
@@ -1633,9 +1688,11 @@ static inline void insert_inode_hash(struct inode *inode) {
1633extern struct file * get_empty_filp(void); 1688extern struct file * get_empty_filp(void);
1634extern void file_move(struct file *f, struct list_head *list); 1689extern void file_move(struct file *f, struct list_head *list);
1635extern void file_kill(struct file *f); 1690extern void file_kill(struct file *f);
1691#ifdef CONFIG_BLOCK
1636struct bio; 1692struct bio;
1637extern void submit_bio(int, struct bio *); 1693extern void submit_bio(int, struct bio *);
1638extern int bdev_read_only(struct block_device *); 1694extern int bdev_read_only(struct block_device *);
1695#endif
1639extern int set_blocksize(struct block_device *, int); 1696extern int set_blocksize(struct block_device *, int);
1640extern int sb_set_blocksize(struct super_block *, int); 1697extern int sb_set_blocksize(struct super_block *, int);
1641extern int sb_min_blocksize(struct super_block *, int); 1698extern int sb_min_blocksize(struct super_block *, int);
@@ -1716,6 +1773,7 @@ static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
1716 actor); 1773 actor);
1717} 1774}
1718 1775
1776#ifdef CONFIG_BLOCK
1719ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1777ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1720 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1778 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1721 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1779 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
@@ -1753,6 +1811,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
1753 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 1811 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
1754 nr_segs, get_block, end_io, DIO_OWN_LOCKING); 1812 nr_segs, get_block, end_io, DIO_OWN_LOCKING);
1755} 1813}
1814#endif
1756 1815
1757extern const struct file_operations generic_ro_fops; 1816extern const struct file_operations generic_ro_fops;
1758 1817
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e4af57e87c17..41f276fdd185 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -11,6 +11,8 @@
11 11
12#include <linux/types.h> 12#include <linux/types.h>
13 13
14#ifdef CONFIG_BLOCK
15
14enum { 16enum {
15/* These three have identical behaviour; use the second one if DOS FDISK gets 17/* These three have identical behaviour; use the second one if DOS FDISK gets
16 confused about extended/logical partitions starting past cylinder 1023. */ 18 confused about extended/logical partitions starting past cylinder 1023. */
@@ -420,3 +422,5 @@ static inline struct block_device *bdget_disk(struct gendisk *disk, int index)
420#endif 422#endif
421 423
422#endif 424#endif
425
426#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b703b6d4358..4edf1934e5ca 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -743,7 +743,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
743 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); 743 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
744void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long); 744void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
745 745
746int __set_page_dirty_buffers(struct page *page); 746extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
747extern void do_invalidatepage(struct page *page, unsigned long offset);
748
747int __set_page_dirty_nobuffers(struct page *page); 749int __set_page_dirty_nobuffers(struct page *page);
748int redirty_page_for_writepage(struct writeback_control *wbc, 750int redirty_page_for_writepage(struct writeback_control *wbc,
749 struct page *page); 751 struct page *page);
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 3ca880463c47..cc5fb75af78a 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -9,6 +9,7 @@
9 * (And no, it doesn't do the #ifdef __MPAGE_H thing, and it doesn't do 9 * (And no, it doesn't do the #ifdef __MPAGE_H thing, and it doesn't do
10 * nested includes. Get it right in the .c file). 10 * nested includes. Get it right in the .c file).
11 */ 11 */
12#ifdef CONFIG_BLOCK
12 13
13struct writeback_control; 14struct writeback_control;
14typedef int (writepage_t)(struct page *page, struct writeback_control *wbc); 15typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
@@ -21,8 +22,4 @@ int mpage_writepages(struct address_space *mapping,
21int mpage_writepage(struct page *page, get_block_t *get_block, 22int mpage_writepage(struct page *page, get_block_t *get_block,
22 struct writeback_control *wbc); 23 struct writeback_control *wbc);
23 24
24static inline int 25#endif
25generic_writepages(struct address_space *mapping, struct writeback_control *wbc)
26{
27 return mpage_writepages(mapping, wbc, NULL);
28}
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index eb3e547c8fee..c588709acbbc 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -53,6 +53,8 @@
53#include <linux/raid/md_u.h> 53#include <linux/raid/md_u.h>
54#include <linux/raid/md_k.h> 54#include <linux/raid/md_k.h>
55 55
56#ifdef CONFIG_MD
57
56/* 58/*
57 * Different major versions are not compatible. 59 * Different major versions are not compatible.
58 * Different minor versions are only downward compatible. 60 * Different minor versions are only downward compatible.
@@ -95,5 +97,6 @@ extern void md_new_event(mddev_t *mddev);
95 97
96extern void md_update_sb(mddev_t * mddev); 98extern void md_update_sb(mddev_t * mddev);
97 99
100#endif /* CONFIG_MD */
98#endif 101#endif
99 102
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index d28890295852..920b94fe31fa 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -18,6 +18,8 @@
18/* and dm-bio-list.h is not under include/linux because.... ??? */ 18/* and dm-bio-list.h is not under include/linux because.... ??? */
19#include "../../../drivers/md/dm-bio-list.h" 19#include "../../../drivers/md/dm-bio-list.h"
20 20
21#ifdef CONFIG_BLOCK
22
21#define LEVEL_MULTIPATH (-4) 23#define LEVEL_MULTIPATH (-4)
22#define LEVEL_LINEAR (-1) 24#define LEVEL_LINEAR (-1)
23#define LEVEL_FAULTY (-5) 25#define LEVEL_FAULTY (-5)
@@ -362,5 +364,6 @@ static inline void safe_put_page(struct page *p)
362 if (p) put_page(p); 364 if (p) put_page(p);
363} 365}
364 366
367#endif /* CONFIG_BLOCK */
365#endif 368#endif
366 369
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 00b340ba6612..b160fb18e8d6 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -17,5 +17,6 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
17 17
18extern const struct file_operations ramfs_file_operations; 18extern const struct file_operations ramfs_file_operations;
19extern struct vm_operations_struct generic_file_vm_ops; 19extern struct vm_operations_struct generic_file_vm_ops;
20extern int __init init_rootfs(void);
20 21
21#endif 22#endif
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 8d5382e62c08..344bc3495ddb 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -133,7 +133,7 @@ static inline void rb_set_color(struct rb_node *rb, int color)
133#define rb_entry(ptr, type, member) container_of(ptr, type, member) 133#define rb_entry(ptr, type, member) container_of(ptr, type, member)
134 134
135#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) 135#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
136#define RB_EMPTY_NODE(node) (rb_parent(node) != node) 136#define RB_EMPTY_NODE(node) (rb_parent(node) == node)
137#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) 137#define RB_CLEAR_NODE(node) (rb_set_parent(node, node))
138 138
139extern void rb_insert_color(struct rb_node *, struct rb_root *); 139extern void rb_insert_color(struct rb_node *, struct rb_root *);
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 28493ffaafe7..9c63abffd7b2 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -807,21 +807,19 @@ struct stat_data_v1 {
807#define set_sd_v1_first_direct_byte(sdp,v) \ 807#define set_sd_v1_first_direct_byte(sdp,v) \
808 ((sdp)->sd_first_direct_byte = cpu_to_le32(v)) 808 ((sdp)->sd_first_direct_byte = cpu_to_le32(v))
809 809
810#include <linux/ext2_fs.h>
811
812/* inode flags stored in sd_attrs (nee sd_reserved) */ 810/* inode flags stored in sd_attrs (nee sd_reserved) */
813 811
814/* we want common flags to have the same values as in ext2, 812/* we want common flags to have the same values as in ext2,
815 so chattr(1) will work without problems */ 813 so chattr(1) will work without problems */
816#define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL 814#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
817#define REISERFS_APPEND_FL EXT2_APPEND_FL 815#define REISERFS_APPEND_FL FS_APPEND_FL
818#define REISERFS_SYNC_FL EXT2_SYNC_FL 816#define REISERFS_SYNC_FL FS_SYNC_FL
819#define REISERFS_NOATIME_FL EXT2_NOATIME_FL 817#define REISERFS_NOATIME_FL FS_NOATIME_FL
820#define REISERFS_NODUMP_FL EXT2_NODUMP_FL 818#define REISERFS_NODUMP_FL FS_NODUMP_FL
821#define REISERFS_SECRM_FL EXT2_SECRM_FL 819#define REISERFS_SECRM_FL FS_SECRM_FL
822#define REISERFS_UNRM_FL EXT2_UNRM_FL 820#define REISERFS_UNRM_FL FS_UNRM_FL
823#define REISERFS_COMPR_FL EXT2_COMPR_FL 821#define REISERFS_COMPR_FL FS_COMPR_FL
824#define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL 822#define REISERFS_NOTAIL_FL FS_NOTAIL_FL
825 823
826/* persistent flags that file inherits from the parent directory */ 824/* persistent flags that file inherits from the parent directory */
827#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ 825#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \
@@ -2163,15 +2161,24 @@ __u32 r5_hash(const signed char *msg, int len);
2163/* prototypes from ioctl.c */ 2161/* prototypes from ioctl.c */
2164int reiserfs_ioctl(struct inode *inode, struct file *filp, 2162int reiserfs_ioctl(struct inode *inode, struct file *filp,
2165 unsigned int cmd, unsigned long arg); 2163 unsigned int cmd, unsigned long arg);
2164long reiserfs_compat_ioctl(struct file *filp,
2165 unsigned int cmd, unsigned long arg);
2166 2166
2167/* ioctl's command */ 2167/* ioctl's command */
2168#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) 2168#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long)
2169/* define following flags to be the same as in ext2, so that chattr(1), 2169/* define following flags to be the same as in ext2, so that chattr(1),
2170 lsattr(1) will work with us. */ 2170 lsattr(1) will work with us. */
2171#define REISERFS_IOC_GETFLAGS EXT2_IOC_GETFLAGS 2171#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS
2172#define REISERFS_IOC_SETFLAGS EXT2_IOC_SETFLAGS 2172#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS
2173#define REISERFS_IOC_GETVERSION EXT2_IOC_GETVERSION 2173#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION
2174#define REISERFS_IOC_SETVERSION EXT2_IOC_SETVERSION 2174#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION
2175
2176/* the 32 bit compat definitions with int argument */
2177#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int)
2178#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
2179#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
2180#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
2181#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
2175 2182
2176/* Locking primitives */ 2183/* Locking primitives */
2177/* Right now we are still falling back to (un)lock_kernel, but eventually that 2184/* Right now we are still falling back to (un)lock_kernel, but eventually that
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a06fc89cf6e5..fc4a9873ec10 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -710,7 +710,6 @@ extern unsigned int max_cache_size;
710 710
711 711
712struct io_context; /* See blkdev.h */ 712struct io_context; /* See blkdev.h */
713void exit_io_context(void);
714struct cpuset; 713struct cpuset;
715 714
716#define NGROUPS_SMALL 32 715#define NGROUPS_SMALL 32
diff --git a/include/linux/tty.h b/include/linux/tty.h
index ea4c2605f8da..44091c0db0b4 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -307,6 +307,9 @@ extern void tty_ldisc_put(int);
307extern void tty_wakeup(struct tty_struct *tty); 307extern void tty_wakeup(struct tty_struct *tty);
308extern void tty_ldisc_flush(struct tty_struct *tty); 308extern void tty_ldisc_flush(struct tty_struct *tty);
309 309
310extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
311 unsigned long arg);
312
310extern struct mutex tty_mutex; 313extern struct mutex tty_mutex;
311 314
312/* n_tty.c */ 315/* n_tty.c */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9d4074ecd0cd..4f4d98addb44 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -111,6 +111,8 @@ balance_dirty_pages_ratelimited(struct address_space *mapping)
111} 111}
112 112
113int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); 113int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
114extern int generic_writepages(struct address_space *mapping,
115 struct writeback_control *wbc);
114int do_writepages(struct address_space *mapping, struct writeback_control *wbc); 116int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
115int sync_page_range(struct inode *inode, struct address_space *mapping, 117int sync_page_range(struct inode *inode, struct address_space *mapping,
116 loff_t pos, loff_t count); 118 loff_t pos, loff_t count);
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index d04d05adfa9b..c247a28259bc 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -6,7 +6,6 @@
6#include <scsi/scsi_device.h> 6#include <scsi/scsi_device.h>
7#include <scsi/scsi_host.h> 7#include <scsi/scsi_host.h>
8 8
9
10#define MSG_SIMPLE_TAG 0x20 9#define MSG_SIMPLE_TAG 0x20
11#define MSG_HEAD_TAG 0x21 10#define MSG_HEAD_TAG 0x21
12#define MSG_ORDERED_TAG 0x22 11#define MSG_ORDERED_TAG 0x22
@@ -14,6 +13,7 @@
14#define SCSI_NO_TAG (-1) /* identify no tag in use */ 13#define SCSI_NO_TAG (-1) /* identify no tag in use */
15 14
16 15
16#ifdef CONFIG_BLOCK
17 17
18/** 18/**
19 * scsi_get_tag_type - get the type of tag the device supports 19 * scsi_get_tag_type - get the type of tag the device supports
@@ -100,7 +100,7 @@ static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
100 struct scsi_device *sdev = cmd->device; 100 struct scsi_device *sdev = cmd->device;
101 101
102 if (blk_rq_tagged(req)) { 102 if (blk_rq_tagged(req)) {
103 if (sdev->ordered_tags && req->flags & REQ_HARDBARRIER) 103 if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER)
104 *msg++ = MSG_ORDERED_TAG; 104 *msg++ = MSG_ORDERED_TAG;
105 else 105 else
106 *msg++ = MSG_SIMPLE_TAG; 106 *msg++ = MSG_SIMPLE_TAG;
@@ -144,4 +144,5 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
144 return shost->bqt ? 0 : -ENOMEM; 144 return shost->bqt ? 0 : -ENOMEM;
145} 145}
146 146
147#endif /* CONFIG_BLOCK */
147#endif /* _SCSI_SCSI_TCQ_H */ 148#endif /* _SCSI_SCSI_TCQ_H */
diff --git a/init/Kconfig b/init/Kconfig
index 4381006dd666..d2eb7a84a264 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,7 +92,7 @@ config LOCALVERSION_AUTO
92 92
93config SWAP 93config SWAP
94 bool "Support for paging of anonymous memory (swap)" 94 bool "Support for paging of anonymous memory (swap)"
95 depends on MMU 95 depends on MMU && BLOCK
96 default y 96 default y
97 help 97 help
98 This option allows you to choose whether you want to have support 98 This option allows you to choose whether you want to have support
diff --git a/init/do_mounts.c b/init/do_mounts.c
index b290aadb1d3f..dc1ec0803ef9 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -285,7 +285,11 @@ void __init mount_block_root(char *name, int flags)
285{ 285{
286 char *fs_names = __getname(); 286 char *fs_names = __getname();
287 char *p; 287 char *p;
288#ifdef CONFIG_BLOCK
288 char b[BDEVNAME_SIZE]; 289 char b[BDEVNAME_SIZE];
290#else
291 const char *b = name;
292#endif
289 293
290 get_fs_names(fs_names); 294 get_fs_names(fs_names);
291retry: 295retry:
@@ -304,7 +308,9 @@ retry:
304 * Allow the user to distinguish between failed sys_open 308 * Allow the user to distinguish between failed sys_open
305 * and bad superblock on root device. 309 * and bad superblock on root device.
306 */ 310 */
311#ifdef CONFIG_BLOCK
307 __bdevname(ROOT_DEV, b); 312 __bdevname(ROOT_DEV, b);
313#endif
308 printk("VFS: Cannot open root device \"%s\" or %s\n", 314 printk("VFS: Cannot open root device \"%s\" or %s\n",
309 root_device_name, b); 315 root_device_name, b);
310 printk("Please append a correct \"root=\" boot option\n"); 316 printk("Please append a correct \"root=\" boot option\n");
@@ -316,7 +322,10 @@ retry:
316 for (p = fs_names; *p; p += strlen(p)+1) 322 for (p = fs_names; *p; p += strlen(p)+1)
317 printk(" %s", p); 323 printk(" %s", p);
318 printk("\n"); 324 printk("\n");
319 panic("VFS: Unable to mount root fs on %s", __bdevname(ROOT_DEV, b)); 325#ifdef CONFIG_BLOCK
326 __bdevname(ROOT_DEV, b);
327#endif
328 panic("VFS: Unable to mount root fs on %s", b);
320out: 329out:
321 putname(fs_names); 330 putname(fs_names);
322} 331}
@@ -387,8 +396,10 @@ void __init mount_root(void)
387 change_floppy("root floppy"); 396 change_floppy("root floppy");
388 } 397 }
389#endif 398#endif
399#ifdef CONFIG_BLOCK
390 create_dev("/dev/root", ROOT_DEV); 400 create_dev("/dev/root", ROOT_DEV);
391 mount_block_root("/dev/root", root_mountflags); 401 mount_block_root("/dev/root", root_mountflags);
402#endif
392} 403}
393 404
394/* 405/*
diff --git a/kernel/compat.c b/kernel/compat.c
index 75573e5d27b0..b4fbd838cd77 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -26,6 +26,8 @@
26 26
27#include <asm/uaccess.h> 27#include <asm/uaccess.h>
28 28
29extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
30
29int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) 31int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
30{ 32{
31 return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || 33 return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
diff --git a/kernel/exit.c b/kernel/exit.c
index 2e4c13cba95a..c189de2927ab 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -38,6 +38,7 @@
38#include <linux/pipe_fs_i.h> 38#include <linux/pipe_fs_i.h>
39#include <linux/audit.h> /* for audit_free() */ 39#include <linux/audit.h> /* for audit_free() */
40#include <linux/resource.h> 40#include <linux/resource.h>
41#include <linux/blkdev.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/unistd.h> 44#include <asm/unistd.h>
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 6991bece67e8..7a3b2e75f040 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -134,3 +134,8 @@ cond_syscall(sys_madvise);
134cond_syscall(sys_mremap); 134cond_syscall(sys_mremap);
135cond_syscall(sys_remap_file_pages); 135cond_syscall(sys_remap_file_pages);
136cond_syscall(compat_sys_move_pages); 136cond_syscall(compat_sys_move_pages);
137
138/* block-layer dependent */
139cond_syscall(sys_bdflush);
140cond_syscall(sys_ioprio_set);
141cond_syscall(sys_ioprio_get);
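[Editor's note] cond_syscall() lets a syscall table slot resolve even when the implementing code is compiled out: with CONFIG_BLOCK=n, sys_bdflush() and the ioprio calls simply return -ENOSYS instead of breaking the link. Roughly how the mechanism works (kernel/sys_ni.c provides the catch-all; the macro lives in include/linux/linkage.h):

	/* The catch-all stub every conditional syscall falls back to. */
	asmlinkage long sys_ni_syscall(void)
	{
		return -ENOSYS;
	}

	/* cond_syscall() (roughly) makes the named symbol a weak assembler
	 * alias of sys_ni_syscall, so a real definition elsewhere wins but
	 * its absence still links cleanly. */
	#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
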
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1e55ba1c2edf..48499c2d88cc 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -322,6 +322,9 @@ struct rb_node *rb_next(struct rb_node *node)
322{ 322{
323 struct rb_node *parent; 323 struct rb_node *parent;
324 324
325 if (rb_parent(node) == node)
326 return NULL;
327
325 /* If we have a right-hand child, go down and then left as far 328 /* If we have a right-hand child, go down and then left as far
326 as we can. */ 329 as we can. */
327 if (node->rb_right) { 330 if (node->rb_right) {
@@ -348,6 +351,9 @@ struct rb_node *rb_prev(struct rb_node *node)
348{ 351{
349 struct rb_node *parent; 352 struct rb_node *parent;
350 353
354 if (rb_parent(node) == node)
355 return NULL;
356
351 /* If we have a left-hand child, go down and then right as far 357 /* If we have a left-hand child, go down and then right as far
352 as we can. */ 358 as we can. */
353 if (node->rb_left) { 359 if (node->rb_left) {
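[Editor's note] The new early return in rb_next()/rb_prev() relies on the convention that a node which is not linked into any tree points its parent at itself; the reworked I/O schedulers mark request nodes that way when they take them off their sort trees, and may later ask for the node's neighbour. A small sketch of the convention, assuming the RB_CLEAR_NODE()/rb_parent() helpers of that era's include/linux/rbtree.h:

	#include <linux/rbtree.h>

	/* A cleared node is its own parent; rb_next()/rb_prev() now treat
	 * such a node as "end of iteration" instead of chasing stale
	 * parent/child pointers. */
	static inline int example_node_unlinked(struct rb_node *node)
	{
		return rb_parent(node) == node;	/* the test the patch adds */
	}

	static inline void example_detach(struct rb_root *root, struct rb_node *node)
	{
		rb_erase(node, root);
		RB_CLEAR_NODE(node);	/* make later rb_next() calls safe */
	}
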
diff --git a/mm/Makefile b/mm/Makefile
index 6200c6d6afd2..12b3a4eee88d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -12,6 +12,9 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
12 readahead.o swap.o truncate.o vmscan.o \ 12 readahead.o swap.o truncate.o vmscan.o \
13 prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) 13 prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
14 14
15ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
16obj-y += bounce.o
17endif
15obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 18obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
16obj-$(CONFIG_HUGETLBFS) += hugetlb.o 19obj-$(CONFIG_HUGETLBFS) += hugetlb.o
17obj-$(CONFIG_NUMA) += mempolicy.o 20obj-$(CONFIG_NUMA) += mempolicy.o
diff --git a/mm/bounce.c b/mm/bounce.c
new file mode 100644
index 000000000000..e4b62d2a4024
--- /dev/null
+++ b/mm/bounce.c
@@ -0,0 +1,302 @@
1/* bounce buffer handling for block devices
2 *
3 * - Split from highmem.c
4 */
5
6#include <linux/mm.h>
7#include <linux/module.h>
8#include <linux/swap.h>
9#include <linux/bio.h>
10#include <linux/pagemap.h>
11#include <linux/mempool.h>
12#include <linux/blkdev.h>
13#include <linux/init.h>
14#include <linux/hash.h>
15#include <linux/highmem.h>
16#include <linux/blktrace_api.h>
17#include <asm/tlbflush.h>
18
19#define POOL_SIZE 64
20#define ISA_POOL_SIZE 16
21
22static mempool_t *page_pool, *isa_page_pool;
23
24#ifdef CONFIG_HIGHMEM
25static __init int init_emergency_pool(void)
26{
27 struct sysinfo i;
28 si_meminfo(&i);
29 si_swapinfo(&i);
30
31 if (!i.totalhigh)
32 return 0;
33
34 page_pool = mempool_create_page_pool(POOL_SIZE, 0);
35 BUG_ON(!page_pool);
36 printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
37
38 return 0;
39}
40
41__initcall(init_emergency_pool);
42
43/*
44 * highmem version, map in to vec
45 */
46static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
47{
48 unsigned long flags;
49 unsigned char *vto;
50
51 local_irq_save(flags);
52 vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
53 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
54 kunmap_atomic(vto, KM_BOUNCE_READ);
55 local_irq_restore(flags);
56}
57
58#else /* CONFIG_HIGHMEM */
59
60#define bounce_copy_vec(to, vfrom) \
61 memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
62
63#endif /* CONFIG_HIGHMEM */
64
65/*
66 * allocate pages in the DMA region for the ISA pool
67 */
68static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
69{
70 return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
71}
72
73/*
74 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
75 * as the max address, so check if the pool has already been created.
76 */
77int init_emergency_isa_pool(void)
78{
79 if (isa_page_pool)
80 return 0;
81
82 isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
83 mempool_free_pages, (void *) 0);
84 BUG_ON(!isa_page_pool);
85
86 printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
87 return 0;
88}
89
90/*
91 * Simple bounce buffer support for highmem pages. Depending on the
92 * queue gfp mask set, *to may or may not be a highmem page. kmap it
93 * always, it will do the Right Thing
94 */
95static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
96{
97 unsigned char *vfrom;
98 struct bio_vec *tovec, *fromvec;
99 int i;
100
101 __bio_for_each_segment(tovec, to, i, 0) {
102 fromvec = from->bi_io_vec + i;
103
104 /*
105 * not bounced
106 */
107 if (tovec->bv_page == fromvec->bv_page)
108 continue;
109
110 /*
111 * fromvec->bv_offset and fromvec->bv_len might have been
112 * modified by the block layer, so use the original copy,
113 * bounce_copy_vec already uses tovec->bv_len
114 */
115 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
116
117 flush_dcache_page(tovec->bv_page);
118 bounce_copy_vec(tovec, vfrom);
119 }
120}
121
122static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
123{
124 struct bio *bio_orig = bio->bi_private;
125 struct bio_vec *bvec, *org_vec;
126 int i;
127
128 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
129 set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
130
131 /*
132 * free up bounce indirect pages used
133 */
134 __bio_for_each_segment(bvec, bio, i, 0) {
135 org_vec = bio_orig->bi_io_vec + i;
136 if (bvec->bv_page == org_vec->bv_page)
137 continue;
138
139 dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
140 mempool_free(bvec->bv_page, pool);
141 }
142
143 bio_endio(bio_orig, bio_orig->bi_size, err);
144 bio_put(bio);
145}
146
147static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
148{
149 if (bio->bi_size)
150 return 1;
151
152 bounce_end_io(bio, page_pool, err);
153 return 0;
154}
155
156static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
157{
158 if (bio->bi_size)
159 return 1;
160
161 bounce_end_io(bio, isa_page_pool, err);
162 return 0;
163}
164
165static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
166{
167 struct bio *bio_orig = bio->bi_private;
168
169 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
170 copy_to_high_bio_irq(bio_orig, bio);
171
172 bounce_end_io(bio, pool, err);
173}
174
175static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
176{
177 if (bio->bi_size)
178 return 1;
179
180 __bounce_end_io_read(bio, page_pool, err);
181 return 0;
182}
183
184static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
185{
186 if (bio->bi_size)
187 return 1;
188
189 __bounce_end_io_read(bio, isa_page_pool, err);
190 return 0;
191}
192
193static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
194 mempool_t *pool)
195{
196 struct page *page;
197 struct bio *bio = NULL;
198 int i, rw = bio_data_dir(*bio_orig);
199 struct bio_vec *to, *from;
200
201 bio_for_each_segment(from, *bio_orig, i) {
202 page = from->bv_page;
203
204 /*
205 * is destination page below bounce pfn?
206 */
207 if (page_to_pfn(page) < q->bounce_pfn)
208 continue;
209
210 /*
211 * irk, bounce it
212 */
213 if (!bio)
214 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
215
216 to = bio->bi_io_vec + i;
217
218 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
219 to->bv_len = from->bv_len;
220 to->bv_offset = from->bv_offset;
221 inc_zone_page_state(to->bv_page, NR_BOUNCE);
222
223 if (rw == WRITE) {
224 char *vto, *vfrom;
225
226 flush_dcache_page(from->bv_page);
227 vto = page_address(to->bv_page) + to->bv_offset;
228 vfrom = kmap(from->bv_page) + from->bv_offset;
229 memcpy(vto, vfrom, to->bv_len);
230 kunmap(from->bv_page);
231 }
232 }
233
234 /*
235 * no pages bounced
236 */
237 if (!bio)
238 return;
239
240 /*
241 * at least one page was bounced, fill in possible non-highmem
242 * pages
243 */
244 __bio_for_each_segment(from, *bio_orig, i, 0) {
245 to = bio_iovec_idx(bio, i);
246 if (!to->bv_page) {
247 to->bv_page = from->bv_page;
248 to->bv_len = from->bv_len;
249 to->bv_offset = from->bv_offset;
250 }
251 }
252
253 bio->bi_bdev = (*bio_orig)->bi_bdev;
254 bio->bi_flags |= (1 << BIO_BOUNCED);
255 bio->bi_sector = (*bio_orig)->bi_sector;
256 bio->bi_rw = (*bio_orig)->bi_rw;
257
258 bio->bi_vcnt = (*bio_orig)->bi_vcnt;
259 bio->bi_idx = (*bio_orig)->bi_idx;
260 bio->bi_size = (*bio_orig)->bi_size;
261
262 if (pool == page_pool) {
263 bio->bi_end_io = bounce_end_io_write;
264 if (rw == READ)
265 bio->bi_end_io = bounce_end_io_read;
266 } else {
267 bio->bi_end_io = bounce_end_io_write_isa;
268 if (rw == READ)
269 bio->bi_end_io = bounce_end_io_read_isa;
270 }
271
272 bio->bi_private = *bio_orig;
273 *bio_orig = bio;
274}
275
276void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
277{
278 mempool_t *pool;
279
280 /*
281 * for non-isa bounce case, just check if the bounce pfn is equal
282 * to or bigger than the highest pfn in the system -- in that case,
283 * don't waste time iterating over bio segments
284 */
285 if (!(q->bounce_gfp & GFP_DMA)) {
286 if (q->bounce_pfn >= blk_max_pfn)
287 return;
288 pool = page_pool;
289 } else {
290 BUG_ON(!isa_page_pool);
291 pool = isa_page_pool;
292 }
293
294 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
295
296 /*
297 * slow path
298 */
299 __blk_queue_bounce(q, bio_orig, pool);
300}
301
302EXPORT_SYMBOL(blk_queue_bounce);
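[Editor's note] mm/bounce.c is the bounce-buffer code lifted out of mm/highmem.c (see the matching removal below) so that, per the mm/Makefile hunk above, it is only built when both CONFIG_MMU and CONFIG_BLOCK are set. Drivers do not call __blk_queue_bounce() themselves; they declare a DMA limit on their queue and the block layer bounces each bio on submission. An illustrative sketch (the driver and function names are hypothetical):

	#include <linux/blkdev.h>

	/* Hypothetical driver setup: hardware that can only DMA to
	 * ISA-reachable memory asks for the ISA pool; ll_rw_blk.c then
	 * runs every submitted bio through blk_queue_bounce() before the
	 * driver ever sees the request. */
	static void exampledrv_init_queue(request_queue_t *q)
	{
		blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);	/* or BLK_BOUNCE_HIGH */
	}
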
diff --git a/mm/filemap.c b/mm/filemap.c
index 3277f3b23524..c4fe97f5ace0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2020,6 +2020,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
2020 if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) 2020 if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
2021 *count = inode->i_sb->s_maxbytes - *pos; 2021 *count = inode->i_sb->s_maxbytes - *pos;
2022 } else { 2022 } else {
2023#ifdef CONFIG_BLOCK
2023 loff_t isize; 2024 loff_t isize;
2024 if (bdev_read_only(I_BDEV(inode))) 2025 if (bdev_read_only(I_BDEV(inode)))
2025 return -EPERM; 2026 return -EPERM;
@@ -2031,6 +2032,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
2031 2032
2032 if (*pos + *count > isize) 2033 if (*pos + *count > isize)
2033 *count = isize - *pos; 2034 *count = isize - *pos;
2035#else
2036 return -EPERM;
2037#endif
2034 } 2038 }
2035 return 0; 2039 return 0;
2036} 2040}
@@ -2491,3 +2495,33 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2491 } 2495 }
2492 return retval; 2496 return retval;
2493} 2497}
2498
2499/**
2500 * try_to_release_page() - release old fs-specific metadata on a page
2501 *
2502 * @page: the page which the kernel is trying to free
2503 * @gfp_mask: memory allocation flags (and I/O mode)
2504 *
2505 * The address_space is to try to release any data against the page
2506 * (presumably at page->private). If the release was successful, return `1'.
2507 * Otherwise return zero.
2508 *
2509 * The @gfp_mask argument specifies whether I/O may be performed to release
2510 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
2511 *
2512 * NOTE: @gfp_mask may go away, and this function may become non-blocking.
2513 */
2514int try_to_release_page(struct page *page, gfp_t gfp_mask)
2515{
2516 struct address_space * const mapping = page->mapping;
2517
2518 BUG_ON(!PageLocked(page));
2519 if (PageWriteback(page))
2520 return 0;
2521
2522 if (mapping && mapping->a_ops->releasepage)
2523 return mapping->a_ops->releasepage(page, gfp_mask);
2524 return try_to_free_buffers(page);
2525}
2526
2527EXPORT_SYMBOL(try_to_release_page);
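[Editor's note] With try_to_release_page() in mm/filemap.c, page reclaim and migration can ask a page's owner to drop fs-private data whether or not the buffer layer is built: a releasepage method gets first call, otherwise the buffer-head fallback is used. A hedged sketch of a typical caller, modelled on what reclaim does (the wrapper name is illustrative):

	#include <linux/mm.h>
	#include <linux/pagemap.h>

	/* The page must already be locked (the BUG_ON above enforces it).
	 * GFP_NOIO here means "you may block, but do not start new I/O
	 * to get rid of the private data". */
	static int example_strip_private(struct page *page)
	{
		if (PagePrivate(page) && !try_to_release_page(page, GFP_NOIO))
			return 0;	/* owner refused; page stays pinned */
		return 1;		/* private data gone, page can be freed */
	}
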
diff --git a/mm/highmem.c b/mm/highmem.c
index ee5519b176ee..0206e7e5018c 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -29,13 +29,6 @@
29#include <linux/blktrace_api.h> 29#include <linux/blktrace_api.h>
30#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
31 31
32static mempool_t *page_pool, *isa_page_pool;
33
34static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
35{
36 return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
37}
38
39/* 32/*
40 * Virtual_count is not a pure "count". 33 * Virtual_count is not a pure "count".
41 * 0 means that it is not mapped, and has not been mapped 34 * 0 means that it is not mapped, and has not been mapped
@@ -217,282 +210,8 @@ void fastcall kunmap_high(struct page *page)
217} 210}
218 211
219EXPORT_SYMBOL(kunmap_high); 212EXPORT_SYMBOL(kunmap_high);
220
221#define POOL_SIZE 64
222
223static __init int init_emergency_pool(void)
224{
225 struct sysinfo i;
226 si_meminfo(&i);
227 si_swapinfo(&i);
228
229 if (!i.totalhigh)
230 return 0;
231
232 page_pool = mempool_create_page_pool(POOL_SIZE, 0);
233 BUG_ON(!page_pool);
234 printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
235
236 return 0;
237}
238
239__initcall(init_emergency_pool);
240
241/*
242 * highmem version, map in to vec
243 */
244static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
245{
246 unsigned long flags;
247 unsigned char *vto;
248
249 local_irq_save(flags);
250 vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
251 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
252 kunmap_atomic(vto, KM_BOUNCE_READ);
253 local_irq_restore(flags);
254}
255
256#else /* CONFIG_HIGHMEM */
257
258#define bounce_copy_vec(to, vfrom) \
259 memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
260
261#endif 213#endif
262 214
263#define ISA_POOL_SIZE 16
264
265/*
266 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
267 * as the max address, so check if the pool has already been created.
268 */
269int init_emergency_isa_pool(void)
270{
271 if (isa_page_pool)
272 return 0;
273
274 isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
275 mempool_free_pages, (void *) 0);
276 BUG_ON(!isa_page_pool);
277
278 printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
279 return 0;
280}
281
282/*
283 * Simple bounce buffer support for highmem pages. Depending on the
284 * queue gfp mask set, *to may or may not be a highmem page. kmap it
285 * always, it will do the Right Thing
286 */
287static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
288{
289 unsigned char *vfrom;
290 struct bio_vec *tovec, *fromvec;
291 int i;
292
293 __bio_for_each_segment(tovec, to, i, 0) {
294 fromvec = from->bi_io_vec + i;
295
296 /*
297 * not bounced
298 */
299 if (tovec->bv_page == fromvec->bv_page)
300 continue;
301
302 /*
303 * fromvec->bv_offset and fromvec->bv_len might have been
304 * modified by the block layer, so use the original copy,
305 * bounce_copy_vec already uses tovec->bv_len
306 */
307 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
308
309 flush_dcache_page(tovec->bv_page);
310 bounce_copy_vec(tovec, vfrom);
311 }
312}
313
314static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
315{
316 struct bio *bio_orig = bio->bi_private;
317 struct bio_vec *bvec, *org_vec;
318 int i;
319
320 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
321 set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
322
323 /*
324 * free up bounce indirect pages used
325 */
326 __bio_for_each_segment(bvec, bio, i, 0) {
327 org_vec = bio_orig->bi_io_vec + i;
328 if (bvec->bv_page == org_vec->bv_page)
329 continue;
330
331 dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
332 mempool_free(bvec->bv_page, pool);
333 }
334
335 bio_endio(bio_orig, bio_orig->bi_size, err);
336 bio_put(bio);
337}
338
339static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
340{
341 if (bio->bi_size)
342 return 1;
343
344 bounce_end_io(bio, page_pool, err);
345 return 0;
346}
347
348static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
349{
350 if (bio->bi_size)
351 return 1;
352
353 bounce_end_io(bio, isa_page_pool, err);
354 return 0;
355}
356
357static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
358{
359 struct bio *bio_orig = bio->bi_private;
360
361 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
362 copy_to_high_bio_irq(bio_orig, bio);
363
364 bounce_end_io(bio, pool, err);
365}
366
367static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
368{
369 if (bio->bi_size)
370 return 1;
371
372 __bounce_end_io_read(bio, page_pool, err);
373 return 0;
374}
375
376static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
377{
378 if (bio->bi_size)
379 return 1;
380
381 __bounce_end_io_read(bio, isa_page_pool, err);
382 return 0;
383}
384
385static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
386 mempool_t *pool)
387{
388 struct page *page;
389 struct bio *bio = NULL;
390 int i, rw = bio_data_dir(*bio_orig);
391 struct bio_vec *to, *from;
392
393 bio_for_each_segment(from, *bio_orig, i) {
394 page = from->bv_page;
395
396 /*
397 * is destination page below bounce pfn?
398 */
399 if (page_to_pfn(page) < q->bounce_pfn)
400 continue;
401
402 /*
403 * irk, bounce it
404 */
405 if (!bio)
406 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
407
408 to = bio->bi_io_vec + i;
409
410 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
411 to->bv_len = from->bv_len;
412 to->bv_offset = from->bv_offset;
413 inc_zone_page_state(to->bv_page, NR_BOUNCE);
414
415 if (rw == WRITE) {
416 char *vto, *vfrom;
417
418 flush_dcache_page(from->bv_page);
419 vto = page_address(to->bv_page) + to->bv_offset;
420 vfrom = kmap(from->bv_page) + from->bv_offset;
421 memcpy(vto, vfrom, to->bv_len);
422 kunmap(from->bv_page);
423 }
424 }
425
426 /*
427 * no pages bounced
428 */
429 if (!bio)
430 return;
431
432 /*
433 * at least one page was bounced, fill in possible non-highmem
434 * pages
435 */
436 __bio_for_each_segment(from, *bio_orig, i, 0) {
437 to = bio_iovec_idx(bio, i);
438 if (!to->bv_page) {
439 to->bv_page = from->bv_page;
440 to->bv_len = from->bv_len;
441 to->bv_offset = from->bv_offset;
442 }
443 }
444
445 bio->bi_bdev = (*bio_orig)->bi_bdev;
446 bio->bi_flags |= (1 << BIO_BOUNCED);
447 bio->bi_sector = (*bio_orig)->bi_sector;
448 bio->bi_rw = (*bio_orig)->bi_rw;
449
450 bio->bi_vcnt = (*bio_orig)->bi_vcnt;
451 bio->bi_idx = (*bio_orig)->bi_idx;
452 bio->bi_size = (*bio_orig)->bi_size;
453
454 if (pool == page_pool) {
455 bio->bi_end_io = bounce_end_io_write;
456 if (rw == READ)
457 bio->bi_end_io = bounce_end_io_read;
458 } else {
459 bio->bi_end_io = bounce_end_io_write_isa;
460 if (rw == READ)
461 bio->bi_end_io = bounce_end_io_read_isa;
462 }
463
464 bio->bi_private = *bio_orig;
465 *bio_orig = bio;
466}
467
468void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
469{
470 mempool_t *pool;
471
472 /*
473 * for non-isa bounce case, just check if the bounce pfn is equal
474 * to or bigger than the highest pfn in the system -- in that case,
475 * don't waste time iterating over bio segments
476 */
477 if (!(q->bounce_gfp & GFP_DMA)) {
478 if (q->bounce_pfn >= blk_max_pfn)
479 return;
480 pool = page_pool;
481 } else {
482 BUG_ON(!isa_page_pool);
483 pool = isa_page_pool;
484 }
485
486 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
487
488 /*
489 * slow path
490 */
491 __blk_queue_bounce(q, bio_orig, pool);
492}
493
494EXPORT_SYMBOL(blk_queue_bounce);
495
496#if defined(HASHED_PAGE_VIRTUAL) 215#if defined(HASHED_PAGE_VIRTUAL)
497 216
498#define PA_HASH_ORDER 7 217#define PA_HASH_ORDER 7
diff --git a/mm/migrate.c b/mm/migrate.c
index 20a8c2687b1e..ba2453f9483d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping,
409} 409}
410EXPORT_SYMBOL(migrate_page); 410EXPORT_SYMBOL(migrate_page);
411 411
412#ifdef CONFIG_BLOCK
412/* 413/*
413 * Migration function for pages with buffers. This function can only be used 414 * Migration function for pages with buffers. This function can only be used
414 * if the underlying filesystem guarantees that no other references to "page" 415 * if the underlying filesystem guarantees that no other references to "page"
@@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping,
466 return 0; 467 return 0;
467} 468}
468EXPORT_SYMBOL(buffer_migrate_page); 469EXPORT_SYMBOL(buffer_migrate_page);
470#endif
469 471
470/* 472/*
471 * Writeback a page to clean the dirty state 473 * Writeback a page to clean the dirty state
@@ -525,7 +527,7 @@ static int fallback_migrate_page(struct address_space *mapping,
525 * Buffers may be managed in a filesystem specific way. 527 * Buffers may be managed in a filesystem specific way.
526 * We must have no buffers or drop them. 528 * We must have no buffers or drop them.
527 */ 529 */
528 if (page_has_buffers(page) && 530 if (PagePrivate(page) &&
529 !try_to_release_page(page, GFP_KERNEL)) 531 !try_to_release_page(page, GFP_KERNEL))
530 return -EAGAIN; 532 return -EAGAIN;
531 533
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 488b7088557c..c0d4ce144dec 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -30,6 +30,8 @@
30#include <linux/sysctl.h> 30#include <linux/sysctl.h>
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/syscalls.h> 32#include <linux/syscalls.h>
33#include <linux/buffer_head.h>
34#include <linux/pagevec.h>
33 35
34/* 36/*
35 * The maximum number of pages to writeout in a single bdflush/kupdate 37 * The maximum number of pages to writeout in a single bdflush/kupdate
@@ -550,6 +552,139 @@ void __init page_writeback_init(void)
550 register_cpu_notifier(&ratelimit_nb); 552 register_cpu_notifier(&ratelimit_nb);
551} 553}
552 554
555/**
556 * generic_writepages - walk the list of dirty pages of the given
557 * address space and writepage() all of them.
558 *
559 * @mapping: address space structure to write
560 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
561 *
562 * This is a library function, which implements the writepages()
563 * address_space_operation.
564 *
565 * If a page is already under I/O, generic_writepages() skips it, even
566 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
567 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
568 * and msync() need to guarantee that all the data which was dirty at the time
569 * the call was made get new I/O started against them. If wbc->sync_mode is
570 * WB_SYNC_ALL then we were called for data integrity and we must wait for
571 * existing IO to complete.
572 *
573 * Derived from mpage_writepages() - if you fix this you should check that
574 * also!
575 */
576int generic_writepages(struct address_space *mapping,
577 struct writeback_control *wbc)
578{
579 struct backing_dev_info *bdi = mapping->backing_dev_info;
580 int ret = 0;
581 int done = 0;
582 int (*writepage)(struct page *page, struct writeback_control *wbc);
583 struct pagevec pvec;
584 int nr_pages;
585 pgoff_t index;
586 pgoff_t end; /* Inclusive */
587 int scanned = 0;
588 int range_whole = 0;
589
590 if (wbc->nonblocking && bdi_write_congested(bdi)) {
591 wbc->encountered_congestion = 1;
592 return 0;
593 }
594
595 writepage = mapping->a_ops->writepage;
596
597 /* deal with chardevs and other special file */
598 if (!writepage)
599 return 0;
600
601 pagevec_init(&pvec, 0);
602 if (wbc->range_cyclic) {
603 index = mapping->writeback_index; /* Start from prev offset */
604 end = -1;
605 } else {
606 index = wbc->range_start >> PAGE_CACHE_SHIFT;
607 end = wbc->range_end >> PAGE_CACHE_SHIFT;
608 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
609 range_whole = 1;
610 scanned = 1;
611 }
612retry:
613 while (!done && (index <= end) &&
614 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
615 PAGECACHE_TAG_DIRTY,
616 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
617 unsigned i;
618
619 scanned = 1;
620 for (i = 0; i < nr_pages; i++) {
621 struct page *page = pvec.pages[i];
622
623 /*
624 * At this point we hold neither mapping->tree_lock nor
625 * lock on the page itself: the page may be truncated or
626 * invalidated (changing page->mapping to NULL), or even
627 * swizzled back from swapper_space to tmpfs file
628 * mapping
629 */
630 lock_page(page);
631
632 if (unlikely(page->mapping != mapping)) {
633 unlock_page(page);
634 continue;
635 }
636
637 if (!wbc->range_cyclic && page->index > end) {
638 done = 1;
639 unlock_page(page);
640 continue;
641 }
642
643 if (wbc->sync_mode != WB_SYNC_NONE)
644 wait_on_page_writeback(page);
645
646 if (PageWriteback(page) ||
647 !clear_page_dirty_for_io(page)) {
648 unlock_page(page);
649 continue;
650 }
651
652 ret = (*writepage)(page, wbc);
653 if (ret) {
654 if (ret == -ENOSPC)
655 set_bit(AS_ENOSPC, &mapping->flags);
656 else
657 set_bit(AS_EIO, &mapping->flags);
658 }
659
660 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
661 unlock_page(page);
662 if (ret || (--(wbc->nr_to_write) <= 0))
663 done = 1;
664 if (wbc->nonblocking && bdi_write_congested(bdi)) {
665 wbc->encountered_congestion = 1;
666 done = 1;
667 }
668 }
669 pagevec_release(&pvec);
670 cond_resched();
671 }
672 if (!scanned && !done) {
673 /*
674 * We hit the last page and there is more work to be done: wrap
675 * back to the start of the file
676 */
677 scanned = 1;
678 index = 0;
679 goto retry;
680 }
681 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
682 mapping->writeback_index = index;
683 return ret;
684}
685
686EXPORT_SYMBOL(generic_writepages);
687
553int do_writepages(struct address_space *mapping, struct writeback_control *wbc) 688int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
554{ 689{
555 int ret; 690 int ret;
@@ -672,9 +807,11 @@ int fastcall set_page_dirty(struct page *page)
672 807
673 if (likely(mapping)) { 808 if (likely(mapping)) {
674 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; 809 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
675 if (spd) 810#ifdef CONFIG_BLOCK
676 return (*spd)(page); 811 if (!spd)
677 return __set_page_dirty_buffers(page); 812 spd = __set_page_dirty_buffers;
813#endif
814 return (*spd)(page);
678 } 815 }
679 if (!PageDirty(page)) { 816 if (!PageDirty(page)) {
680 if (!TestSetPageDirty(page)) 817 if (!TestSetPageDirty(page))
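[Editor's note] generic_writepages() is now a standalone, exported library routine in mm/page-writeback.c rather than a wrapper around the mpage code, so do_writepages() can fall back to it and a filesystem with a plain writepage() method can point its writepages hook straight at it. An illustrative wiring with a hypothetical filesystem:

	#include <linux/fs.h>
	#include <linux/writeback.h>

	static int examplefs_writepage(struct page *page,
				       struct writeback_control *wbc);

	/* Hypothetical fs: no clustering of its own, so let
	 * generic_writepages() walk the PAGECACHE_TAG_DIRTY pages and call
	 * examplefs_writepage() for each one, honouring wbc->nr_to_write
	 * and backing-dev congestion exactly as in the code above. */
	static struct address_space_operations examplefs_aops = {
		.writepage	= examplefs_writepage,
		.writepages	= generic_writepages,
	};
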
diff --git a/mm/truncate.c b/mm/truncate.c
index a654928323dc..8fde6580657e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -17,6 +17,32 @@
17 do_invalidatepage */ 17 do_invalidatepage */
18 18
19 19
20/**
 21 * do_invalidatepage - invalidate part or all of a page
22 * @page: the page which is affected
23 * @offset: the index of the truncation point
24 *
25 * do_invalidatepage() is called when all or part of the page has become
26 * invalidated by a truncate operation.
27 *
28 * do_invalidatepage() does not have to release all buffers, but it must
29 * ensure that no dirty buffer is left outside @offset and that no I/O
30 * is underway against any of the blocks which are outside the truncation
31 * point. Because the caller is about to free (and possibly reuse) those
32 * blocks on-disk.
33 */
34void do_invalidatepage(struct page *page, unsigned long offset)
35{
36 void (*invalidatepage)(struct page *, unsigned long);
37 invalidatepage = page->mapping->a_ops->invalidatepage;
38#ifdef CONFIG_BLOCK
39 if (!invalidatepage)
40 invalidatepage = block_invalidatepage;
41#endif
42 if (invalidatepage)
43 (*invalidatepage)(page, offset);
44}
45
20static inline void truncate_partial_page(struct page *page, unsigned partial) 46static inline void truncate_partial_page(struct page *page, unsigned partial)
21{ 47{
22 memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); 48 memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);