aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jens Axboe <axboe@suse.de> 2006-06-13 02:26:10 -0400
committer: Jens Axboe <axboe@nelson.home.kernel.dk> 2006-06-23 11:10:39 -0400
commit: b31dc66a54ad986b6b73bdc49c8efc17cbad1833 (patch)
tree: 5591383c1cbffe11512da889c971f899333f1a44
parent: 271f18f102c789f59644bb6c53a69da1df72b2f4 (diff)
[PATCH] Kill PF_SYNCWRITE flag

A process flag to indicate whether we are doing sync io is incredibly ugly. It also causes performance problems when one does a lot of async io and then proceeds to sync it. Part of the io will go out as async, and the other part as sync. This causes a disconnect between the previously submitted io and the synced io. For io schedulers such as CFQ, this will cause us lost merges and suboptimal behaviour in scheduling.

Remove PF_SYNCWRITE completely from the fsync/msync paths, and let the O_DIRECT path just directly indicate that the writes are sync by using WRITE_SYNC instead.

Signed-off-by: Jens Axboe <axboe@suse.de>

-rw-r--r--  block/as-iosched.c                  2
-rw-r--r--  block/cfq-iosched.c                 4
-rw-r--r--  block/ll_rw_blk.c                   3
-rw-r--r--  drivers/usb/gadget/file_storage.c   2
-rw-r--r--  fs/buffer.c                         2
-rw-r--r--  fs/direct-io.c                     18
-rw-r--r--  fs/fs-writeback.c                   2
-rw-r--r--  include/linux/blkdev.h              2
-rw-r--r--  include/linux/sched.h              11
-rw-r--r--  mm/msync.c                          3
10 files changed, 20 insertions, 29 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 9b13d72ffefa..56c99fa037df 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1339,7 +1339,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
1339 arq->state = AS_RQ_NEW; 1339 arq->state = AS_RQ_NEW;
1340 1340
1341 if (rq_data_dir(arq->request) == READ 1341 if (rq_data_dir(arq->request) == READ
1342 || current->flags&PF_SYNCWRITE) 1342 || (arq->request->flags & REQ_RW_SYNC))
1343 arq->is_sync = 1; 1343 arq->is_sync = 1;
1344 else 1344 else
1345 arq->is_sync = 0; 1345 arq->is_sync = 0;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c88f161d3fb3..4c4e9cc3ae26 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -277,8 +277,6 @@ static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsi
277static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); 277static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
278static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); 278static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
279 279
280#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE)
281
282/* 280/*
283 * lots of deadline iosched dupes, can be abstracted later... 281 * lots of deadline iosched dupes, can be abstracted later...
284 */ 282 */
@@ -334,7 +332,7 @@ static int cfq_queue_empty(request_queue_t *q)
334 332
335static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) 333static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
336{ 334{
337 if (rw == READ || process_sync(task)) 335 if (rw == READ || rw == WRITE_SYNC)
338 return task->pid; 336 return task->pid;
339 337
340 return CFQ_KEY_ASYNC; 338 return CFQ_KEY_ASYNC;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 17c42ddd31db..2270bb451385 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2827,6 +2827,9 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
2827 if (unlikely(bio_barrier(bio))) 2827 if (unlikely(bio_barrier(bio)))
2828 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 2828 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2829 2829
2830 if (bio_sync(bio))
2831 req->flags |= REQ_RW_SYNC;
2832
2830 req->errors = 0; 2833 req->errors = 0;
2831 req->hard_sector = req->sector = bio->bi_sector; 2834 req->hard_sector = req->sector = bio->bi_sector;
2832 req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); 2835 req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 6f887478b148..a43dc908ac59 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -1906,7 +1906,6 @@ static int fsync_sub(struct lun *curlun)
1906 1906
1907 inode = filp->f_dentry->d_inode; 1907 inode = filp->f_dentry->d_inode;
1908 mutex_lock(&inode->i_mutex); 1908 mutex_lock(&inode->i_mutex);
1909 current->flags |= PF_SYNCWRITE;
1910 rc = filemap_fdatawrite(inode->i_mapping); 1909 rc = filemap_fdatawrite(inode->i_mapping);
1911 err = filp->f_op->fsync(filp, filp->f_dentry, 1); 1910 err = filp->f_op->fsync(filp, filp->f_dentry, 1);
1912 if (!rc) 1911 if (!rc)
@@ -1914,7 +1913,6 @@ static int fsync_sub(struct lun *curlun)
1914 err = filemap_fdatawait(inode->i_mapping); 1913 err = filemap_fdatawait(inode->i_mapping);
1915 if (!rc) 1914 if (!rc)
1916 rc = err; 1915 rc = err;
1917 current->flags &= ~PF_SYNCWRITE;
1918 mutex_unlock(&inode->i_mutex); 1916 mutex_unlock(&inode->i_mutex);
1919 VLDBG(curlun, "fdatasync -> %d\n", rc); 1917 VLDBG(curlun, "fdatasync -> %d\n", rc);
1920 return rc; 1918 return rc;
diff --git a/fs/buffer.c b/fs/buffer.c
index 23f1f3a68077..373bb6292bdc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -331,7 +331,6 @@ long do_fsync(struct file *file, int datasync)
331 goto out; 331 goto out;
332 } 332 }
333 333
334 current->flags |= PF_SYNCWRITE;
335 ret = filemap_fdatawrite(mapping); 334 ret = filemap_fdatawrite(mapping);
336 335
337 /* 336 /*
@@ -346,7 +345,6 @@ long do_fsync(struct file *file, int datasync)
346 err = filemap_fdatawait(mapping); 345 err = filemap_fdatawait(mapping);
347 if (!ret) 346 if (!ret)
348 ret = err; 347 ret = err;
349 current->flags &= ~PF_SYNCWRITE;
350out: 348out:
351 return ret; 349 return ret;
352} 350}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b05d1b218776..538fb0418fba 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -162,7 +162,7 @@ static int dio_refill_pages(struct dio *dio)
162 NULL); /* vmas */ 162 NULL); /* vmas */
163 up_read(&current->mm->mmap_sem); 163 up_read(&current->mm->mmap_sem);
164 164
165 if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) { 165 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
166 struct page *page = ZERO_PAGE(dio->curr_user_address); 166 struct page *page = ZERO_PAGE(dio->curr_user_address);
167 /* 167 /*
168 * A memory fault, but the filesystem has some outstanding 168 * A memory fault, but the filesystem has some outstanding
@@ -535,7 +535,7 @@ static int get_more_blocks(struct dio *dio)
535 map_bh->b_state = 0; 535 map_bh->b_state = 0;
536 map_bh->b_size = fs_count << dio->inode->i_blkbits; 536 map_bh->b_size = fs_count << dio->inode->i_blkbits;
537 537
538 create = dio->rw == WRITE; 538 create = dio->rw & WRITE;
539 if (dio->lock_type == DIO_LOCKING) { 539 if (dio->lock_type == DIO_LOCKING) {
540 if (dio->block_in_file < (i_size_read(dio->inode) >> 540 if (dio->block_in_file < (i_size_read(dio->inode) >>
541 dio->blkbits)) 541 dio->blkbits))
@@ -867,7 +867,7 @@ do_holes:
867 loff_t i_size_aligned; 867 loff_t i_size_aligned;
868 868
869 /* AKPM: eargh, -ENOTBLK is a hack */ 869 /* AKPM: eargh, -ENOTBLK is a hack */
870 if (dio->rw == WRITE) { 870 if (dio->rw & WRITE) {
871 page_cache_release(page); 871 page_cache_release(page);
872 return -ENOTBLK; 872 return -ENOTBLK;
873 } 873 }
@@ -1045,7 +1045,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1045 } 1045 }
1046 } /* end iovec loop */ 1046 } /* end iovec loop */
1047 1047
1048 if (ret == -ENOTBLK && rw == WRITE) { 1048 if (ret == -ENOTBLK && (rw & WRITE)) {
1049 /* 1049 /*
1050 * The remaining part of the request will be 1050 * The remaining part of the request will be
1051 * be handled by buffered I/O when we return 1051 * be handled by buffered I/O when we return
@@ -1089,7 +1089,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1089 if (dio->is_async) { 1089 if (dio->is_async) {
1090 int should_wait = 0; 1090 int should_wait = 0;
1091 1091
1092 if (dio->result < dio->size && rw == WRITE) { 1092 if (dio->result < dio->size && (rw & WRITE)) {
1093 dio->waiter = current; 1093 dio->waiter = current;
1094 should_wait = 1; 1094 should_wait = 1;
1095 } 1095 }
@@ -1142,7 +1142,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1142 ret = transferred; 1142 ret = transferred;
1143 1143
1144 /* We could have also come here on an AIO file extend */ 1144 /* We could have also come here on an AIO file extend */
1145 if (!is_sync_kiocb(iocb) && rw == WRITE && 1145 if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
1146 ret >= 0 && dio->result == dio->size) 1146 ret >= 0 && dio->result == dio->size)
1147 /* 1147 /*
1148 * For AIO writes where we have completed the 1148 * For AIO writes where we have completed the
@@ -1194,7 +1194,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1194 int acquire_i_mutex = 0; 1194 int acquire_i_mutex = 0;
1195 1195
1196 if (rw & WRITE) 1196 if (rw & WRITE)
1197 current->flags |= PF_SYNCWRITE; 1197 rw = WRITE_SYNC;
1198 1198
1199 if (bdev) 1199 if (bdev)
1200 bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); 1200 bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
@@ -1270,7 +1270,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1270 * even for AIO, we need to wait for i/o to complete before 1270 * even for AIO, we need to wait for i/o to complete before
1271 * returning in this case. 1271 * returning in this case.
1272 */ 1272 */
1273 dio->is_async = !is_sync_kiocb(iocb) && !((rw == WRITE) && 1273 dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
1274 (end > i_size_read(inode))); 1274 (end > i_size_read(inode)));
1275 1275
1276 retval = direct_io_worker(rw, iocb, inode, iov, offset, 1276 retval = direct_io_worker(rw, iocb, inode, iov, offset,
@@ -1284,8 +1284,6 @@ out:
1284 mutex_unlock(&inode->i_mutex); 1284 mutex_unlock(&inode->i_mutex);
1285 else if (acquire_i_mutex) 1285 else if (acquire_i_mutex)
1286 mutex_lock(&inode->i_mutex); 1286 mutex_lock(&inode->i_mutex);
1287 if (rw & WRITE)
1288 current->flags &= ~PF_SYNCWRITE;
1289 return retval; 1287 return retval;
1290} 1288}
1291EXPORT_SYMBOL(__blockdev_direct_IO); 1289EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6db95cf3aaa2..031b27a4bc9a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -623,7 +623,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
623 int need_write_inode_now = 0; 623 int need_write_inode_now = 0;
624 int err2; 624 int err2;
625 625
626 current->flags |= PF_SYNCWRITE;
627 if (what & OSYNC_DATA) 626 if (what & OSYNC_DATA)
628 err = filemap_fdatawrite(mapping); 627 err = filemap_fdatawrite(mapping);
629 if (what & (OSYNC_METADATA|OSYNC_DATA)) { 628 if (what & (OSYNC_METADATA|OSYNC_DATA)) {
@@ -636,7 +635,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
636 if (!err) 635 if (!err)
637 err = err2; 636 err = err2;
638 } 637 }
639 current->flags &= ~PF_SYNCWRITE;
640 638
641 spin_lock(&inode_lock); 639 spin_lock(&inode_lock);
642 if ((inode->i_state & I_DIRTY) && 640 if ((inode->i_state & I_DIRTY) &&
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3457e7b97363..482a21d67627 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -241,6 +241,7 @@ enum rq_flag_bits {
241 __REQ_PM_RESUME, /* resume request */ 241 __REQ_PM_RESUME, /* resume request */
242 __REQ_PM_SHUTDOWN, /* shutdown request */ 242 __REQ_PM_SHUTDOWN, /* shutdown request */
243 __REQ_ORDERED_COLOR, /* is before or after barrier */ 243 __REQ_ORDERED_COLOR, /* is before or after barrier */
244 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */
244 __REQ_NR_BITS, /* stops here */ 245 __REQ_NR_BITS, /* stops here */
245}; 246};
246 247
@@ -270,6 +271,7 @@ enum rq_flag_bits {
270#define REQ_PM_RESUME (1 << __REQ_PM_RESUME) 271#define REQ_PM_RESUME (1 << __REQ_PM_RESUME)
271#define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) 272#define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN)
272#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) 273#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
274#define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
273 275
274/* 276/*
275 * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME 277 * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a9d23c7d1b25..38b4791e6a5d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -941,12 +941,11 @@ static inline void put_task_struct(struct task_struct *t)
941#define PF_KSWAPD 0x00040000 /* I am kswapd */ 941#define PF_KSWAPD 0x00040000 /* I am kswapd */
942#define PF_SWAPOFF 0x00080000 /* I am in swapoff */ 942#define PF_SWAPOFF 0x00080000 /* I am in swapoff */
943#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ 943#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
944#define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ 944#define PF_BORROWED_MM 0x00200000 /* I am a kthread doing use_mm */
945#define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ 945#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
946#define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ 946#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
947#define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ 947#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
948#define PF_SPREAD_PAGE 0x04000000 /* Spread page cache over cpuset */ 948#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
949#define PF_SPREAD_SLAB 0x08000000 /* Spread some slab caches over cpuset */
950#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ 949#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
951 950
952/* 951/*
diff --git a/mm/msync.c b/mm/msync.c
index bc6c95376366..d083544df21b 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -170,8 +170,6 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
170 * just ignore them, but return -ENOMEM at the end. 170 * just ignore them, but return -ENOMEM at the end.
171 */ 171 */
172 down_read(&current->mm->mmap_sem); 172 down_read(&current->mm->mmap_sem);
173 if (flags & MS_SYNC)
174 current->flags |= PF_SYNCWRITE;
175 vma = find_vma(current->mm, start); 173 vma = find_vma(current->mm, start);
176 if (!vma) { 174 if (!vma) {
177 error = -ENOMEM; 175 error = -ENOMEM;
@@ -228,7 +226,6 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
228 } 226 }
229 } while (vma && !done); 227 } while (vma && !done);
230out_unlock: 228out_unlock:
231 current->flags &= ~PF_SYNCWRITE;
232 up_read(&current->mm->mmap_sem); 229 up_read(&current->mm->mmap_sem);
233out: 230out:
234 return error; 231 return error;