diff options
author | Jens Axboe <axboe@suse.de> | 2006-06-13 02:26:10 -0400 |
---|---|---|
committer | Jens Axboe <axboe@nelson.home.kernel.dk> | 2006-06-23 11:10:39 -0400 |
commit | b31dc66a54ad986b6b73bdc49c8efc17cbad1833 (patch) | |
tree | 5591383c1cbffe11512da889c971f899333f1a44 | |
parent | 271f18f102c789f59644bb6c53a69da1df72b2f4 (diff) |
[PATCH] Kill PF_SYNCWRITE flag
A process flag to indicate whether we are doing sync io is incredibly
ugly. It also causes performance problems when one does a lot of async
io and then proceeds to sync it. Part of the io will go out as async,
and the other part as sync. This causes a disconnect between the
previously submitted io and the synced io. For io schedulers such as CFQ,
this causes lost merges and suboptimal scheduling behaviour.
Remove PF_SYNCWRITE completely from the fsync/msync paths, and let
the O_DIRECT path just directly indicate that the writes are sync
by using WRITE_SYNC instead.
Signed-off-by: Jens Axboe <axboe@suse.de>
-rw-r--r-- | block/as-iosched.c | 2 | ||||
-rw-r--r-- | block/cfq-iosched.c | 4 | ||||
-rw-r--r-- | block/ll_rw_blk.c | 3 | ||||
-rw-r--r-- | drivers/usb/gadget/file_storage.c | 2 | ||||
-rw-r--r-- | fs/buffer.c | 2 | ||||
-rw-r--r-- | fs/direct-io.c | 18 | ||||
-rw-r--r-- | fs/fs-writeback.c | 2 | ||||
-rw-r--r-- | include/linux/blkdev.h | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 11 | ||||
-rw-r--r-- | mm/msync.c | 3 |
10 files changed, 20 insertions, 29 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c index 9b13d72ffefa..56c99fa037df 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c | |||
@@ -1339,7 +1339,7 @@ static void as_add_request(request_queue_t *q, struct request *rq) | |||
1339 | arq->state = AS_RQ_NEW; | 1339 | arq->state = AS_RQ_NEW; |
1340 | 1340 | ||
1341 | if (rq_data_dir(arq->request) == READ | 1341 | if (rq_data_dir(arq->request) == READ |
1342 | || current->flags&PF_SYNCWRITE) | 1342 | || (arq->request->flags & REQ_RW_SYNC)) |
1343 | arq->is_sync = 1; | 1343 | arq->is_sync = 1; |
1344 | else | 1344 | else |
1345 | arq->is_sync = 0; | 1345 | arq->is_sync = 0; |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c88f161d3fb3..4c4e9cc3ae26 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -277,8 +277,6 @@ static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsi | |||
277 | static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); | 277 | static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); |
278 | static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); | 278 | static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); |
279 | 279 | ||
280 | #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) | ||
281 | |||
282 | /* | 280 | /* |
283 | * lots of deadline iosched dupes, can be abstracted later... | 281 | * lots of deadline iosched dupes, can be abstracted later... |
284 | */ | 282 | */ |
@@ -334,7 +332,7 @@ static int cfq_queue_empty(request_queue_t *q) | |||
334 | 332 | ||
335 | static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) | 333 | static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) |
336 | { | 334 | { |
337 | if (rw == READ || process_sync(task)) | 335 | if (rw == READ || rw == WRITE_SYNC) |
338 | return task->pid; | 336 | return task->pid; |
339 | 337 | ||
340 | return CFQ_KEY_ASYNC; | 338 | return CFQ_KEY_ASYNC; |
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 17c42ddd31db..2270bb451385 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c | |||
@@ -2827,6 +2827,9 @@ static void init_request_from_bio(struct request *req, struct bio *bio) | |||
2827 | if (unlikely(bio_barrier(bio))) | 2827 | if (unlikely(bio_barrier(bio))) |
2828 | req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | 2828 | req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
2829 | 2829 | ||
2830 | if (bio_sync(bio)) | ||
2831 | req->flags |= REQ_RW_SYNC; | ||
2832 | |||
2830 | req->errors = 0; | 2833 | req->errors = 0; |
2831 | req->hard_sector = req->sector = bio->bi_sector; | 2834 | req->hard_sector = req->sector = bio->bi_sector; |
2832 | req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); | 2835 | req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); |
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 6f887478b148..a43dc908ac59 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c | |||
@@ -1906,7 +1906,6 @@ static int fsync_sub(struct lun *curlun) | |||
1906 | 1906 | ||
1907 | inode = filp->f_dentry->d_inode; | 1907 | inode = filp->f_dentry->d_inode; |
1908 | mutex_lock(&inode->i_mutex); | 1908 | mutex_lock(&inode->i_mutex); |
1909 | current->flags |= PF_SYNCWRITE; | ||
1910 | rc = filemap_fdatawrite(inode->i_mapping); | 1909 | rc = filemap_fdatawrite(inode->i_mapping); |
1911 | err = filp->f_op->fsync(filp, filp->f_dentry, 1); | 1910 | err = filp->f_op->fsync(filp, filp->f_dentry, 1); |
1912 | if (!rc) | 1911 | if (!rc) |
@@ -1914,7 +1913,6 @@ static int fsync_sub(struct lun *curlun) | |||
1914 | err = filemap_fdatawait(inode->i_mapping); | 1913 | err = filemap_fdatawait(inode->i_mapping); |
1915 | if (!rc) | 1914 | if (!rc) |
1916 | rc = err; | 1915 | rc = err; |
1917 | current->flags &= ~PF_SYNCWRITE; | ||
1918 | mutex_unlock(&inode->i_mutex); | 1916 | mutex_unlock(&inode->i_mutex); |
1919 | VLDBG(curlun, "fdatasync -> %d\n", rc); | 1917 | VLDBG(curlun, "fdatasync -> %d\n", rc); |
1920 | return rc; | 1918 | return rc; |
diff --git a/fs/buffer.c b/fs/buffer.c index 23f1f3a68077..373bb6292bdc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -331,7 +331,6 @@ long do_fsync(struct file *file, int datasync) | |||
331 | goto out; | 331 | goto out; |
332 | } | 332 | } |
333 | 333 | ||
334 | current->flags |= PF_SYNCWRITE; | ||
335 | ret = filemap_fdatawrite(mapping); | 334 | ret = filemap_fdatawrite(mapping); |
336 | 335 | ||
337 | /* | 336 | /* |
@@ -346,7 +345,6 @@ long do_fsync(struct file *file, int datasync) | |||
346 | err = filemap_fdatawait(mapping); | 345 | err = filemap_fdatawait(mapping); |
347 | if (!ret) | 346 | if (!ret) |
348 | ret = err; | 347 | ret = err; |
349 | current->flags &= ~PF_SYNCWRITE; | ||
350 | out: | 348 | out: |
351 | return ret; | 349 | return ret; |
352 | } | 350 | } |
diff --git a/fs/direct-io.c b/fs/direct-io.c index b05d1b218776..538fb0418fba 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -162,7 +162,7 @@ static int dio_refill_pages(struct dio *dio) | |||
162 | NULL); /* vmas */ | 162 | NULL); /* vmas */ |
163 | up_read(&current->mm->mmap_sem); | 163 | up_read(&current->mm->mmap_sem); |
164 | 164 | ||
165 | if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) { | 165 | if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { |
166 | struct page *page = ZERO_PAGE(dio->curr_user_address); | 166 | struct page *page = ZERO_PAGE(dio->curr_user_address); |
167 | /* | 167 | /* |
168 | * A memory fault, but the filesystem has some outstanding | 168 | * A memory fault, but the filesystem has some outstanding |
@@ -535,7 +535,7 @@ static int get_more_blocks(struct dio *dio) | |||
535 | map_bh->b_state = 0; | 535 | map_bh->b_state = 0; |
536 | map_bh->b_size = fs_count << dio->inode->i_blkbits; | 536 | map_bh->b_size = fs_count << dio->inode->i_blkbits; |
537 | 537 | ||
538 | create = dio->rw == WRITE; | 538 | create = dio->rw & WRITE; |
539 | if (dio->lock_type == DIO_LOCKING) { | 539 | if (dio->lock_type == DIO_LOCKING) { |
540 | if (dio->block_in_file < (i_size_read(dio->inode) >> | 540 | if (dio->block_in_file < (i_size_read(dio->inode) >> |
541 | dio->blkbits)) | 541 | dio->blkbits)) |
@@ -867,7 +867,7 @@ do_holes: | |||
867 | loff_t i_size_aligned; | 867 | loff_t i_size_aligned; |
868 | 868 | ||
869 | /* AKPM: eargh, -ENOTBLK is a hack */ | 869 | /* AKPM: eargh, -ENOTBLK is a hack */ |
870 | if (dio->rw == WRITE) { | 870 | if (dio->rw & WRITE) { |
871 | page_cache_release(page); | 871 | page_cache_release(page); |
872 | return -ENOTBLK; | 872 | return -ENOTBLK; |
873 | } | 873 | } |
@@ -1045,7 +1045,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1045 | } | 1045 | } |
1046 | } /* end iovec loop */ | 1046 | } /* end iovec loop */ |
1047 | 1047 | ||
1048 | if (ret == -ENOTBLK && rw == WRITE) { | 1048 | if (ret == -ENOTBLK && (rw & WRITE)) { |
1049 | /* | 1049 | /* |
1050 | * The remaining part of the request will be | 1050 | * The remaining part of the request will be |
1051 | * be handled by buffered I/O when we return | 1051 | * be handled by buffered I/O when we return |
@@ -1089,7 +1089,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1089 | if (dio->is_async) { | 1089 | if (dio->is_async) { |
1090 | int should_wait = 0; | 1090 | int should_wait = 0; |
1091 | 1091 | ||
1092 | if (dio->result < dio->size && rw == WRITE) { | 1092 | if (dio->result < dio->size && (rw & WRITE)) { |
1093 | dio->waiter = current; | 1093 | dio->waiter = current; |
1094 | should_wait = 1; | 1094 | should_wait = 1; |
1095 | } | 1095 | } |
@@ -1142,7 +1142,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1142 | ret = transferred; | 1142 | ret = transferred; |
1143 | 1143 | ||
1144 | /* We could have also come here on an AIO file extend */ | 1144 | /* We could have also come here on an AIO file extend */ |
1145 | if (!is_sync_kiocb(iocb) && rw == WRITE && | 1145 | if (!is_sync_kiocb(iocb) && (rw & WRITE) && |
1146 | ret >= 0 && dio->result == dio->size) | 1146 | ret >= 0 && dio->result == dio->size) |
1147 | /* | 1147 | /* |
1148 | * For AIO writes where we have completed the | 1148 | * For AIO writes where we have completed the |
@@ -1194,7 +1194,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1194 | int acquire_i_mutex = 0; | 1194 | int acquire_i_mutex = 0; |
1195 | 1195 | ||
1196 | if (rw & WRITE) | 1196 | if (rw & WRITE) |
1197 | current->flags |= PF_SYNCWRITE; | 1197 | rw = WRITE_SYNC; |
1198 | 1198 | ||
1199 | if (bdev) | 1199 | if (bdev) |
1200 | bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); | 1200 | bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); |
@@ -1270,7 +1270,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1270 | * even for AIO, we need to wait for i/o to complete before | 1270 | * even for AIO, we need to wait for i/o to complete before |
1271 | * returning in this case. | 1271 | * returning in this case. |
1272 | */ | 1272 | */ |
1273 | dio->is_async = !is_sync_kiocb(iocb) && !((rw == WRITE) && | 1273 | dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && |
1274 | (end > i_size_read(inode))); | 1274 | (end > i_size_read(inode))); |
1275 | 1275 | ||
1276 | retval = direct_io_worker(rw, iocb, inode, iov, offset, | 1276 | retval = direct_io_worker(rw, iocb, inode, iov, offset, |
@@ -1284,8 +1284,6 @@ out: | |||
1284 | mutex_unlock(&inode->i_mutex); | 1284 | mutex_unlock(&inode->i_mutex); |
1285 | else if (acquire_i_mutex) | 1285 | else if (acquire_i_mutex) |
1286 | mutex_lock(&inode->i_mutex); | 1286 | mutex_lock(&inode->i_mutex); |
1287 | if (rw & WRITE) | ||
1288 | current->flags &= ~PF_SYNCWRITE; | ||
1289 | return retval; | 1287 | return retval; |
1290 | } | 1288 | } |
1291 | EXPORT_SYMBOL(__blockdev_direct_IO); | 1289 | EXPORT_SYMBOL(__blockdev_direct_IO); |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 6db95cf3aaa2..031b27a4bc9a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -623,7 +623,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int | |||
623 | int need_write_inode_now = 0; | 623 | int need_write_inode_now = 0; |
624 | int err2; | 624 | int err2; |
625 | 625 | ||
626 | current->flags |= PF_SYNCWRITE; | ||
627 | if (what & OSYNC_DATA) | 626 | if (what & OSYNC_DATA) |
628 | err = filemap_fdatawrite(mapping); | 627 | err = filemap_fdatawrite(mapping); |
629 | if (what & (OSYNC_METADATA|OSYNC_DATA)) { | 628 | if (what & (OSYNC_METADATA|OSYNC_DATA)) { |
@@ -636,7 +635,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int | |||
636 | if (!err) | 635 | if (!err) |
637 | err = err2; | 636 | err = err2; |
638 | } | 637 | } |
639 | current->flags &= ~PF_SYNCWRITE; | ||
640 | 638 | ||
641 | spin_lock(&inode_lock); | 639 | spin_lock(&inode_lock); |
642 | if ((inode->i_state & I_DIRTY) && | 640 | if ((inode->i_state & I_DIRTY) && |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3457e7b97363..482a21d67627 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -241,6 +241,7 @@ enum rq_flag_bits { | |||
241 | __REQ_PM_RESUME, /* resume request */ | 241 | __REQ_PM_RESUME, /* resume request */ |
242 | __REQ_PM_SHUTDOWN, /* shutdown request */ | 242 | __REQ_PM_SHUTDOWN, /* shutdown request */ |
243 | __REQ_ORDERED_COLOR, /* is before or after barrier */ | 243 | __REQ_ORDERED_COLOR, /* is before or after barrier */ |
244 | __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ | ||
244 | __REQ_NR_BITS, /* stops here */ | 245 | __REQ_NR_BITS, /* stops here */ |
245 | }; | 246 | }; |
246 | 247 | ||
@@ -270,6 +271,7 @@ enum rq_flag_bits { | |||
270 | #define REQ_PM_RESUME (1 << __REQ_PM_RESUME) | 271 | #define REQ_PM_RESUME (1 << __REQ_PM_RESUME) |
271 | #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) | 272 | #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) |
272 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) | 273 | #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) |
274 | #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) | ||
273 | 275 | ||
274 | /* | 276 | /* |
275 | * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME | 277 | * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME |
diff --git a/include/linux/sched.h b/include/linux/sched.h index a9d23c7d1b25..38b4791e6a5d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -941,12 +941,11 @@ static inline void put_task_struct(struct task_struct *t) | |||
941 | #define PF_KSWAPD 0x00040000 /* I am kswapd */ | 941 | #define PF_KSWAPD 0x00040000 /* I am kswapd */ |
942 | #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ | 942 | #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ |
943 | #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ | 943 | #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ |
944 | #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ | 944 | #define PF_BORROWED_MM 0x00200000 /* I am a kthread doing use_mm */ |
945 | #define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ | 945 | #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ |
946 | #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ | 946 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ |
947 | #define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ | 947 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ |
948 | #define PF_SPREAD_PAGE 0x04000000 /* Spread page cache over cpuset */ | 948 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ |
949 | #define PF_SPREAD_SLAB 0x08000000 /* Spread some slab caches over cpuset */ | ||
950 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ | 949 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ |
951 | 950 | ||
952 | /* | 951 | /* |
diff --git a/mm/msync.c b/mm/msync.c index bc6c95376366..d083544df21b 100644 --- a/mm/msync.c +++ b/mm/msync.c | |||
@@ -170,8 +170,6 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) | |||
170 | * just ignore them, but return -ENOMEM at the end. | 170 | * just ignore them, but return -ENOMEM at the end. |
171 | */ | 171 | */ |
172 | down_read(&current->mm->mmap_sem); | 172 | down_read(&current->mm->mmap_sem); |
173 | if (flags & MS_SYNC) | ||
174 | current->flags |= PF_SYNCWRITE; | ||
175 | vma = find_vma(current->mm, start); | 173 | vma = find_vma(current->mm, start); |
176 | if (!vma) { | 174 | if (!vma) { |
177 | error = -ENOMEM; | 175 | error = -ENOMEM; |
@@ -228,7 +226,6 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) | |||
228 | } | 226 | } |
229 | } while (vma && !done); | 227 | } while (vma && !done); |
230 | out_unlock: | 228 | out_unlock: |
231 | current->flags &= ~PF_SYNCWRITE; | ||
232 | up_read(&current->mm->mmap_sem); | 229 | up_read(&current->mm->mmap_sem); |
233 | out: | 230 | out: |
234 | return error; | 231 | return error; |