-rw-r--r--  Documentation/block/biodoc.txt |  19
-rw-r--r--  block/as-iosched.c             | 116
-rw-r--r--  block/blk-barrier.c            |   3
-rw-r--r--  block/blk-sysfs.c              |   4
-rw-r--r--  block/blk.h                    |   4
-rw-r--r--  block/cfq-iosched.c            | 270
-rw-r--r--  block/elevator.c               |   8
-rw-r--r--  block/ioctl.c                  |   2
-rw-r--r--  block/scsi_ioctl.c             |   6
-rw-r--r--  drivers/block/brd.c            |   5
-rw-r--r--  drivers/md/dm-bio-list.h       | 117
-rw-r--r--  drivers/md/dm-delay.c          |   2
-rw-r--r--  drivers/md/dm-mpath.c          |   1
-rw-r--r--  drivers/md/dm-raid1.c          |   1
-rw-r--r--  drivers/md/dm-region-hash.c    |   1
-rw-r--r--  drivers/md/dm-snap.c           |   1
-rw-r--r--  drivers/md/dm.c                |   1
-rw-r--r--  drivers/md/raid1.c             |   1
-rw-r--r--  drivers/md/raid10.c            |   1
-rw-r--r--  fs/bio.c                       |  18
-rw-r--r--  fs/buffer.c                    |  11
-rw-r--r--  fs/direct-io.c                 |   2
-rw-r--r--  fs/ext4/extents.c              |   2
-rw-r--r--  fs/gfs2/ops_fstype.c           |   5
-rw-r--r--  fs/inode.c                     |  36
-rw-r--r--  fs/ocfs2/file.c                |  94
-rw-r--r--  fs/pipe.c                      |  42
-rw-r--r--  fs/splice.c                    | 371
-rw-r--r--  include/linux/bio.h            | 109
-rw-r--r--  include/linux/fs.h             |  64
-rw-r--r--  include/linux/pipe_fs_i.h      |   5
-rw-r--r--  include/linux/splice.h         |  12
-rw-r--r--  kernel/power/swap.c            |   2
33 files changed, 814 insertions, 522 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index ecad6ee75705..6fab97ea7e6b 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -1040,23 +1040,21 @@ Front merges are handled by the binary trees in AS and deadline schedulers.
 iii. Plugging the queue to batch requests in anticipation of opportunities for
      merge/sort optimizations
 
-This is just the same as in 2.4 so far, though per-device unplugging
-support is anticipated for 2.5. Also with a priority-based i/o scheduler,
-such decisions could be based on request priorities.
-
 Plugging is an approach that the current i/o scheduling algorithm resorts to so
 that it collects up enough requests in the queue to be able to take
 advantage of the sorting/merging logic in the elevator. If the
 queue is empty when a request comes in, then it plugs the request queue
-(sort of like plugging the bottom of a vessel to get fluid to build up)
+(sort of like plugging the bath tub of a vessel to get fluid to build up)
 till it fills up with a few more requests, before starting to service
 the requests. This provides an opportunity to merge/sort the requests before
 passing them down to the device. There are various conditions when the queue is
 unplugged (to open up the flow again), either through a scheduled task or
 could be on demand. For example wait_on_buffer sets the unplugging going
-(by running tq_disk) so the read gets satisfied soon. So in the read case,
-the queue gets explicitly unplugged as part of waiting for completion,
-in fact all queues get unplugged as a side-effect.
+through sync_buffer() running blk_run_address_space(mapping). Or the caller
+can do it explicitly through blk_unplug(bdev). So in the read case,
+the queue gets explicitly unplugged as part of waiting for completion on that
+buffer. For page driven IO, the address space ->sync_page() takes care of
+doing the blk_run_address_space().
 
 Aside:
 This is kind of controversial territory, as it's not clear if plugging is
@@ -1067,11 +1065,6 @@ Aside:
 multi-page bios being queued in one shot, we may not need to wait to merge
 a big request from the broken up pieces coming by.
 
-Per-queue granularity unplugging (still a Todo) may help reduce some of the
-concerns with just a single tq_disk flush approach. Something like
-blk_kick_queue() to unplug a specific queue (right away ?)
-or optionally, all queues, is in the plan.
-
 4.4 I/O contexts
 I/O contexts provide a dynamically allocated per process data area. They may
 be used in I/O schedulers, and in the block layer (could be used for IO statis,
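For context on the unplugging paths named in the updated text: wait_on_buffer() reaches blk_run_address_space() via sync_buffer(), and page-driven I/O goes through the address_space ->sync_page() hook. A minimal sketch of such a hook, modeled on block_sync_page() in fs/buffer.c of this era (the function name below is illustrative, not part of this patch):

	/* Unplug the queue(s) backing this page's mapping so a reader
	 * blocked on the page is serviced promptly. */
	static void example_sync_page(struct page *page)
	{
		struct address_space *mapping = page_mapping(page);

		if (mapping)
			blk_run_address_space(mapping);
	}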
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 631f6f44460a..c48fa670d221 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -17,9 +17,6 @@
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
 
-#define REQ_SYNC 1
-#define REQ_ASYNC 0
-
 /*
  * See Documentation/block/as-iosched.txt
  */
@@ -93,7 +90,7 @@ struct as_data {
	struct list_head fifo_list[2];
 
	struct request *next_rq[2];	/* next in sort order */
-	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
+	sector_t last_sector[2];	/* last SYNC & ASYNC sectors */
 
	unsigned long exit_prob;	/* probability a task will exit while
					   being waited on */
@@ -109,7 +106,7 @@ struct as_data {
	unsigned long last_check_fifo[2];
	int changed_batch;		/* 1: waiting for old batch to end */
	int new_batch;			/* 1: waiting on first read complete */
-	int batch_data_dir;		/* current batch REQ_SYNC / REQ_ASYNC */
+	int batch_data_dir;		/* current batch SYNC / ASYNC */
	int write_batch_count;		/* max # of reqs in a write batch */
	int current_write_count;	/* how many requests left this batch */
	int write_batch_idled;		/* has the write batch gone idle? */
@@ -554,7 +551,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
	if (aic == NULL)
		return;
 
-	if (data_dir == REQ_SYNC) {
+	if (data_dir == BLK_RW_SYNC) {
		unsigned long in_flight = atomic_read(&aic->nr_queued)
					+ atomic_read(&aic->nr_dispatched);
		spin_lock(&aic->lock);
@@ -811,7 +808,7 @@ static void as_update_rq(struct as_data *ad, struct request *rq)
  */
 static void update_write_batch(struct as_data *ad)
 {
-	unsigned long batch = ad->batch_expire[REQ_ASYNC];
+	unsigned long batch = ad->batch_expire[BLK_RW_ASYNC];
	long write_time;
 
	write_time = (jiffies - ad->current_batch_expires) + batch;
@@ -855,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
			kblockd_schedule_work(q, &ad->antic_work);
		ad->changed_batch = 0;
 
-		if (ad->batch_data_dir == REQ_SYNC)
+		if (ad->batch_data_dir == BLK_RW_SYNC)
			ad->new_batch = 1;
	}
	WARN_ON(ad->nr_dispatched == 0);
@@ -869,7 +866,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
	if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
		update_write_batch(ad);
		ad->current_batch_expires = jiffies +
-				ad->batch_expire[REQ_SYNC];
+				ad->batch_expire[BLK_RW_SYNC];
		ad->new_batch = 0;
	}
 
@@ -960,7 +957,7 @@ static inline int as_batch_expired(struct as_data *ad)
	if (ad->changed_batch || ad->new_batch)
		return 0;
 
-	if (ad->batch_data_dir == REQ_SYNC)
+	if (ad->batch_data_dir == BLK_RW_SYNC)
		/* TODO! add a check so a complete fifo gets written? */
		return time_after(jiffies, ad->current_batch_expires);
 
@@ -986,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
	 */
	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
 
-	if (data_dir == REQ_SYNC) {
+	if (data_dir == BLK_RW_SYNC) {
		struct io_context *ioc = RQ_IOC(rq);
		/* In case we have to anticipate after this */
		copy_io_context(&ad->io_context, &ioc);
@@ -1025,41 +1022,41 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
 static int as_dispatch_request(struct request_queue *q, int force)
 {
	struct as_data *ad = q->elevator->elevator_data;
-	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
-	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
+	const int reads = !list_empty(&ad->fifo_list[BLK_RW_SYNC]);
+	const int writes = !list_empty(&ad->fifo_list[BLK_RW_ASYNC]);
	struct request *rq;
 
	if (unlikely(force)) {
		/*
		 * Forced dispatch, accounting is useless.  Reset
		 * accounting states and dump fifo_lists.  Note that
-		 * batch_data_dir is reset to REQ_SYNC to avoid
+		 * batch_data_dir is reset to BLK_RW_SYNC to avoid
		 * screwing write batch accounting as write batch
		 * accounting occurs on W->R transition.
		 */
		int dispatched = 0;
 
-		ad->batch_data_dir = REQ_SYNC;
+		ad->batch_data_dir = BLK_RW_SYNC;
		ad->changed_batch = 0;
		ad->new_batch = 0;
 
-		while (ad->next_rq[REQ_SYNC]) {
-			as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
+		while (ad->next_rq[BLK_RW_SYNC]) {
+			as_move_to_dispatch(ad, ad->next_rq[BLK_RW_SYNC]);
			dispatched++;
		}
-		ad->last_check_fifo[REQ_SYNC] = jiffies;
+		ad->last_check_fifo[BLK_RW_SYNC] = jiffies;
 
-		while (ad->next_rq[REQ_ASYNC]) {
-			as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
+		while (ad->next_rq[BLK_RW_ASYNC]) {
+			as_move_to_dispatch(ad, ad->next_rq[BLK_RW_ASYNC]);
			dispatched++;
		}
-		ad->last_check_fifo[REQ_ASYNC] = jiffies;
+		ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
 
		return dispatched;
	}
 
	/* Signal that the write batch was uncontended, so we can't time it */
-	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
+	if (ad->batch_data_dir == BLK_RW_ASYNC && !reads) {
		if (ad->current_write_count == 0 || !writes)
			ad->write_batch_idled = 1;
	}
@@ -1076,8 +1073,8 @@ static int as_dispatch_request(struct request_queue *q, int force)
	 */
	rq = ad->next_rq[ad->batch_data_dir];
 
-	if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
-		if (as_fifo_expired(ad, REQ_SYNC))
+	if (ad->batch_data_dir == BLK_RW_SYNC && ad->antic_expire) {
+		if (as_fifo_expired(ad, BLK_RW_SYNC))
			goto fifo_expired;
 
		if (as_can_anticipate(ad, rq)) {
@@ -1090,7 +1087,7 @@ static int as_dispatch_request(struct request_queue *q, int force)
			/* we have a "next request" */
			if (reads && !writes)
				ad->current_batch_expires =
-					jiffies + ad->batch_expire[REQ_SYNC];
+					jiffies + ad->batch_expire[BLK_RW_SYNC];
			goto dispatch_request;
		}
	}
@@ -1101,20 +1098,20 @@ static int as_dispatch_request(struct request_queue *q, int force)
	 */
 
	if (reads) {
-		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_SYNC]));
+		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_SYNC]));
 
-		if (writes && ad->batch_data_dir == REQ_SYNC)
+		if (writes && ad->batch_data_dir == BLK_RW_SYNC)
			/*
			 * Last batch was a read, switch to writes
			 */
			goto dispatch_writes;
 
-		if (ad->batch_data_dir == REQ_ASYNC) {
+		if (ad->batch_data_dir == BLK_RW_ASYNC) {
			WARN_ON(ad->new_batch);
			ad->changed_batch = 1;
		}
-		ad->batch_data_dir = REQ_SYNC;
-		rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
+		ad->batch_data_dir = BLK_RW_SYNC;
+		rq = rq_entry_fifo(ad->fifo_list[BLK_RW_SYNC].next);
		ad->last_check_fifo[ad->batch_data_dir] = jiffies;
		goto dispatch_request;
	}
@@ -1125,9 +1122,9 @@ static int as_dispatch_request(struct request_queue *q, int force)
 
	if (writes) {
 dispatch_writes:
-		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_ASYNC]));
+		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_ASYNC]));
 
-		if (ad->batch_data_dir == REQ_SYNC) {
+		if (ad->batch_data_dir == BLK_RW_SYNC) {
			ad->changed_batch = 1;
 
			/*
@@ -1137,11 +1134,11 @@ dispatch_writes:
			 */
			ad->new_batch = 0;
		}
-		ad->batch_data_dir = REQ_ASYNC;
+		ad->batch_data_dir = BLK_RW_ASYNC;
		ad->current_write_count = ad->write_batch_count;
		ad->write_batch_idled = 0;
-		rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next);
-		ad->last_check_fifo[REQ_ASYNC] = jiffies;
+		rq = rq_entry_fifo(ad->fifo_list[BLK_RW_ASYNC].next);
+		ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
		goto dispatch_request;
	}
 
@@ -1164,9 +1161,9 @@ fifo_expired:
	if (ad->nr_dispatched)
		return 0;
 
-	if (ad->batch_data_dir == REQ_ASYNC)
+	if (ad->batch_data_dir == BLK_RW_ASYNC)
		ad->current_batch_expires = jiffies +
-			ad->batch_expire[REQ_ASYNC];
+			ad->batch_expire[BLK_RW_ASYNC];
	else
		ad->new_batch = 1;
 
@@ -1238,8 +1235,8 @@ static int as_queue_empty(struct request_queue *q)
 {
	struct as_data *ad = q->elevator->elevator_data;
 
-	return list_empty(&ad->fifo_list[REQ_ASYNC])
-		&& list_empty(&ad->fifo_list[REQ_SYNC]);
+	return list_empty(&ad->fifo_list[BLK_RW_ASYNC])
+		&& list_empty(&ad->fifo_list[BLK_RW_SYNC]);
 }
 
 static int
@@ -1346,8 +1343,8 @@ static void as_exit_queue(struct elevator_queue *e)
	del_timer_sync(&ad->antic_timer);
	cancel_work_sync(&ad->antic_work);
 
-	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
-	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
+	BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_SYNC]));
+	BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_ASYNC]));
 
	put_io_context(ad->io_context);
	kfree(ad);
@@ -1372,18 +1369,18 @@ static void *as_init_queue(struct request_queue *q)
	init_timer(&ad->antic_timer);
	INIT_WORK(&ad->antic_work, as_work_handler);
 
-	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
-	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
-	ad->sort_list[REQ_SYNC] = RB_ROOT;
-	ad->sort_list[REQ_ASYNC] = RB_ROOT;
-	ad->fifo_expire[REQ_SYNC] = default_read_expire;
-	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
+	INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_SYNC]);
+	INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_ASYNC]);
+	ad->sort_list[BLK_RW_SYNC] = RB_ROOT;
+	ad->sort_list[BLK_RW_ASYNC] = RB_ROOT;
+	ad->fifo_expire[BLK_RW_SYNC] = default_read_expire;
+	ad->fifo_expire[BLK_RW_ASYNC] = default_write_expire;
	ad->antic_expire = default_antic_expire;
-	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
-	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
+	ad->batch_expire[BLK_RW_SYNC] = default_read_batch_expire;
+	ad->batch_expire[BLK_RW_ASYNC] = default_write_batch_expire;
 
-	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
-	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
+	ad->current_batch_expires = jiffies + ad->batch_expire[BLK_RW_SYNC];
+	ad->write_batch_count = ad->batch_expire[BLK_RW_ASYNC] / 10;
	if (ad->write_batch_count < 2)
		ad->write_batch_count = 2;
 
@@ -1432,11 +1429,11 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
	struct as_data *ad = e->elevator_data;				\
	return as_var_show(jiffies_to_msecs((__VAR)), (page));		\
 }
-SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]);
-SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]);
+SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[BLK_RW_SYNC]);
+SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[BLK_RW_ASYNC]);
 SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire);
-SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]);
-SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
+SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[BLK_RW_SYNC]);
+SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[BLK_RW_ASYNC]);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)				\
@@ -1451,13 +1448,14 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
	*(__PTR) = msecs_to_jiffies(*(__PTR));				\
	return ret;							\
 }
-STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
-STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
+STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[BLK_RW_SYNC], 0, INT_MAX);
+STORE_FUNCTION(as_write_expire_store,
+			&ad->fifo_expire[BLK_RW_ASYNC], 0, INT_MAX);
 STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX);
 STORE_FUNCTION(as_read_batch_expire_store,
-			&ad->batch_expire[REQ_SYNC], 0, INT_MAX);
+			&ad->batch_expire[BLK_RW_SYNC], 0, INT_MAX);
 STORE_FUNCTION(as_write_batch_expire_store,
-			&ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
+			&ad->batch_expire[BLK_RW_ASYNC], 0, INT_MAX);
 #undef STORE_FUNCTION
 
 #define AS_ATTR(name)							\
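Note on the rename above: BLK_RW_SYNC and BLK_RW_ASYNC are the generic block-layer indices that replace the scheduler-private REQ_SYNC/REQ_ASYNC defines. They keep the same values, so the fifo_list/sort_list/batch_expire array indexing is unchanged; as best I recall, the mainline definition of this era (in include/linux/backing-dev.h) reads:

	enum {
		BLK_RW_ASYNC	= 0,
		BLK_RW_SYNC	= 1,
	};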
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index f7dae57e6cab..20b4111fa050 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -319,9 +319,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
		return -ENXIO;
 
	bio = bio_alloc(GFP_KERNEL, 0);
-	if (!bio)
-		return -ENOMEM;
-
	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_private = &wait;
	bio->bi_bdev = bdev;
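The -ENOMEM check dropped above (and the identical one dropped from block/ioctl.c below) relies on bio_alloc() being mempool-backed: with a blocking mask such as GFP_KERNEL the allocation waits for memory rather than returning NULL. An illustration of the calling convention the deletion assumes (not part of the patch; my_end_io is a placeholder name):

	struct bio *bio = bio_alloc(GFP_KERNEL, 0);	/* sleeps, never NULL */

	bio->bi_end_io = my_end_io;	/* safe to dereference immediately */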
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 73f36beff5cd..cac4e9febe6a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -209,14 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
	ssize_t ret = queue_var_store(&stats, page, count);
 
	spin_lock_irq(q->queue_lock);
-	elv_quisce_start(q);
+	elv_quiesce_start(q);
 
	if (stats)
		queue_flag_set(QUEUE_FLAG_IO_STAT, q);
	else
		queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
 
-	elv_quisce_end(q);
+	elv_quiesce_end(q);
	spin_unlock_irq(q->queue_lock);
 
	return ret;
diff --git a/block/blk.h b/block/blk.h
index 24fcaeeaf620..5dfc41267a08 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -70,8 +70,8 @@ void blk_queue_congestion_threshold(struct request_queue *q);
 
 int blk_dev_init(void);
 
-void elv_quisce_start(struct request_queue *q);
-void elv_quisce_end(struct request_queue *q);
+void elv_quiesce_start(struct request_queue *q);
+void elv_quiesce_end(struct request_queue *q);
 
 
 /*
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a4809de6fea6..0d3b70de3d80 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -56,9 +56,6 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
 
-#define ASYNC			(0)
-#define SYNC			(1)
-
 #define sample_valid(samples)	((samples) > 80)
 
 /*
@@ -83,6 +80,14 @@ struct cfq_data {
	 * rr list of queues with requests and the count of them
	 */
	struct cfq_rb_root service_tree;
+
+	/*
+	 * Each priority tree is sorted by next_request position.  These
+	 * trees are used when determining if two or more queues are
+	 * interleaving requests (see cfq_close_cooperator).
+	 */
+	struct rb_root prio_trees[CFQ_PRIO_LISTS];
+
	unsigned int busy_queues;
	/*
	 * Used to track any pending rt requests so we can pre-empt current
@@ -147,6 +152,8 @@ struct cfq_queue {
	struct rb_node rb_node;
	/* service_tree key */
	unsigned long rb_key;
+	/* prio tree member */
+	struct rb_node p_node;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
@@ -185,6 +192,7 @@ enum cfqq_state_flags {
	CFQ_CFQQ_FLAG_prio_changed,	/* task priority has changed */
	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
	CFQ_CFQQ_FLAG_sync,		/* synchronous queue */
+	CFQ_CFQQ_FLAG_coop,		/* has done a coop jump of the queue */
 };
 
 #define CFQ_CFQQ_FNS(name)						\
@@ -211,6 +219,7 @@ CFQ_CFQQ_FNS(idle_window);
 CFQ_CFQQ_FNS(prio_changed);
 CFQ_CFQQ_FNS(slice_new);
 CFQ_CFQQ_FNS(sync);
+CFQ_CFQQ_FNS(coop);
 #undef CFQ_CFQQ_FNS
 
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...)	\
@@ -419,13 +428,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
	return NULL;
 }
 
+static void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+	rb_erase(n, root);
+	RB_CLEAR_NODE(n);
+}
+
 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
 {
	if (root->left == n)
		root->left = NULL;
-
-	rb_erase(n, &root->rb);
-	RB_CLEAR_NODE(n);
+	rb_erase_init(n, &root->rb);
 }
 
 /*
@@ -470,8 +483,8 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
  * requests waiting to be processed. It is sorted in the order that
  * we will service the queues.
  */
-static void cfq_service_tree_add(struct cfq_data *cfqd,
-				    struct cfq_queue *cfqq, int add_front)
+static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+				 int add_front)
 {
	struct rb_node **p, *parent;
	struct cfq_queue *__cfqq;
@@ -544,6 +557,63 @@ static void cfq_service_tree_add(struct cfq_data *cfqd,
	rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
 }
 
+static struct cfq_queue *
+cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector,
+		     struct rb_node **ret_parent, struct rb_node ***rb_link)
+{
+	struct rb_root *root = &cfqd->prio_trees[ioprio];
+	struct rb_node **p, *parent;
+	struct cfq_queue *cfqq = NULL;
+
+	parent = NULL;
+	p = &root->rb_node;
+	while (*p) {
+		struct rb_node **n;
+
+		parent = *p;
+		cfqq = rb_entry(parent, struct cfq_queue, p_node);
+
+		/*
+		 * Sort strictly based on sector.  Smallest to the left,
+		 * largest to the right.
+		 */
+		if (sector > cfqq->next_rq->sector)
+			n = &(*p)->rb_right;
+		else if (sector < cfqq->next_rq->sector)
+			n = &(*p)->rb_left;
+		else
+			break;
+		p = n;
+	}
+
+	*ret_parent = parent;
+	if (rb_link)
+		*rb_link = p;
+	return NULL;
+}
+
+static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio];
+	struct rb_node **p, *parent;
+	struct cfq_queue *__cfqq;
+
+	if (!RB_EMPTY_NODE(&cfqq->p_node))
+		rb_erase_init(&cfqq->p_node, root);
+
+	if (cfq_class_idle(cfqq))
+		return;
+	if (!cfqq->next_rq)
+		return;
+
+	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio, cfqq->next_rq->sector,
+				      &parent, &p);
+	BUG_ON(__cfqq);
+
+	rb_link_node(&cfqq->p_node, parent, p);
+	rb_insert_color(&cfqq->p_node, root);
+}
+
 /*
  * Update cfqq's position in the service tree.
  */
@@ -552,8 +622,10 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
	/*
	 * Resorting requires the cfqq to be on the RR list already.
	 */
-	if (cfq_cfqq_on_rr(cfqq))
+	if (cfq_cfqq_on_rr(cfqq)) {
		cfq_service_tree_add(cfqd, cfqq, 0);
+		cfq_prio_tree_add(cfqd, cfqq);
+	}
 }
 
 /*
@@ -584,6 +656,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
	if (!RB_EMPTY_NODE(&cfqq->rb_node))
		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+	if (!RB_EMPTY_NODE(&cfqq->p_node))
+		rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]);
 
	BUG_ON(!cfqd->busy_queues);
	cfqd->busy_queues--;
@@ -613,7 +687,7 @@ static void cfq_add_rq_rb(struct request *rq)
 {
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
-	struct request *__alias;
+	struct request *__alias, *prev;
 
	cfqq->queued[rq_is_sync(rq)]++;
 
@@ -630,7 +704,15 @@ static void cfq_add_rq_rb(struct request *rq)
	/*
	 * check if this request is a better next-serve candidate
	 */
+	prev = cfqq->next_rq;
	cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
+
+	/*
+	 * adjust priority tree position, if ->next_rq changes
+	 */
+	if (prev != cfqq->next_rq)
+		cfq_prio_tree_add(cfqd, cfqq);
+
	BUG_ON(!cfqq->next_rq);
 }
 
@@ -843,11 +925,15 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 /*
  * Get and set a new active queue for service.
  */
-static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
+static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
+					      struct cfq_queue *cfqq)
 {
-	struct cfq_queue *cfqq;
+	if (!cfqq) {
+		cfqq = cfq_get_next_queue(cfqd);
+		if (cfqq)
+			cfq_clear_cfqq_coop(cfqq);
+	}
 
-	cfqq = cfq_get_next_queue(cfqd);
	__cfq_set_active_queue(cfqd, cfqq);
	return cfqq;
 }
@@ -871,17 +957,89 @@ static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
	return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
 }
 
-static int cfq_close_cooperator(struct cfq_data *cfq_data,
-				struct cfq_queue *cfqq)
+static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
+				    struct cfq_queue *cur_cfqq)
+{
+	struct rb_root *root = &cfqd->prio_trees[cur_cfqq->ioprio];
+	struct rb_node *parent, *node;
+	struct cfq_queue *__cfqq;
+	sector_t sector = cfqd->last_position;
+
+	if (RB_EMPTY_ROOT(root))
+		return NULL;
+
+	/*
+	 * First, if we find a request starting at the end of the last
+	 * request, choose it.
+	 */
+	__cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio,
+				      sector, &parent, NULL);
+	if (__cfqq)
+		return __cfqq;
+
+	/*
+	 * If the exact sector wasn't found, the parent of the NULL leaf
+	 * will contain the closest sector.
+	 */
+	__cfqq = rb_entry(parent, struct cfq_queue, p_node);
+	if (cfq_rq_close(cfqd, __cfqq->next_rq))
+		return __cfqq;
+
+	if (__cfqq->next_rq->sector < sector)
+		node = rb_next(&__cfqq->p_node);
+	else
+		node = rb_prev(&__cfqq->p_node);
+	if (!node)
+		return NULL;
+
+	__cfqq = rb_entry(node, struct cfq_queue, p_node);
+	if (cfq_rq_close(cfqd, __cfqq->next_rq))
+		return __cfqq;
+
+	return NULL;
+}
+
+/*
+ * cfqd - obvious
+ * cur_cfqq - passed in so that we don't decide that the current queue is
+ *	      closely cooperating with itself.
+ *
+ * So, basically we're assuming that that cur_cfqq has dispatched at least
+ * one request, and that cfqd->last_position reflects a position on the disk
+ * associated with the I/O issued by cur_cfqq.  I'm not sure this is a valid
+ * assumption.
+ */
+static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
+					      struct cfq_queue *cur_cfqq,
+					      int probe)
 {
+	struct cfq_queue *cfqq;
+
+	/*
+	 * A valid cfq_io_context is necessary to compare requests against
+	 * the seek_mean of the current cfqq.
+	 */
+	if (!cfqd->active_cic)
+		return NULL;
+
	/*
	 * We should notice if some of the queues are cooperating, eg
	 * working closely on the same area of the disk. In that case,
	 * we can group them together and don't waste time idling.
	 */
-	return 0;
+	cfqq = cfqq_close(cfqd, cur_cfqq);
+	if (!cfqq)
+		return NULL;
+
+	if (cfq_cfqq_coop(cfqq))
+		return NULL;
+
+	if (!probe)
+		cfq_mark_cfqq_coop(cfqq);
+	return cfqq;
 }
 
+
 #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
 
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
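To make the new heuristic concrete: cfqq_close() searches the per-priority rbtree (keyed on each queue's next request sector) for the queue nearest cfqd->last_position and accepts it only if it lies within the current mean seek distance. A standalone toy model of that decision, with invented numbers and a flat array standing in for the rbtree:

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* per-queue ->next_rq->sector values */
		long long next_sector[] = { 100, 4096, 9000, 20000 };
		int i, best = 0, n = sizeof(next_sector) / sizeof(next_sector[0]);
		long long last_position = 8200;	/* assumed disk head position */
		long long seek_mean = 1024;	/* stand-in for cic->seek_mean */

		/* pick the queue whose next request is closest to the head */
		for (i = 1; i < n; i++)
			if (llabs(next_sector[i] - last_position) <
			    llabs(next_sector[best] - last_position))
				best = i;

		if (llabs(next_sector[best] - last_position) <= seek_mean)
			printf("queue %d is a close cooperator (sector %lld)\n",
			       best, next_sector[best]);
		else
			printf("no close cooperator; keep idling\n");
		return 0;
	}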
@@ -920,13 +1078,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
	if (!cic || !atomic_read(&cic->ioc->nr_tasks))
		return;
 
-	/*
-	 * See if this prio level has a good candidate
-	 */
-	if (cfq_close_cooperator(cfqd, cfqq) &&
-	    (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
-		return;
-
	cfq_mark_cfqq_wait_request(cfqq);
 
	/*
@@ -939,7 +1090,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
	sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
 
	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
-	cfq_log(cfqd, "arm_idle: %lu", sl);
+	cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
 }
 
 /*
@@ -1003,7 +1154,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  */
 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 {
-	struct cfq_queue *cfqq;
+	struct cfq_queue *cfqq, *new_cfqq = NULL;
 
	cfqq = cfqd->active_queue;
	if (!cfqq)
@@ -1037,6 +1188,16 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
		goto keep_queue;
 
	/*
+	 * If another queue has a request waiting within our mean seek
+	 * distance, let it run.  The expire code will check for close
+	 * cooperators and put the close queue at the front of the service
+	 * tree.
+	 */
+	new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0);
+	if (new_cfqq)
+		goto expire;
+
+	/*
	 * No requests pending. If the active queue still has requests in
	 * flight or is idling for a new request, allow either of these
	 * conditions to happen (or time out) before selecting a new queue.
@@ -1050,7 +1211,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 expire:
	cfq_slice_expired(cfqd, 0);
 new_queue:
-	cfqq = cfq_set_active_queue(cfqd);
+	cfqq = cfq_set_active_queue(cfqd, new_cfqq);
 keep_queue:
	return cfqq;
 }
@@ -1333,14 +1494,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
	if (ioc->ioc_data == cic)
		rcu_assign_pointer(ioc->ioc_data, NULL);
 
-	if (cic->cfqq[ASYNC]) {
-		cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
-		cic->cfqq[ASYNC] = NULL;
+	if (cic->cfqq[BLK_RW_ASYNC]) {
+		cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
+		cic->cfqq[BLK_RW_ASYNC] = NULL;
	}
 
-	if (cic->cfqq[SYNC]) {
-		cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
-		cic->cfqq[SYNC] = NULL;
+	if (cic->cfqq[BLK_RW_SYNC]) {
+		cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
+		cic->cfqq[BLK_RW_SYNC] = NULL;
	}
 }
 
@@ -1449,17 +1610,18 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 
	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
 
-	cfqq = cic->cfqq[ASYNC];
+	cfqq = cic->cfqq[BLK_RW_ASYNC];
	if (cfqq) {
		struct cfq_queue *new_cfqq;
-		new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC);
+		new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
+						GFP_ATOMIC);
		if (new_cfqq) {
-			cic->cfqq[ASYNC] = new_cfqq;
+			cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
			cfq_put_queue(cfqq);
		}
	}
 
-	cfqq = cic->cfqq[SYNC];
+	cfqq = cic->cfqq[BLK_RW_SYNC];
	if (cfqq)
		cfq_mark_cfqq_prio_changed(cfqq);
 
@@ -1510,6 +1672,7 @@ retry:
	}
 
	RB_CLEAR_NODE(&cfqq->rb_node);
+	RB_CLEAR_NODE(&cfqq->p_node);
	INIT_LIST_HEAD(&cfqq->fifo);
 
	atomic_set(&cfqq->ref, 0);
@@ -1905,10 +2068,20 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		 * Remember that we saw a request from this process, but
		 * don't start queuing just yet. Otherwise we risk seeing lots
		 * of tiny requests, because we disrupt the normal plugging
-		 * and merging.
+		 * and merging. If the request is already larger than a single
+		 * page, let it rip immediately. For that case we assume that
+		 * merging is already done. Ditto for a busy system that
+		 * has other work pending, don't risk delaying until the
+		 * idle timer unplug to continue working.
		 */
-		if (cfq_cfqq_wait_request(cfqq))
+		if (cfq_cfqq_wait_request(cfqq)) {
+			if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
+			    cfqd->busy_queues > 1) {
+				del_timer(&cfqd->idle_slice_timer);
+				blk_start_queueing(cfqd->queue);
+			}
			cfq_mark_cfqq_must_dispatch(cfqq);
+		}
	} else if (cfq_should_preempt(cfqd, cfqq, rq)) {
		/*
		 * not the active queue - expire current slice if it is
@@ -1992,16 +2165,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
	 * or if we want to idle in case it has no pending requests.
	 */
	if (cfqd->active_queue == cfqq) {
+		const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
+
		if (cfq_cfqq_slice_new(cfqq)) {
			cfq_set_prio_slice(cfqd, cfqq);
			cfq_clear_cfqq_slice_new(cfqq);
		}
+		/*
+		 * If there are no requests waiting in this queue, and
+		 * there are other queues ready to issue requests, AND
+		 * those other queues are issuing requests within our
+		 * mean seek distance, give them a chance to run instead
+		 * of idling.
+		 */
		if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
			cfq_slice_expired(cfqd, 1);
-		else if (sync && !rq_noidle(rq) &&
-			 RB_EMPTY_ROOT(&cfqq->sort_list)) {
+		else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) &&
+			 sync && !rq_noidle(rq))
			cfq_arm_slice_timer(cfqd);
-		}
	}
 
	if (!cfqd->rq_in_driver)
@@ -2062,7 +2243,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
	if (!cic)
		return ELV_MQUEUE_MAY;
 
-	cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC);
+	cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
	if (cfqq) {
		cfq_init_prio_data(cfqq, cic->ioc);
		cfq_prio_boost(cfqq);
@@ -2152,11 +2333,10 @@ static void cfq_kick_queue(struct work_struct *work)
	struct cfq_data *cfqd =
		container_of(work, struct cfq_data, unplug_work);
	struct request_queue *q = cfqd->queue;
-	unsigned long flags;
 
-	spin_lock_irqsave(q->queue_lock, flags);
+	spin_lock_irq(q->queue_lock);
	blk_start_queueing(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	spin_unlock_irq(q->queue_lock);
 }
 
 /*
diff --git a/block/elevator.c b/block/elevator.c
index fb81bcc14a8c..7073a9072577 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -590,7 +590,7 @@ void elv_drain_elevator(struct request_queue *q)
 /*
  * Call with queue lock held, interrupts disabled
  */
-void elv_quisce_start(struct request_queue *q)
+void elv_quiesce_start(struct request_queue *q)
 {
	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
 
@@ -607,7 +607,7 @@ void elv_quisce_start(struct request_queue *q)
	}
 }
 
-void elv_quisce_end(struct request_queue *q)
+void elv_quiesce_end(struct request_queue *q)
 {
	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
 }
@@ -1126,7 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);
-	elv_quisce_start(q);
+	elv_quiesce_start(q);
 
	/*
	 * Remember old elevator.
@@ -1150,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
	 */
	elevator_exit(old_elevator);
	spin_lock_irq(q->queue_lock);
-	elv_quisce_end(q);
+	elv_quiesce_end(q);
	spin_unlock_irq(q->queue_lock);
 
	blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
diff --git a/block/ioctl.c b/block/ioctl.c
index 0f22e629b13c..ad474d4bbcce 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -146,8 +146,6 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
		struct bio *bio;
 
		bio = bio_alloc(GFP_KERNEL, 0);
-		if (!bio)
-			return -ENOMEM;
 
		bio->bi_end_io = blk_ioc_discard_endio;
		bio->bi_bdev = bdev;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 626ee274c5c4..84b7f8709f41 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -217,7 +217,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
				 struct bio *bio)
 {
-	int ret = 0;
+	int r, ret = 0;
 
	/*
	 * fill in all the output members
@@ -242,7 +242,9 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
			ret = -EFAULT;
	}
 
-	blk_rq_unmap_user(bio);
+	r = blk_rq_unmap_user(bio);
+	if (!ret)
+		ret = r;
	blk_put_request(rq);
 
	return ret;
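The change above is the usual "first error wins" pattern: blk_rq_unmap_user()'s return value is now propagated, but only when no earlier failure (the -EFAULT above it) has been recorded. The shape of the pattern in isolation (helper names hypothetical):

	int ret = 0, r;

	if (copy_header_to_user())	/* earlier step that may fail */
		ret = -EFAULT;
	r = unmap_user_pages();		/* cleanup that may also fail */
	if (!ret)
		ret = r;		/* keep the first error, not the last */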
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index bdd4f5f45575..5f7e64ba87e5 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c | |||
| @@ -275,8 +275,10 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, | |||
| 275 | if (rw == READ) { | 275 | if (rw == READ) { |
| 276 | copy_from_brd(mem + off, brd, sector, len); | 276 | copy_from_brd(mem + off, brd, sector, len); |
| 277 | flush_dcache_page(page); | 277 | flush_dcache_page(page); |
| 278 | } else | 278 | } else { |
| 279 | flush_dcache_page(page); | ||
| 279 | copy_to_brd(brd, mem + off, sector, len); | 280 | copy_to_brd(brd, mem + off, sector, len); |
| 281 | } | ||
| 280 | kunmap_atomic(mem, KM_USER0); | 282 | kunmap_atomic(mem, KM_USER0); |
| 281 | 283 | ||
| 282 | out: | 284 | out: |
| @@ -436,6 +438,7 @@ static struct brd_device *brd_alloc(int i) | |||
| 436 | if (!brd->brd_queue) | 438 | if (!brd->brd_queue) |
| 437 | goto out_free_dev; | 439 | goto out_free_dev; |
| 438 | blk_queue_make_request(brd->brd_queue, brd_make_request); | 440 | blk_queue_make_request(brd->brd_queue, brd_make_request); |
| 441 | blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL); | ||
| 439 | blk_queue_max_sectors(brd->brd_queue, 1024); | 442 | blk_queue_max_sectors(brd->brd_queue, 1024); |
| 440 | blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); | 443 | blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); |
| 441 | 444 | ||
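The brd write path above now flushes the D-cache before copy_to_brd() reads the page, because userspace may have dirtied that page through a mapping with different cache aliases. A hedged sketch of the general rule the fix follows (names and the memcpy-based transfer are illustrative only):

    #include <linux/highmem.h>
    #include <linux/fs.h>
    #include <linux/string.h>

    /*
     * Flush *after* the kernel writes a page userspace will read, and
     * *before* the kernel reads a page userspace may have written.
     */
    static void ramdisk_transfer(void *dev_buf, struct page *page, void *mem,
                                 size_t len, int rw)
    {
            if (rw == READ) {                  /* device -> page */
                    memcpy(mem, dev_buf, len); /* kernel writes the page */
                    flush_dcache_page(page);
            } else {                           /* page -> device */
                    flush_dcache_page(page);   /* observe user stores first */
                    memcpy(dev_buf, mem, len); /* kernel reads the page */
            }
    }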
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h deleted file mode 100644 index 345098b4ca77..000000000000 --- a/drivers/md/dm-bio-list.h +++ /dev/null | |||
| @@ -1,117 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2004 Red Hat UK Ltd. | ||
| 3 | * | ||
| 4 | * This file is released under the GPL. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #ifndef DM_BIO_LIST_H | ||
| 8 | #define DM_BIO_LIST_H | ||
| 9 | |||
| 10 | #include <linux/bio.h> | ||
| 11 | |||
| 12 | #ifdef CONFIG_BLOCK | ||
| 13 | |||
| 14 | struct bio_list { | ||
| 15 | struct bio *head; | ||
| 16 | struct bio *tail; | ||
| 17 | }; | ||
| 18 | |||
| 19 | static inline int bio_list_empty(const struct bio_list *bl) | ||
| 20 | { | ||
| 21 | return bl->head == NULL; | ||
| 22 | } | ||
| 23 | |||
| 24 | static inline void bio_list_init(struct bio_list *bl) | ||
| 25 | { | ||
| 26 | bl->head = bl->tail = NULL; | ||
| 27 | } | ||
| 28 | |||
| 29 | #define bio_list_for_each(bio, bl) \ | ||
| 30 | for (bio = (bl)->head; bio; bio = bio->bi_next) | ||
| 31 | |||
| 32 | static inline unsigned bio_list_size(const struct bio_list *bl) | ||
| 33 | { | ||
| 34 | unsigned sz = 0; | ||
| 35 | struct bio *bio; | ||
| 36 | |||
| 37 | bio_list_for_each(bio, bl) | ||
| 38 | sz++; | ||
| 39 | |||
| 40 | return sz; | ||
| 41 | } | ||
| 42 | |||
| 43 | static inline void bio_list_add(struct bio_list *bl, struct bio *bio) | ||
| 44 | { | ||
| 45 | bio->bi_next = NULL; | ||
| 46 | |||
| 47 | if (bl->tail) | ||
| 48 | bl->tail->bi_next = bio; | ||
| 49 | else | ||
| 50 | bl->head = bio; | ||
| 51 | |||
| 52 | bl->tail = bio; | ||
| 53 | } | ||
| 54 | |||
| 55 | static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) | ||
| 56 | { | ||
| 57 | bio->bi_next = bl->head; | ||
| 58 | |||
| 59 | bl->head = bio; | ||
| 60 | |||
| 61 | if (!bl->tail) | ||
| 62 | bl->tail = bio; | ||
| 63 | } | ||
| 64 | |||
| 65 | static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) | ||
| 66 | { | ||
| 67 | if (!bl2->head) | ||
| 68 | return; | ||
| 69 | |||
| 70 | if (bl->tail) | ||
| 71 | bl->tail->bi_next = bl2->head; | ||
| 72 | else | ||
| 73 | bl->head = bl2->head; | ||
| 74 | |||
| 75 | bl->tail = bl2->tail; | ||
| 76 | } | ||
| 77 | |||
| 78 | static inline void bio_list_merge_head(struct bio_list *bl, | ||
| 79 | struct bio_list *bl2) | ||
| 80 | { | ||
| 81 | if (!bl2->head) | ||
| 82 | return; | ||
| 83 | |||
| 84 | if (bl->head) | ||
| 85 | bl2->tail->bi_next = bl->head; | ||
| 86 | else | ||
| 87 | bl->tail = bl2->tail; | ||
| 88 | |||
| 89 | bl->head = bl2->head; | ||
| 90 | } | ||
| 91 | |||
| 92 | static inline struct bio *bio_list_pop(struct bio_list *bl) | ||
| 93 | { | ||
| 94 | struct bio *bio = bl->head; | ||
| 95 | |||
| 96 | if (bio) { | ||
| 97 | bl->head = bl->head->bi_next; | ||
| 98 | if (!bl->head) | ||
| 99 | bl->tail = NULL; | ||
| 100 | |||
| 101 | bio->bi_next = NULL; | ||
| 102 | } | ||
| 103 | |||
| 104 | return bio; | ||
| 105 | } | ||
| 106 | |||
| 107 | static inline struct bio *bio_list_get(struct bio_list *bl) | ||
| 108 | { | ||
| 109 | struct bio *bio = bl->head; | ||
| 110 | |||
| 111 | bl->head = bl->tail = NULL; | ||
| 112 | |||
| 113 | return bio; | ||
| 114 | } | ||
| 115 | |||
| 116 | #endif /* CONFIG_BLOCK */ | ||
| 117 | #endif | ||
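The header is deleted because, later in this commit, the same bio_list helpers are added to include/linux/bio.h so MD and DM can share them. As a rough usage sketch (the function is illustrative and assumes the 2.6.30-era generic_make_request() interface), a remapping driver might requeue and drain a deferred list like this:

    #include <linux/bio.h>

    /* Illustrative: push one bio back to the front, then drain FIFO. */
    static void requeue_and_drain(struct bio_list *deferred, struct bio *bio)
    {
            struct bio_list tmp;

            bio_list_init(&tmp);
            bio_list_add(&tmp, bio);               /* tail append */
            bio_list_merge_head(deferred, &tmp);   /* splice in front */

            while ((bio = bio_list_pop(deferred))) /* head first */
                    generic_make_request(bio);
    }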
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 59ee1b015d2d..559dbb52bc85 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c | |||
| @@ -15,8 +15,6 @@ | |||
| 15 | 15 | ||
| 16 | #include <linux/device-mapper.h> | 16 | #include <linux/device-mapper.h> |
| 17 | 17 | ||
| 18 | #include "dm-bio-list.h" | ||
| 19 | |||
| 20 | #define DM_MSG_PREFIX "delay" | 18 | #define DM_MSG_PREFIX "delay" |
| 21 | 19 | ||
| 22 | struct delay_c { | 20 | struct delay_c { |
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 095f77bf9681..6a386ab4f7eb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <linux/device-mapper.h> | 8 | #include <linux/device-mapper.h> |
| 9 | 9 | ||
| 10 | #include "dm-path-selector.h" | 10 | #include "dm-path-selector.h" |
| 11 | #include "dm-bio-list.h" | ||
| 12 | #include "dm-bio-record.h" | 11 | #include "dm-bio-record.h" |
| 13 | #include "dm-uevent.h" | 12 | #include "dm-uevent.h" |
| 14 | 13 | ||
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 536ef0bef154..076fbb4e967a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #include "dm-bio-list.h" | ||
| 9 | #include "dm-bio-record.h" | 8 | #include "dm-bio-record.h" |
| 10 | 9 | ||
| 11 | #include <linux/init.h> | 10 | #include <linux/init.h> |
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 59f8d9df9e1a..7b899be0b087 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c | |||
| @@ -14,7 +14,6 @@ | |||
| 14 | #include <linux/vmalloc.h> | 14 | #include <linux/vmalloc.h> |
| 15 | 15 | ||
| 16 | #include "dm.h" | 16 | #include "dm.h" |
| 17 | #include "dm-bio-list.h" | ||
| 18 | 17 | ||
| 19 | #define DM_MSG_PREFIX "region hash" | 18 | #define DM_MSG_PREFIX "region hash" |
| 20 | 19 | ||
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 981a0413068f..d73f17fc7778 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c | |||
| @@ -22,7 +22,6 @@ | |||
| 22 | #include <linux/workqueue.h> | 22 | #include <linux/workqueue.h> |
| 23 | 23 | ||
| 24 | #include "dm-exception-store.h" | 24 | #include "dm-exception-store.h" |
| 25 | #include "dm-bio-list.h" | ||
| 26 | 25 | ||
| 27 | #define DM_MSG_PREFIX "snapshots" | 26 | #define DM_MSG_PREFIX "snapshots" |
| 28 | 27 | ||
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a994be035ba..424f7b048c30 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #include "dm.h" | 8 | #include "dm.h" |
| 9 | #include "dm-bio-list.h" | ||
| 10 | #include "dm-uevent.h" | 9 | #include "dm-uevent.h" |
| 11 | 10 | ||
| 12 | #include <linux/init.h> | 11 | #include <linux/init.h> |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 274b491a11c1..36df9109cde1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -35,7 +35,6 @@ | |||
| 35 | #include <linux/blkdev.h> | 35 | #include <linux/blkdev.h> |
| 36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
| 37 | #include "md.h" | 37 | #include "md.h" |
| 38 | #include "dm-bio-list.h" | ||
| 39 | #include "raid1.h" | 38 | #include "raid1.h" |
| 40 | #include "bitmap.h" | 39 | #include "bitmap.h" |
| 41 | 40 | ||
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e293d92641ac..81a54f17417e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -22,7 +22,6 @@ | |||
| 22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
| 23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
| 24 | #include "md.h" | 24 | #include "md.h" |
| 25 | #include "dm-bio-list.h" | ||
| 26 | #include "raid10.h" | 25 | #include "raid10.h" |
| 27 | #include "bitmap.h" | 26 | #include "bitmap.h" |
| 28 | 27 | ||
diff --git a/fs/bio.c b/fs/bio.c --- a/fs/bio.c +++ b/fs/bio.c | |||
| @@ -348,6 +348,24 @@ err: | |||
| 348 | return NULL; | 348 | return NULL; |
| 349 | } | 349 | } |
| 350 | 350 | ||
| 351 | /** | ||
| 352 | * bio_alloc - allocate a bio for I/O | ||
| 353 | * @gfp_mask: the GFP_ mask given to the slab allocator | ||
| 354 | * @nr_iovecs: number of iovecs to pre-allocate | ||
| 355 | * | ||
| 356 | * Description: | ||
| 357 | * bio_alloc will allocate a bio and associated bio_vec array that can hold | ||
| 358 | * at least @nr_iovecs entries. Allocations will be done from the | ||
| 359 | * fs_bio_set. Also see @bio_alloc_bioset. | ||
| 360 | * | ||
| 361 | * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate | ||
| 362 | * a bio. This is due to the mempool guarantees. To make this work, callers | ||
| 363 | * must never allocate more than 1 bio at a time from this pool. Callers | ||
| 364 | * that need to allocate more than 1 bio must always submit the previously | ||
| 365 | * allocated bio for IO before attempting to allocate a new one. Failure to | ||
| 366 | * do so can cause livelocks under memory pressure. | ||
| 367 | * | ||
| 368 | **/ | ||
| 351 | struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | 369 | struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) |
| 352 | { | 370 | { |
| 353 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | 371 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); |
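The new kerneldoc spells out the mempool contract: with __GFP_WAIT set the allocation cannot fail, but only if each bio is submitted before the next one is allocated. A minimal sketch of the safe pattern (the helper is assumed, one page per bio; GFP_NOIO implies __GFP_WAIT):

    #include <linux/bio.h>

    static void write_pages_one_by_one(struct block_device *bdev,
                                       struct page **pages, int nr,
                                       sector_t sector, bio_end_io_t *end_io)
    {
            int i;

            for (i = 0; i < nr; i++) {
                    struct bio *bio = bio_alloc(GFP_NOIO, 1); /* cannot fail */

                    bio->bi_bdev = bdev;
                    bio->bi_sector = sector + (i << (PAGE_SHIFT - 9));
                    bio->bi_end_io = end_io;
                    bio_add_page(bio, pages[i], PAGE_SIZE, 0);
                    submit_bio(WRITE, bio); /* submit before the next alloc */
            }
    }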
diff --git a/fs/buffer.c b/fs/buffer.c index 13edf7ad3ff1..ff8bb1f2333a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -547,7 +547,7 @@ repeat: | |||
| 547 | return err; | 547 | return err; |
| 548 | } | 548 | } |
| 549 | 549 | ||
| 550 | void do_thaw_all(unsigned long unused) | 550 | void do_thaw_all(struct work_struct *work) |
| 551 | { | 551 | { |
| 552 | struct super_block *sb; | 552 | struct super_block *sb; |
| 553 | char b[BDEVNAME_SIZE]; | 553 | char b[BDEVNAME_SIZE]; |
| @@ -567,6 +567,7 @@ restart: | |||
| 567 | goto restart; | 567 | goto restart; |
| 568 | } | 568 | } |
| 569 | spin_unlock(&sb_lock); | 569 | spin_unlock(&sb_lock); |
| 570 | kfree(work); | ||
| 570 | printk(KERN_WARNING "Emergency Thaw complete\n"); | 571 | printk(KERN_WARNING "Emergency Thaw complete\n"); |
| 571 | } | 572 | } |
| 572 | 573 | ||
| @@ -577,7 +578,13 @@ restart: | |||
| 577 | */ | 578 | */ |
| 578 | void emergency_thaw_all(void) | 579 | void emergency_thaw_all(void) |
| 579 | { | 580 | { |
| 580 | pdflush_operation(do_thaw_all, 0); | 581 | struct work_struct *work; |
| 582 | |||
| 583 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | ||
| 584 | if (work) { | ||
| 585 | INIT_WORK(work, do_thaw_all); | ||
| 586 | schedule_work(work); | ||
| 587 | } | ||
| 581 | } | 588 | } |
| 582 | 589 | ||
| 583 | /** | 590 | /** |
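With pdflush_operation() gone, emergency_thaw_all() queues a dynamically allocated work item and do_thaw_all() frees it when done. The same self-freeing pattern in isolation (handler and caller names are illustrative):

    #include <linux/workqueue.h>
    #include <linux/slab.h>

    static void my_handler(struct work_struct *work)
    {
            /* ... do the deferred work ... */
            kfree(work);            /* the handler owns the allocation */
    }

    static void kick_from_atomic_context(void)
    {
            struct work_struct *work = kmalloc(sizeof(*work), GFP_ATOMIC);

            if (work) {             /* best effort: silently skip on OOM */
                    INIT_WORK(work, my_handler);
                    schedule_work(work);
            }
    }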
diff --git a/fs/direct-io.c b/fs/direct-io.c index da258e7249cc..05763bbc2050 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
| @@ -307,8 +307,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
| 307 | struct bio *bio; | 307 | struct bio *bio; |
| 308 | 308 | ||
| 309 | bio = bio_alloc(GFP_KERNEL, nr_vecs); | 309 | bio = bio_alloc(GFP_KERNEL, nr_vecs); |
| 310 | if (bio == NULL) | ||
| 311 | return -ENOMEM; | ||
| 312 | 310 | ||
| 313 | bio->bi_bdev = bdev; | 311 | bio->bi_bdev = bdev; |
| 314 | bio->bi_sector = first_sector; | 312 | bio->bi_sector = first_sector; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 6132353dcf62..2a1cb0979768 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -2416,8 +2416,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
| 2416 | len = ee_len; | 2416 | len = ee_len; |
| 2417 | 2417 | ||
| 2418 | bio = bio_alloc(GFP_NOIO, len); | 2418 | bio = bio_alloc(GFP_NOIO, len); |
| 2419 | if (!bio) | ||
| 2420 | return -ENOMEM; | ||
| 2421 | bio->bi_sector = ee_pblock; | 2419 | bio->bi_sector = ee_pblock; |
| 2422 | bio->bi_bdev = inode->i_sb->s_bdev; | 2420 | bio->bi_bdev = inode->i_sb->s_bdev; |
| 2423 | 2421 | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 51883b3ad89c..650a730707b7 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -272,11 +272,6 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | |||
| 272 | lock_page(page); | 272 | lock_page(page); |
| 273 | 273 | ||
| 274 | bio = bio_alloc(GFP_NOFS, 1); | 274 | bio = bio_alloc(GFP_NOFS, 1); |
| 275 | if (unlikely(!bio)) { | ||
| 276 | __free_page(page); | ||
| 277 | return -ENOBUFS; | ||
| 278 | } | ||
| 279 | |||
| 280 | bio->bi_sector = sector * (sb->s_blocksize >> 9); | 275 | bio->bi_sector = sector * (sb->s_blocksize >> 9); |
| 281 | bio->bi_bdev = sb->s_bdev; | 276 | bio->bi_bdev = sb->s_bdev; |
| 282 | bio_add_page(bio, page, PAGE_SIZE, 0); | 277 | bio_add_page(bio, page, PAGE_SIZE, 0); |
diff --git a/fs/inode.c b/fs/inode.c index d06d6d268de9..6ad14a1cd8c9 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode) | |||
| 1470 | spin_lock(&inode_lock); | 1470 | spin_lock(&inode_lock); |
| 1471 | } | 1471 | } |
| 1472 | 1472 | ||
| 1473 | /* | ||
| 1474 | * We rarely want to lock two inodes that do not have a parent/child | ||
| 1475 | * relationship (such as directory, child inode) simultaneously. The | ||
| 1476 | * vast majority of file systems should be able to get along fine | ||
| 1477 | * without this. Do not use these functions except as a last resort. | ||
| 1478 | */ | ||
| 1479 | void inode_double_lock(struct inode *inode1, struct inode *inode2) | ||
| 1480 | { | ||
| 1481 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | ||
| 1482 | if (inode1) | ||
| 1483 | mutex_lock(&inode1->i_mutex); | ||
| 1484 | else if (inode2) | ||
| 1485 | mutex_lock(&inode2->i_mutex); | ||
| 1486 | return; | ||
| 1487 | } | ||
| 1488 | |||
| 1489 | if (inode1 < inode2) { | ||
| 1490 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | ||
| 1491 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | ||
| 1492 | } else { | ||
| 1493 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | ||
| 1494 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | ||
| 1495 | } | ||
| 1496 | } | ||
| 1497 | EXPORT_SYMBOL(inode_double_lock); | ||
| 1498 | |||
| 1499 | void inode_double_unlock(struct inode *inode1, struct inode *inode2) | ||
| 1500 | { | ||
| 1501 | if (inode1) | ||
| 1502 | mutex_unlock(&inode1->i_mutex); | ||
| 1503 | |||
| 1504 | if (inode2 && inode2 != inode1) | ||
| 1505 | mutex_unlock(&inode2->i_mutex); | ||
| 1506 | } | ||
| 1507 | EXPORT_SYMBOL(inode_double_unlock); | ||
| 1508 | |||
| 1509 | static __initdata unsigned long ihash_entries; | 1473 | static __initdata unsigned long ihash_entries; |
| 1510 | static int __init set_ihash_entries(char *str) | 1474 | static int __init set_ihash_entries(char *str) |
| 1511 | { | 1475 | { |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8672b9536039..c2a87c885b73 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -1912,6 +1912,22 @@ out_sems: | |||
| 1912 | return written ? written : ret; | 1912 | return written ? written : ret; |
| 1913 | } | 1913 | } |
| 1914 | 1914 | ||
| 1915 | static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | ||
| 1916 | struct file *out, | ||
| 1917 | struct splice_desc *sd) | ||
| 1918 | { | ||
| 1919 | int ret; | ||
| 1920 | |||
| 1921 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, | ||
| 1922 | sd->total_len, 0, NULL); | ||
| 1923 | if (ret < 0) { | ||
| 1924 | mlog_errno(ret); | ||
| 1925 | return ret; | ||
| 1926 | } | ||
| 1927 | |||
| 1928 | return splice_from_pipe_feed(pipe, sd, pipe_to_file); | ||
| 1929 | } | ||
| 1930 | |||
| 1915 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | 1931 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, |
| 1916 | struct file *out, | 1932 | struct file *out, |
| 1917 | loff_t *ppos, | 1933 | loff_t *ppos, |
| @@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
| 1919 | unsigned int flags) | 1935 | unsigned int flags) |
| 1920 | { | 1936 | { |
| 1921 | int ret; | 1937 | int ret; |
| 1922 | struct inode *inode = out->f_path.dentry->d_inode; | 1938 | struct address_space *mapping = out->f_mapping; |
| 1939 | struct inode *inode = mapping->host; | ||
| 1940 | struct splice_desc sd = { | ||
| 1941 | .total_len = len, | ||
| 1942 | .flags = flags, | ||
| 1943 | .pos = *ppos, | ||
| 1944 | .u.file = out, | ||
| 1945 | }; | ||
| 1923 | 1946 | ||
| 1924 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, | 1947 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, |
| 1925 | (unsigned int)len, | 1948 | (unsigned int)len, |
| 1926 | out->f_path.dentry->d_name.len, | 1949 | out->f_path.dentry->d_name.len, |
| 1927 | out->f_path.dentry->d_name.name); | 1950 | out->f_path.dentry->d_name.name); |
| 1928 | 1951 | ||
| 1929 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | 1952 | if (pipe->inode) |
| 1953 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); | ||
| 1930 | 1954 | ||
| 1931 | ret = ocfs2_rw_lock(inode, 1); | 1955 | splice_from_pipe_begin(&sd); |
| 1932 | if (ret < 0) { | 1956 | do { |
| 1933 | mlog_errno(ret); | 1957 | ret = splice_from_pipe_next(pipe, &sd); |
| 1934 | goto out; | 1958 | if (ret <= 0) |
| 1935 | } | 1959 | break; |
| 1936 | 1960 | ||
| 1937 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, | 1961 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
| 1938 | NULL); | 1962 | ret = ocfs2_rw_lock(inode, 1); |
| 1939 | if (ret < 0) { | 1963 | if (ret < 0) |
| 1940 | mlog_errno(ret); | 1964 | mlog_errno(ret); |
| 1941 | goto out_unlock; | 1965 | else { |
| 1942 | } | 1966 | ret = ocfs2_splice_to_file(pipe, out, &sd); |
| 1967 | ocfs2_rw_unlock(inode, 1); | ||
| 1968 | } | ||
| 1969 | mutex_unlock(&inode->i_mutex); | ||
| 1970 | } while (ret > 0); | ||
| 1971 | splice_from_pipe_end(pipe, &sd); | ||
| 1943 | 1972 | ||
| 1944 | if (pipe->inode) | 1973 | if (pipe->inode) |
| 1945 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
| 1946 | ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); | ||
| 1947 | if (pipe->inode) | ||
| 1948 | mutex_unlock(&pipe->inode->i_mutex); | 1974 | mutex_unlock(&pipe->inode->i_mutex); |
| 1949 | 1975 | ||
| 1950 | out_unlock: | 1976 | if (sd.num_spliced) |
| 1951 | ocfs2_rw_unlock(inode, 1); | 1977 | ret = sd.num_spliced; |
| 1952 | out: | 1978 | |
| 1953 | mutex_unlock(&inode->i_mutex); | 1979 | if (ret > 0) { |
| 1980 | unsigned long nr_pages; | ||
| 1981 | |||
| 1982 | *ppos += ret; | ||
| 1983 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
| 1984 | |||
| 1985 | /* | ||
| 1986 | * If file or inode is SYNC and we actually wrote some data, | ||
| 1987 | * sync it. | ||
| 1988 | */ | ||
| 1989 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
| 1990 | int err; | ||
| 1991 | |||
| 1992 | mutex_lock(&inode->i_mutex); | ||
| 1993 | err = ocfs2_rw_lock(inode, 1); | ||
| 1994 | if (err < 0) { | ||
| 1995 | mlog_errno(err); | ||
| 1996 | } else { | ||
| 1997 | err = generic_osync_inode(inode, mapping, | ||
| 1998 | OSYNC_METADATA|OSYNC_DATA); | ||
| 1999 | ocfs2_rw_unlock(inode, 1); | ||
| 2000 | } | ||
| 2001 | mutex_unlock(&inode->i_mutex); | ||
| 2002 | |||
| 2003 | if (err) | ||
| 2004 | ret = err; | ||
| 2005 | } | ||
| 2006 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | ||
| 2007 | } | ||
| 1954 | 2008 | ||
| 1955 | mlog_exit(ret); | 2009 | mlog_exit(ret); |
| 1956 | return ret; | 2010 | return ret; |
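The rewritten ocfs2 path is an instance of the commit's new open-coded splice loop: splice_from_pipe_begin() initializes the descriptor, splice_from_pipe_next() waits for data, splice_from_pipe_feed() copies it, and splice_from_pipe_end() wakes writers, with the caller taking its own locks per iteration nested inside the pipe lock. A generic skeleton of that loop (a hedged sketch; the filesystem locking hooks are left as comments):

    static ssize_t splice_write_loop(struct pipe_inode_info *pipe,
                                     struct file *out, loff_t *ppos,
                                     size_t len, unsigned int flags)
    {
            struct splice_desc sd = {
                    .total_len = len,
                    .flags = flags,
                    .pos = *ppos,
                    .u.file = out,
            };
            ssize_t ret;

            pipe_lock(pipe);
            splice_from_pipe_begin(&sd);
            do {
                    ret = splice_from_pipe_next(pipe, &sd); /* wait for data */
                    if (ret <= 0)
                            break;
                    /* take i_mutex/fs locks here, nested inside pipe lock */
                    ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
                    /* drop fs locks here */
            } while (ret > 0);
            splice_from_pipe_end(pipe, &sd);
            pipe_unlock(pipe);

            return sd.num_spliced ? sd.num_spliced : ret;
    }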
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
| @@ -37,6 +37,42 @@ | |||
| 37 | * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 | 37 | * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 |
| 38 | */ | 38 | */ |
| 39 | 39 | ||
| 40 | static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) | ||
| 41 | { | ||
| 42 | if (pipe->inode) | ||
| 43 | mutex_lock_nested(&pipe->inode->i_mutex, subclass); | ||
| 44 | } | ||
| 45 | |||
| 46 | void pipe_lock(struct pipe_inode_info *pipe) | ||
| 47 | { | ||
| 48 | /* | ||
| 49 | * pipe_lock() nests non-pipe inode locks (for writing to a file) | ||
| 50 | */ | ||
| 51 | pipe_lock_nested(pipe, I_MUTEX_PARENT); | ||
| 52 | } | ||
| 53 | EXPORT_SYMBOL(pipe_lock); | ||
| 54 | |||
| 55 | void pipe_unlock(struct pipe_inode_info *pipe) | ||
| 56 | { | ||
| 57 | if (pipe->inode) | ||
| 58 | mutex_unlock(&pipe->inode->i_mutex); | ||
| 59 | } | ||
| 60 | EXPORT_SYMBOL(pipe_unlock); | ||
| 61 | |||
| 62 | void pipe_double_lock(struct pipe_inode_info *pipe1, | ||
| 63 | struct pipe_inode_info *pipe2) | ||
| 64 | { | ||
| 65 | BUG_ON(pipe1 == pipe2); | ||
| 66 | |||
| 67 | if (pipe1 < pipe2) { | ||
| 68 | pipe_lock_nested(pipe1, I_MUTEX_PARENT); | ||
| 69 | pipe_lock_nested(pipe2, I_MUTEX_CHILD); | ||
| 70 | } else { | ||
| 71 | pipe_lock_nested(pipe2, I_MUTEX_CHILD); | ||
| 72 | pipe_lock_nested(pipe1, I_MUTEX_PARENT); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 40 | /* Drop the inode semaphore and wait for a pipe event, atomically */ | 76 | /* Drop the inode semaphore and wait for a pipe event, atomically */ |
| 41 | void pipe_wait(struct pipe_inode_info *pipe) | 77 | void pipe_wait(struct pipe_inode_info *pipe) |
| 42 | { | 78 | { |
| @@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe) | |||
| 47 | * is considered a noninteractive wait: | 83 | * is considered a noninteractive wait: |
| 48 | */ | 84 | */ |
| 49 | prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); | 85 | prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); |
| 50 | if (pipe->inode) | 86 | pipe_unlock(pipe); |
| 51 | mutex_unlock(&pipe->inode->i_mutex); | ||
| 52 | schedule(); | 87 | schedule(); |
| 53 | finish_wait(&pipe->wait, &wait); | 88 | finish_wait(&pipe->wait, &wait); |
| 54 | if (pipe->inode) | 89 | pipe_lock(pipe); |
| 55 | mutex_lock(&pipe->inode->i_mutex); | ||
| 56 | } | 90 | } |
| 57 | 91 | ||
| 58 | static int | 92 | static int |
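pipe_double_lock() avoids the tee()-style ABBA deadlock by always treating the lower-addressed pipe as the lockdep parent. A hedged usage sketch (the function is illustrative; the unlock order does not matter):

    #include <linux/pipe_fs_i.h>

    static void copy_between_pipes(struct pipe_inode_info *ipipe,
                                   struct pipe_inode_info *opipe)
    {
            pipe_double_lock(ipipe, opipe); /* safe against A->B vs B->A */
            /* ... move or copy pipe_buffers between ipipe and opipe ... */
            pipe_unlock(ipipe);
            pipe_unlock(opipe);
    }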
diff --git a/fs/splice.c b/fs/splice.c index c18aa7e03e2b..5384a90665d0 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
| 182 | do_wakeup = 0; | 182 | do_wakeup = 0; |
| 183 | page_nr = 0; | 183 | page_nr = 0; |
| 184 | 184 | ||
| 185 | if (pipe->inode) | 185 | pipe_lock(pipe); |
| 186 | mutex_lock(&pipe->inode->i_mutex); | ||
| 187 | 186 | ||
| 188 | for (;;) { | 187 | for (;;) { |
| 189 | if (!pipe->readers) { | 188 | if (!pipe->readers) { |
| @@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
| 245 | pipe->waiting_writers--; | 244 | pipe->waiting_writers--; |
| 246 | } | 245 | } |
| 247 | 246 | ||
| 248 | if (pipe->inode) { | 247 | pipe_unlock(pipe); |
| 249 | mutex_unlock(&pipe->inode->i_mutex); | ||
| 250 | 248 | ||
| 251 | if (do_wakeup) { | 249 | if (do_wakeup) { |
| 252 | smp_mb(); | 250 | smp_mb(); |
| 253 | if (waitqueue_active(&pipe->wait)) | 251 | if (waitqueue_active(&pipe->wait)) |
| 254 | wake_up_interruptible(&pipe->wait); | 252 | wake_up_interruptible(&pipe->wait); |
| 255 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 253 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
| 256 | } | ||
| 257 | } | 254 | } |
| 258 | 255 | ||
| 259 | while (page_nr < spd_pages) | 256 | while (page_nr < spd_pages) |
| @@ -555,8 +552,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, | |||
| 555 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create | 552 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create |
| 556 | * a new page in the output file page cache and fill/dirty that. | 553 | * a new page in the output file page cache and fill/dirty that. |
| 557 | */ | 554 | */ |
| 558 | static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | 555 | int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, |
| 559 | struct splice_desc *sd) | 556 | struct splice_desc *sd) |
| 560 | { | 557 | { |
| 561 | struct file *file = sd->u.file; | 558 | struct file *file = sd->u.file; |
| 562 | struct address_space *mapping = file->f_mapping; | 559 | struct address_space *mapping = file->f_mapping; |
| @@ -600,108 +597,178 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | |||
| 600 | out: | 597 | out: |
| 601 | return ret; | 598 | return ret; |
| 602 | } | 599 | } |
| 600 | EXPORT_SYMBOL(pipe_to_file); | ||
| 601 | |||
| 602 | static void wakeup_pipe_writers(struct pipe_inode_info *pipe) | ||
| 603 | { | ||
| 604 | smp_mb(); | ||
| 605 | if (waitqueue_active(&pipe->wait)) | ||
| 606 | wake_up_interruptible(&pipe->wait); | ||
| 607 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | ||
| 608 | } | ||
| 603 | 609 | ||
| 604 | /** | 610 | /** |
| 605 | * __splice_from_pipe - splice data from a pipe to given actor | 611 | * splice_from_pipe_feed - feed available data from a pipe to a file |
| 606 | * @pipe: pipe to splice from | 612 | * @pipe: pipe to splice from |
| 607 | * @sd: information to @actor | 613 | * @sd: information to @actor |
| 608 | * @actor: handler that splices the data | 614 | * @actor: handler that splices the data |
| 609 | * | 615 | * |
| 610 | * Description: | 616 | * Description: |
| 611 | * This function does little more than loop over the pipe and call | 617 | |
| 612 | * @actor to do the actual moving of a single struct pipe_buffer to | 618 | * This function loops over the pipe and calls @actor to do the |
| 613 | * the desired destination. See pipe_to_file, pipe_to_sendpage, or | 619 | * actual moving of a single struct pipe_buffer to the desired |
| 614 | * pipe_to_user. | 620 | * destination. It returns when there are no more buffers left in |
| 621 | * the pipe or if the requested number of bytes (@sd->total_len) | ||
| 622 | * have been copied. It returns a positive number (one) if the | ||
| 623 | * pipe needs to be filled with more data, zero if the required | ||
| 624 | * number of bytes have been copied and -errno on error. | ||
| 615 | * | 625 | * |
| 626 | * This, together with splice_from_pipe_{begin,end,next}, may be | ||
| 627 | * used to implement the functionality of __splice_from_pipe() when | ||
| 628 | * locking is required around copying the pipe buffers to the | ||
| 629 | * destination. | ||
| 616 | */ | 630 | */ |
| 617 | ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, | 631 | int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, |
| 618 | splice_actor *actor) | 632 | splice_actor *actor) |
| 619 | { | 633 | { |
| 620 | int ret, do_wakeup, err; | 634 | int ret; |
| 621 | |||
| 622 | ret = 0; | ||
| 623 | do_wakeup = 0; | ||
| 624 | |||
| 625 | for (;;) { | ||
| 626 | if (pipe->nrbufs) { | ||
| 627 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; | ||
| 628 | const struct pipe_buf_operations *ops = buf->ops; | ||
| 629 | 635 | ||
| 630 | sd->len = buf->len; | 636 | while (pipe->nrbufs) { |
| 631 | if (sd->len > sd->total_len) | 637 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; |
| 632 | sd->len = sd->total_len; | 638 | const struct pipe_buf_operations *ops = buf->ops; |
| 633 | 639 | ||
| 634 | err = actor(pipe, buf, sd); | 640 | sd->len = buf->len; |
| 635 | if (err <= 0) { | 641 | if (sd->len > sd->total_len) |
| 636 | if (!ret && err != -ENODATA) | 642 | sd->len = sd->total_len; |
| 637 | ret = err; | ||
| 638 | 643 | ||
| 639 | break; | 644 | ret = actor(pipe, buf, sd); |
| 640 | } | 645 | if (ret <= 0) { |
| 646 | if (ret == -ENODATA) | ||
| 647 | ret = 0; | ||
| 648 | return ret; | ||
| 649 | } | ||
| 650 | buf->offset += ret; | ||
| 651 | buf->len -= ret; | ||
| 641 | 652 | ||
| 642 | ret += err; | 653 | sd->num_spliced += ret; |
| 643 | buf->offset += err; | 654 | sd->len -= ret; |
| 644 | buf->len -= err; | 655 | sd->pos += ret; |
| 656 | sd->total_len -= ret; | ||
| 645 | 657 | ||
| 646 | sd->len -= err; | 658 | if (!buf->len) { |
| 647 | sd->pos += err; | 659 | buf->ops = NULL; |
| 648 | sd->total_len -= err; | 660 | ops->release(pipe, buf); |
| 649 | if (sd->len) | 661 | pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); |
| 650 | continue; | 662 | pipe->nrbufs--; |
| 663 | if (pipe->inode) | ||
| 664 | sd->need_wakeup = true; | ||
| 665 | } | ||
| 651 | 666 | ||
| 652 | if (!buf->len) { | 667 | if (!sd->total_len) |
| 653 | buf->ops = NULL; | 668 | return 0; |
| 654 | ops->release(pipe, buf); | 669 | } |
| 655 | pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); | ||
| 656 | pipe->nrbufs--; | ||
| 657 | if (pipe->inode) | ||
| 658 | do_wakeup = 1; | ||
| 659 | } | ||
| 660 | 670 | ||
| 661 | if (!sd->total_len) | 671 | return 1; |
| 662 | break; | 672 | } |
| 663 | } | 673 | EXPORT_SYMBOL(splice_from_pipe_feed); |
| 664 | 674 | ||
| 665 | if (pipe->nrbufs) | 675 | /** |
| 666 | continue; | 676 | * splice_from_pipe_next - wait for some data to splice from |
| 677 | * @pipe: pipe to splice from | ||
| 678 | * @sd: information about the splice operation | ||
| 679 | * | ||
| 680 | * Description: | ||
| 681 | * This function will wait for some data and return a positive | ||
| 682 | * value (one) if pipe buffers are available. It will return zero | ||
| 683 | * or -errno if no more data needs to be spliced. | ||
| 684 | */ | ||
| 685 | int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) | ||
| 686 | { | ||
| 687 | while (!pipe->nrbufs) { | ||
| 667 | if (!pipe->writers) | 688 | if (!pipe->writers) |
| 668 | break; | 689 | return 0; |
| 669 | if (!pipe->waiting_writers) { | ||
| 670 | if (ret) | ||
| 671 | break; | ||
| 672 | } | ||
| 673 | 690 | ||
| 674 | if (sd->flags & SPLICE_F_NONBLOCK) { | 691 | if (!pipe->waiting_writers && sd->num_spliced) |
| 675 | if (!ret) | 692 | return 0; |
| 676 | ret = -EAGAIN; | ||
| 677 | break; | ||
| 678 | } | ||
| 679 | 693 | ||
| 680 | if (signal_pending(current)) { | 694 | if (sd->flags & SPLICE_F_NONBLOCK) |
| 681 | if (!ret) | 695 | return -EAGAIN; |
| 682 | ret = -ERESTARTSYS; | ||
| 683 | break; | ||
| 684 | } | ||
| 685 | 696 | ||
| 686 | if (do_wakeup) { | 697 | if (signal_pending(current)) |
| 687 | smp_mb(); | 698 | return -ERESTARTSYS; |
| 688 | if (waitqueue_active(&pipe->wait)) | 699 | |
| 689 | wake_up_interruptible_sync(&pipe->wait); | 700 | if (sd->need_wakeup) { |
| 690 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | 701 | wakeup_pipe_writers(pipe); |
| 691 | do_wakeup = 0; | 702 | sd->need_wakeup = false; |
| 692 | } | 703 | } |
| 693 | 704 | ||
| 694 | pipe_wait(pipe); | 705 | pipe_wait(pipe); |
| 695 | } | 706 | } |
| 696 | 707 | ||
| 697 | if (do_wakeup) { | 708 | return 1; |
| 698 | smp_mb(); | 709 | } |
| 699 | if (waitqueue_active(&pipe->wait)) | 710 | EXPORT_SYMBOL(splice_from_pipe_next); |
| 700 | wake_up_interruptible(&pipe->wait); | ||
| 701 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | ||
| 702 | } | ||
| 703 | 711 | ||
| 704 | return ret; | 712 | /** |
| 713 | * splice_from_pipe_begin - start splicing from pipe | ||
| 714 | * @pipe: pipe to splice from | ||
| 715 | * | ||
| 716 | * Description: | ||
| 717 | * This function should be called before a loop containing | ||
| 718 | * splice_from_pipe_next() and splice_from_pipe_feed() to | ||
| 719 | * initialize the necessary fields of @sd. | ||
| 720 | */ | ||
| 721 | void splice_from_pipe_begin(struct splice_desc *sd) | ||
| 722 | { | ||
| 723 | sd->num_spliced = 0; | ||
| 724 | sd->need_wakeup = false; | ||
| 725 | } | ||
| 726 | EXPORT_SYMBOL(splice_from_pipe_begin); | ||
| 727 | |||
| 728 | /** | ||
| 729 | * splice_from_pipe_end - finish splicing from pipe | ||
| 730 | * @pipe: pipe to splice from | ||
| 731 | * @sd: information about the splice operation | ||
| 732 | * | ||
| 733 | * Description: | ||
| 734 | * This function will wake up pipe writers if necessary. It should | ||
| 735 | * be called after a loop containing splice_from_pipe_next() and | ||
| 736 | * splice_from_pipe_feed(). | ||
| 737 | */ | ||
| 738 | void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) | ||
| 739 | { | ||
| 740 | if (sd->need_wakeup) | ||
| 741 | wakeup_pipe_writers(pipe); | ||
| 742 | } | ||
| 743 | EXPORT_SYMBOL(splice_from_pipe_end); | ||
| 744 | |||
| 745 | /** | ||
| 746 | * __splice_from_pipe - splice data from a pipe to given actor | ||
| 747 | * @pipe: pipe to splice from | ||
| 748 | * @sd: information to @actor | ||
| 749 | * @actor: handler that splices the data | ||
| 750 | * | ||
| 751 | * Description: | ||
| 752 | * This function does little more than loop over the pipe and call | ||
| 753 | * @actor to do the actual moving of a single struct pipe_buffer to | ||
| 754 | * the desired destination. See pipe_to_file, pipe_to_sendpage, or | ||
| 755 | * pipe_to_user. | ||
| 756 | * | ||
| 757 | */ | ||
| 758 | ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, | ||
| 759 | splice_actor *actor) | ||
| 760 | { | ||
| 761 | int ret; | ||
| 762 | |||
| 763 | splice_from_pipe_begin(sd); | ||
| 764 | do { | ||
| 765 | ret = splice_from_pipe_next(pipe, sd); | ||
| 766 | if (ret > 0) | ||
| 767 | ret = splice_from_pipe_feed(pipe, sd, actor); | ||
| 768 | } while (ret > 0); | ||
| 769 | splice_from_pipe_end(pipe, sd); | ||
| 770 | |||
| 771 | return sd->num_spliced ? sd->num_spliced : ret; | ||
| 705 | } | 772 | } |
| 706 | EXPORT_SYMBOL(__splice_from_pipe); | 773 | EXPORT_SYMBOL(__splice_from_pipe); |
| 707 | 774 | ||
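__splice_from_pipe() is now just the begin/next/feed/end loop wrapped around the caller's actor. Any function with the splice_actor signature can be plugged in; for example, a hypothetical actor that discards data (a real actor would call buf->ops->confirm() before touching the buffer contents):

    static int pipe_to_null(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, struct splice_desc *sd)
    {
            return sd->len; /* report the whole chunk as consumed */
    }

Handing pipe_to_null to splice_from_pipe() would then drain up to len bytes from the pipe under the pipe lock.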
| @@ -715,7 +782,7 @@ EXPORT_SYMBOL(__splice_from_pipe); | |||
| 715 | * @actor: handler that splices the data | 782 | * @actor: handler that splices the data |
| 716 | * | 783 | * |
| 717 | * Description: | 784 | * Description: |
| 718 | * See __splice_from_pipe. This function locks the input and output inodes, | 785 | * See __splice_from_pipe. This function locks the pipe inode, |
| 719 | * otherwise it's identical to __splice_from_pipe(). | 786 | * otherwise it's identical to __splice_from_pipe(). |
| 720 | * | 787 | * |
| 721 | */ | 788 | */ |
| @@ -724,7 +791,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
| 724 | splice_actor *actor) | 791 | splice_actor *actor) |
| 725 | { | 792 | { |
| 726 | ssize_t ret; | 793 | ssize_t ret; |
| 727 | struct inode *inode = out->f_mapping->host; | ||
| 728 | struct splice_desc sd = { | 794 | struct splice_desc sd = { |
| 729 | .total_len = len, | 795 | .total_len = len, |
| 730 | .flags = flags, | 796 | .flags = flags, |
| @@ -732,30 +798,15 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
| 732 | .u.file = out, | 798 | .u.file = out, |
| 733 | }; | 799 | }; |
| 734 | 800 | ||
| 735 | /* | 801 | pipe_lock(pipe); |
| 736 | * The actor worker might be calling ->write_begin and | ||
| 737 | * ->write_end. Most of the time, these expect i_mutex to | ||
| 738 | * be held. Since this may result in an ABBA deadlock with | ||
| 739 | * pipe->inode, we have to order lock acquiry here. | ||
| 740 | * | ||
| 741 | * Outer lock must be inode->i_mutex, as pipe_wait() will | ||
| 742 | * release and reacquire pipe->inode->i_mutex, AND inode must | ||
| 743 | * never be a pipe. | ||
| 744 | */ | ||
| 745 | WARN_ON(S_ISFIFO(inode->i_mode)); | ||
| 746 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
| 747 | if (pipe->inode) | ||
| 748 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
| 749 | ret = __splice_from_pipe(pipe, &sd, actor); | 802 | ret = __splice_from_pipe(pipe, &sd, actor); |
| 750 | if (pipe->inode) | 803 | pipe_unlock(pipe); |
| 751 | mutex_unlock(&pipe->inode->i_mutex); | ||
| 752 | mutex_unlock(&inode->i_mutex); | ||
| 753 | 804 | ||
| 754 | return ret; | 805 | return ret; |
| 755 | } | 806 | } |
| 756 | 807 | ||
| 757 | /** | 808 | /** |
| 758 | * generic_file_splice_write_nolock - generic_file_splice_write without mutexes | 809 | * generic_file_splice_write - splice data from a pipe to a file |
| 759 | * @pipe: pipe info | 810 | * @pipe: pipe info |
| 760 | * @out: file to write to | 811 | * @out: file to write to |
| 761 | * @ppos: position in @out | 812 | * @ppos: position in @out |
| @@ -764,13 +815,12 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
| 764 | * | 815 | * |
| 765 | * Description: | 816 | * Description: |
| 766 | * Will either move or copy pages (determined by @flags options) from | 817 | * Will either move or copy pages (determined by @flags options) from |
| 767 | * the given pipe inode to the given file. The caller is responsible | 818 | * the given pipe inode to the given file. |
| 768 | * for acquiring i_mutex on both inodes. | ||
| 769 | * | 819 | * |
| 770 | */ | 820 | */ |
| 771 | ssize_t | 821 | ssize_t |
| 772 | generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, | 822 | generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, |
| 773 | loff_t *ppos, size_t len, unsigned int flags) | 823 | loff_t *ppos, size_t len, unsigned int flags) |
| 774 | { | 824 | { |
| 775 | struct address_space *mapping = out->f_mapping; | 825 | struct address_space *mapping = out->f_mapping; |
| 776 | struct inode *inode = mapping->host; | 826 | struct inode *inode = mapping->host; |
| @@ -781,76 +831,28 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, | |||
| 781 | .u.file = out, | 831 | .u.file = out, |
| 782 | }; | 832 | }; |
| 783 | ssize_t ret; | 833 | ssize_t ret; |
| 784 | int err; | ||
| 785 | |||
| 786 | err = file_remove_suid(out); | ||
| 787 | if (unlikely(err)) | ||
| 788 | return err; | ||
| 789 | |||
| 790 | ret = __splice_from_pipe(pipe, &sd, pipe_to_file); | ||
| 791 | if (ret > 0) { | ||
| 792 | unsigned long nr_pages; | ||
| 793 | 834 | ||
| 794 | *ppos += ret; | 835 | pipe_lock(pipe); |
| 795 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
| 796 | 836 | ||
| 797 | /* | 837 | splice_from_pipe_begin(&sd); |
| 798 | * If file or inode is SYNC and we actually wrote some data, | 838 | do { |
| 799 | * sync it. | 839 | ret = splice_from_pipe_next(pipe, &sd); |
| 800 | */ | 840 | if (ret <= 0) |
| 801 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | 841 | break; |
| 802 | err = generic_osync_inode(inode, mapping, | ||
| 803 | OSYNC_METADATA|OSYNC_DATA); | ||
| 804 | |||
| 805 | if (err) | ||
| 806 | ret = err; | ||
| 807 | } | ||
| 808 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | ||
| 809 | } | ||
| 810 | 842 | ||
| 811 | return ret; | 843 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
| 812 | } | 844 | ret = file_remove_suid(out); |
| 845 | if (!ret) | ||
| 846 | ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); | ||
| 847 | mutex_unlock(&inode->i_mutex); | ||
| 848 | } while (ret > 0); | ||
| 849 | splice_from_pipe_end(pipe, &sd); | ||
| 813 | 850 | ||
| 814 | EXPORT_SYMBOL(generic_file_splice_write_nolock); | 851 | pipe_unlock(pipe); |
| 815 | 852 | ||
| 816 | /** | 853 | if (sd.num_spliced) |
| 817 | * generic_file_splice_write - splice data from a pipe to a file | 854 | ret = sd.num_spliced; |
| 818 | * @pipe: pipe info | ||
| 819 | * @out: file to write to | ||
| 820 | * @ppos: position in @out | ||
| 821 | * @len: number of bytes to splice | ||
| 822 | * @flags: splice modifier flags | ||
| 823 | * | ||
| 824 | * Description: | ||
| 825 | * Will either move or copy pages (determined by @flags options) from | ||
| 826 | * the given pipe inode to the given file. | ||
| 827 | * | ||
| 828 | */ | ||
| 829 | ssize_t | ||
| 830 | generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | ||
| 831 | loff_t *ppos, size_t len, unsigned int flags) | ||
| 832 | { | ||
| 833 | struct address_space *mapping = out->f_mapping; | ||
| 834 | struct inode *inode = mapping->host; | ||
| 835 | struct splice_desc sd = { | ||
| 836 | .total_len = len, | ||
| 837 | .flags = flags, | ||
| 838 | .pos = *ppos, | ||
| 839 | .u.file = out, | ||
| 840 | }; | ||
| 841 | ssize_t ret; | ||
| 842 | 855 | ||
| 843 | WARN_ON(S_ISFIFO(inode->i_mode)); | ||
| 844 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
| 845 | ret = file_remove_suid(out); | ||
| 846 | if (likely(!ret)) { | ||
| 847 | if (pipe->inode) | ||
| 848 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
| 849 | ret = __splice_from_pipe(pipe, &sd, pipe_to_file); | ||
| 850 | if (pipe->inode) | ||
| 851 | mutex_unlock(&pipe->inode->i_mutex); | ||
| 852 | } | ||
| 853 | mutex_unlock(&inode->i_mutex); | ||
| 854 | if (ret > 0) { | 856 | if (ret > 0) { |
| 855 | unsigned long nr_pages; | 857 | unsigned long nr_pages; |
| 856 | 858 | ||
| @@ -1339,8 +1341,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, | |||
| 1339 | if (!pipe) | 1341 | if (!pipe) |
| 1340 | return -EBADF; | 1342 | return -EBADF; |
| 1341 | 1343 | ||
| 1342 | if (pipe->inode) | 1344 | pipe_lock(pipe); |
| 1343 | mutex_lock(&pipe->inode->i_mutex); | ||
| 1344 | 1345 | ||
| 1345 | error = ret = 0; | 1346 | error = ret = 0; |
| 1346 | while (nr_segs) { | 1347 | while (nr_segs) { |
| @@ -1395,8 +1396,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, | |||
| 1395 | iov++; | 1396 | iov++; |
| 1396 | } | 1397 | } |
| 1397 | 1398 | ||
| 1398 | if (pipe->inode) | 1399 | pipe_unlock(pipe); |
| 1399 | mutex_unlock(&pipe->inode->i_mutex); | ||
| 1400 | 1400 | ||
| 1401 | if (!ret) | 1401 | if (!ret) |
| 1402 | ret = error; | 1402 | ret = error; |
| @@ -1524,7 +1524,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
| 1524 | return 0; | 1524 | return 0; |
| 1525 | 1525 | ||
| 1526 | ret = 0; | 1526 | ret = 0; |
| 1527 | mutex_lock(&pipe->inode->i_mutex); | 1527 | pipe_lock(pipe); |
| 1528 | 1528 | ||
| 1529 | while (!pipe->nrbufs) { | 1529 | while (!pipe->nrbufs) { |
| 1530 | if (signal_pending(current)) { | 1530 | if (signal_pending(current)) { |
| @@ -1542,7 +1542,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
| 1542 | pipe_wait(pipe); | 1542 | pipe_wait(pipe); |
| 1543 | } | 1543 | } |
| 1544 | 1544 | ||
| 1545 | mutex_unlock(&pipe->inode->i_mutex); | 1545 | pipe_unlock(pipe); |
| 1546 | return ret; | 1546 | return ret; |
| 1547 | } | 1547 | } |
| 1548 | 1548 | ||
| @@ -1562,7 +1562,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
| 1562 | return 0; | 1562 | return 0; |
| 1563 | 1563 | ||
| 1564 | ret = 0; | 1564 | ret = 0; |
| 1565 | mutex_lock(&pipe->inode->i_mutex); | 1565 | pipe_lock(pipe); |
| 1566 | 1566 | ||
| 1567 | while (pipe->nrbufs >= PIPE_BUFFERS) { | 1567 | while (pipe->nrbufs >= PIPE_BUFFERS) { |
| 1568 | if (!pipe->readers) { | 1568 | if (!pipe->readers) { |
| @@ -1583,7 +1583,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
| 1583 | pipe->waiting_writers--; | 1583 | pipe->waiting_writers--; |
| 1584 | } | 1584 | } |
| 1585 | 1585 | ||
| 1586 | mutex_unlock(&pipe->inode->i_mutex); | 1586 | pipe_unlock(pipe); |
| 1587 | return ret; | 1587 | return ret; |
| 1588 | } | 1588 | } |
| 1589 | 1589 | ||
| @@ -1599,10 +1599,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
| 1599 | 1599 | ||
| 1600 | /* | 1600 | /* |
| 1601 | * Potential ABBA deadlock, work around it by ordering lock | 1601 | * Potential ABBA deadlock, work around it by ordering lock |
| 1602 | * grabbing by inode address. Otherwise two different processes | 1602 | * grabbing by pipe info address. Otherwise two different processes |
| 1603 | * could deadlock (one doing tee from A -> B, the other from B -> A). | 1603 | * could deadlock (one doing tee from A -> B, the other from B -> A). |
| 1604 | */ | 1604 | */ |
| 1605 | inode_double_lock(ipipe->inode, opipe->inode); | 1605 | pipe_double_lock(ipipe, opipe); |
| 1606 | 1606 | ||
| 1607 | do { | 1607 | do { |
| 1608 | if (!opipe->readers) { | 1608 | if (!opipe->readers) { |
| @@ -1653,7 +1653,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
| 1653 | if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) | 1653 | if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) |
| 1654 | ret = -EAGAIN; | 1654 | ret = -EAGAIN; |
| 1655 | 1655 | ||
| 1656 | inode_double_unlock(ipipe->inode, opipe->inode); | 1656 | pipe_unlock(ipipe); |
| 1657 | pipe_unlock(opipe); | ||
| 1657 | 1658 | ||
| 1658 | /* | 1659 | /* |
| 1659 | * If we put data in the output pipe, wakeup any potential readers. | 1660 | * If we put data in the output pipe, wakeup any potential readers. |
diff --git a/include/linux/bio.h b/include/linux/bio.h index b900d2c67d29..b89cf2d82898 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
| @@ -504,6 +504,115 @@ static inline int bio_has_data(struct bio *bio) | |||
| 504 | return bio && bio->bi_io_vec != NULL; | 504 | return bio && bio->bi_io_vec != NULL; |
| 505 | } | 505 | } |
| 506 | 506 | ||
| 507 | /* | ||
| 508 | * BIO list management for use by remapping drivers (e.g. DM or MD). | ||
| 509 | * | ||
| 510 | * A bio_list anchors a singly-linked list of bios chained through the bi_next | ||
| 511 | * member of the bio. The bio_list also caches the last list member to allow | ||
| 512 | * fast access to the tail. | ||
| 513 | */ | ||
| 514 | struct bio_list { | ||
| 515 | struct bio *head; | ||
| 516 | struct bio *tail; | ||
| 517 | }; | ||
| 518 | |||
| 519 | static inline int bio_list_empty(const struct bio_list *bl) | ||
| 520 | { | ||
| 521 | return bl->head == NULL; | ||
| 522 | } | ||
| 523 | |||
| 524 | static inline void bio_list_init(struct bio_list *bl) | ||
| 525 | { | ||
| 526 | bl->head = bl->tail = NULL; | ||
| 527 | } | ||
| 528 | |||
| 529 | #define bio_list_for_each(bio, bl) \ | ||
| 530 | for (bio = (bl)->head; bio; bio = bio->bi_next) | ||
| 531 | |||
| 532 | static inline unsigned bio_list_size(const struct bio_list *bl) | ||
| 533 | { | ||
| 534 | unsigned sz = 0; | ||
| 535 | struct bio *bio; | ||
| 536 | |||
| 537 | bio_list_for_each(bio, bl) | ||
| 538 | sz++; | ||
| 539 | |||
| 540 | return sz; | ||
| 541 | } | ||
| 542 | |||
| 543 | static inline void bio_list_add(struct bio_list *bl, struct bio *bio) | ||
| 544 | { | ||
| 545 | bio->bi_next = NULL; | ||
| 546 | |||
| 547 | if (bl->tail) | ||
| 548 | bl->tail->bi_next = bio; | ||
| 549 | else | ||
| 550 | bl->head = bio; | ||
| 551 | |||
| 552 | bl->tail = bio; | ||
| 553 | } | ||
| 554 | |||
| 555 | static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) | ||
| 556 | { | ||
| 557 | bio->bi_next = bl->head; | ||
| 558 | |||
| 559 | bl->head = bio; | ||
| 560 | |||
| 561 | if (!bl->tail) | ||
| 562 | bl->tail = bio; | ||
| 563 | } | ||
| 564 | |||
| 565 | static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) | ||
| 566 | { | ||
| 567 | if (!bl2->head) | ||
| 568 | return; | ||
| 569 | |||
| 570 | if (bl->tail) | ||
| 571 | bl->tail->bi_next = bl2->head; | ||
| 572 | else | ||
| 573 | bl->head = bl2->head; | ||
| 574 | |||
| 575 | bl->tail = bl2->tail; | ||
| 576 | } | ||
| 577 | |||
| 578 | static inline void bio_list_merge_head(struct bio_list *bl, | ||
| 579 | struct bio_list *bl2) | ||
| 580 | { | ||
| 581 | if (!bl2->head) | ||
| 582 | return; | ||
| 583 | |||
| 584 | if (bl->head) | ||
| 585 | bl2->tail->bi_next = bl->head; | ||
| 586 | else | ||
| 587 | bl->tail = bl2->tail; | ||
| 588 | |||
| 589 | bl->head = bl2->head; | ||
| 590 | } | ||
| 591 | |||
| 592 | static inline struct bio *bio_list_pop(struct bio_list *bl) | ||
| 593 | { | ||
| 594 | struct bio *bio = bl->head; | ||
| 595 | |||
| 596 | if (bio) { | ||
| 597 | bl->head = bl->head->bi_next; | ||
| 598 | if (!bl->head) | ||
| 599 | bl->tail = NULL; | ||
| 600 | |||
| 601 | bio->bi_next = NULL; | ||
| 602 | } | ||
| 603 | |||
| 604 | return bio; | ||
| 605 | } | ||
| 606 | |||
| 607 | static inline struct bio *bio_list_get(struct bio_list *bl) | ||
| 608 | { | ||
| 609 | struct bio *bio = bl->head; | ||
| 610 | |||
| 611 | bl->head = bl->tail = NULL; | ||
| 612 | |||
| 613 | return bio; | ||
| 614 | } | ||
| 615 | |||
| 507 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 616 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
| 508 | 617 | ||
| 509 | #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) | 618 | #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 562d2855cf30..e766be0d4329 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -87,6 +87,60 @@ struct inodes_stat_t { | |||
| 87 | */ | 87 | */ |
| 88 | #define FMODE_NOCMTIME ((__force fmode_t)2048) | 88 | #define FMODE_NOCMTIME ((__force fmode_t)2048) |
| 89 | 89 | ||
| 90 | /* | ||
| 91 | * The below are the various read and write types that we support. Some of | ||
| 92 | * them include behavioral modifiers that send information down to the | ||
| 93 | * block layer and IO scheduler. Terminology: | ||
| 94 | * | ||
| 95 | * The block layer uses device plugging to defer IO a little bit, in | ||
| 96 | * the hope that we will see more IO very shortly. This increases | ||
| 97 | * coalescing of adjacent IO and thus reduces the number of IOs we | ||
| 98 | * have to send to the device. It also allows for better queuing, | ||
| 99 | * if the IO isn't mergeable. If the caller is going to be waiting | ||
| 100 | * for the IO, then he must ensure that the device is unplugged so | ||
| 101 | * that the IO is dispatched to the driver. | ||
| 102 | * | ||
| 103 | * All IO is handled async in Linux. This is fine for background | ||
| 104 | * writes, but for reads or writes that someone waits for completion | ||
| 105 | * on, we want to notify the block layer and IO scheduler so that they | ||
| 106 | * know about it. That allows them to make better scheduling | ||
| 107 | * decisions. So when the below references 'sync' and 'async', it | ||
| 108 | * is referencing this priority hint. | ||
| 109 | * | ||
| 110 | * With that in mind, the available types are: | ||
| 111 | * | ||
| 112 | * READ A normal read operation. Device will be plugged. | ||
| 113 | * READ_SYNC A synchronous read. Device is not plugged, caller can | ||
| 114 | * immediately wait on this read without caring about | ||
| 115 | * unplugging. | ||
| 116 | * READA Used for read-ahead operations. Lower priority, and the | ||
| 117 | * block layer could (in theory) choose to ignore this | ||
| 118 | * request if it runs into resource problems. | ||
| 119 | * WRITE A normal async write. Device will be plugged. | ||
| 120 | * SWRITE Like WRITE, but a special case for ll_rw_block() that | ||
| 121 | * tells it to lock the buffer first. Normally a buffer | ||
| 122 | * must be locked before doing IO. | ||
| 123 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down | ||
| 124 | * the hint that someone will be waiting on this IO | ||
| 125 | * shortly. The device must still be unplugged explicitly, | ||
| 126 | * WRITE_SYNC_PLUG does not do this as we could be | ||
| 127 | * submitting more writes before we actually wait on any | ||
| 128 | * of them. | ||
| 129 | * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device | ||
| 130 | * immediately after submission. The write equivalent | ||
| 131 | * of READ_SYNC. | ||
| 132 | * WRITE_ODIRECT Special case write for O_DIRECT only. | ||
| 133 | * SWRITE_SYNC | ||
| 134 | * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. | ||
| 135 | * See SWRITE. | ||
| 136 | * WRITE_BARRIER Like WRITE, but tells the block layer that all | ||
| 137 | * previously submitted writes must be safely on storage | ||
| 138 | * before this one is started. Also guarantees that when | ||
| 139 | * this write is complete, it itself is also safely on | ||
| 140 | * storage. Prevents reordering of writes on both sides | ||
| 141 | * of this IO. | ||
| 142 | * | ||
| 143 | */ | ||
| 90 | #define RW_MASK 1 | 144 | #define RW_MASK 1 |
| 91 | #define RWA_MASK 2 | 145 | #define RWA_MASK 2 |
| 92 | #define READ 0 | 146 | #define READ 0 |
| @@ -102,6 +156,11 @@ struct inodes_stat_t { | |||
| 102 | (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) | 156 | (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) |
| 103 | #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) | 157 | #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) |
| 104 | #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) | 158 | #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) |
| 159 | |||
| 160 | /* | ||
| 161 | * These aren't really reads or writes, they pass down information about | ||
| 162 | * parts of device that are now unused by the file system. | ||
| 163 | */ | ||
| 105 | #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) | 164 | #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) |
| 106 | #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) | 165 | #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) |
| 107 | 166 | ||
| @@ -738,9 +797,6 @@ enum inode_i_mutex_lock_class | |||
| 738 | I_MUTEX_QUOTA | 797 | I_MUTEX_QUOTA |
| 739 | }; | 798 | }; |
| 740 | 799 | ||
| 741 | extern void inode_double_lock(struct inode *inode1, struct inode *inode2); | ||
| 742 | extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); | ||
| 743 | |||
| 744 | /* | 800 | /* |
| 745 | * NOTE: in a 32bit arch with a preemptable kernel and | 801 | * NOTE: in a 32bit arch with a preemptable kernel and |
| 746 | * an UP compile the i_size_read/write must be atomic | 802 | * an UP compile the i_size_read/write must be atomic |
| @@ -2150,8 +2206,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, | |||
| 2150 | struct pipe_inode_info *, size_t, unsigned int); | 2206 | struct pipe_inode_info *, size_t, unsigned int); |
| 2151 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, | 2207 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, |
| 2152 | struct file *, loff_t *, size_t, unsigned int); | 2208 | struct file *, loff_t *, size_t, unsigned int); |
| 2153 | extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, | ||
| 2154 | struct file *, loff_t *, size_t, unsigned int); | ||
| 2155 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 2209 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
| 2156 | struct file *out, loff_t *, size_t len, unsigned int flags); | 2210 | struct file *out, loff_t *, size_t len, unsigned int flags); |
| 2157 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | 2211 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, |
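The new comment block in fs.h documents the plug/sync hints rather than changing them; at submission time each composed flag set is simply the rw argument to submit_bio(). A small illustrative pairing (assumes both bios are already fully set up, and the 2.6.30-era READ_SYNC/WRITE definitions):

    #include <linux/fs.h>
    #include <linux/bio.h>

    static void submit_with_hints(struct bio *sync_read, struct bio *bg_write)
    {
            submit_bio(READ_SYNC, sync_read); /* unplugs; caller will wait */
            submit_bio(WRITE, bg_write);      /* stays plugged for merging */
    }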
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8e4120285f72..c8f038554e80 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h | |||
| @@ -134,6 +134,11 @@ struct pipe_buf_operations { | |||
| 134 | memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ | 134 | memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ |
| 135 | #define PIPE_SIZE PAGE_SIZE | 135 | #define PIPE_SIZE PAGE_SIZE |
| 136 | 136 | ||
| 137 | /* Pipe lock and unlock operations */ | ||
| 138 | void pipe_lock(struct pipe_inode_info *); | ||
| 139 | void pipe_unlock(struct pipe_inode_info *); | ||
| 140 | void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); | ||
| 141 | |||
| 137 | /* Drop the inode semaphore and wait for a pipe event, atomically */ | 142 | /* Drop the inode semaphore and wait for a pipe event, atomically */ |
| 138 | void pipe_wait(struct pipe_inode_info *pipe); | 143 | void pipe_wait(struct pipe_inode_info *pipe); |
| 139 | 144 | ||
diff --git a/include/linux/splice.h b/include/linux/splice.h index 528dcb93c2f2..5f3faa9d15ae 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h | |||
| @@ -36,6 +36,8 @@ struct splice_desc { | |||
| 36 | void *data; /* cookie */ | 36 | void *data; /* cookie */ |
| 37 | } u; | 37 | } u; |
| 38 | loff_t pos; /* file position */ | 38 | loff_t pos; /* file position */ |
| 39 | size_t num_spliced; /* number of bytes already spliced */ | ||
| 40 | bool need_wakeup; /* need to wake up writer */ | ||
| 39 | }; | 41 | }; |
| 40 | 42 | ||
| 41 | struct partial_page { | 43 | struct partial_page { |
| @@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, | |||
| 66 | splice_actor *); | 68 | splice_actor *); |
| 67 | extern ssize_t __splice_from_pipe(struct pipe_inode_info *, | 69 | extern ssize_t __splice_from_pipe(struct pipe_inode_info *, |
| 68 | struct splice_desc *, splice_actor *); | 70 | struct splice_desc *, splice_actor *); |
| 71 | extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *, | ||
| 72 | splice_actor *); | ||
| 73 | extern int splice_from_pipe_next(struct pipe_inode_info *, | ||
| 74 | struct splice_desc *); | ||
| 75 | extern void splice_from_pipe_begin(struct splice_desc *); | ||
| 76 | extern void splice_from_pipe_end(struct pipe_inode_info *, | ||
| 77 | struct splice_desc *); | ||
| 78 | extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *, | ||
| 79 | struct splice_desc *); | ||
| 80 | |||
| 69 | extern ssize_t splice_to_pipe(struct pipe_inode_info *, | 81 | extern ssize_t splice_to_pipe(struct pipe_inode_info *, |
| 70 | struct splice_pipe_desc *); | 82 | struct splice_pipe_desc *); |
| 71 | extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, | 83 | extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 505f319e489c..8ba052c86d48 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
| @@ -64,8 +64,6 @@ static int submit(int rw, pgoff_t page_off, struct page *page, | |||
| 64 | struct bio *bio; | 64 | struct bio *bio; |
| 65 | 65 | ||
| 66 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); | 66 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); |
| 67 | if (!bio) | ||
| 68 | return -ENOMEM; | ||
| 69 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | 67 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); |
| 70 | bio->bi_bdev = resume_bdev; | 68 | bio->bi_bdev = resume_bdev; |
| 71 | bio->bi_end_io = end_swap_bio_read; | 69 | bio->bi_end_io = end_swap_bio_read; |
