author     Jeff Layton <jlayton@redhat.com>     2011-05-19 16:22:57 -0400
committer  Steve French <sfrench@us.ibm.com>    2011-05-25 16:05:03 -0400
commit     c3d17b63e5eafcaf2678c11de801c189468631c8 (patch)
tree       ce9edbb87abc452bf7ae1a3b33e2b5cac42ab023
parent     b2e5cd33b598fb496b9366c445bd77c801efabb8 (diff)
cifs: convert cifs_writepages to use async writes
Have cifs_writepages issue asynchronous writes instead of waiting on
each write call to complete before issuing another. This also allows us
to return more quickly from writepages. It can just send out all of the
I/Os and not wait around for the replies.

In the WB_SYNC_ALL case, if the write completes with a retryable error,
then the completion workqueue job will resend the write.

This also changes the page locking semantics a little bit. Instead of
holding the page lock until the response is received, release it after
doing the send. This will reduce contention for the page lock and
should prevent processes that have the file mmap'ed from being blocked
unnecessarily.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-and-Tested-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <sfrench@us.ibm.com>
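The resend half of this scheme is not in this diff; it lives in the write
completion worker added earlier in the async-write series. Below is a
minimal sketch of that pattern, with the body paraphrased rather than
quoted from the kernel: the result and work fields, the requeue helper
cifs_writev_requeue, and the use of system_wq are assumptions based on
the surrounding series, not code from this patch.

/*
 * Sketch only: illustrates the retry-on-EAGAIN completion behavior the
 * commit message describes. On a retryable error during a data-integrity
 * (WB_SYNC_ALL) flush, redirty the pages and hand the write back to a
 * requeue job instead of failing it. Names marked above are assumed.
 */
static void
cifs_writev_complete(struct work_struct *work)
{
        struct cifs_writedata *wdata = container_of(work,
                                        struct cifs_writedata, work);
        unsigned int i;

        if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) {
                /* redirty the pages and resend the write; the pages stay
                 * under writeback and referenced until the retry finishes */
                for (i = 0; i < wdata->nr_pages; i++)
                        __set_page_dirty_nobuffers(wdata->pages[i]);
                INIT_WORK(&wdata->work, cifs_writev_requeue);   /* assumed helper */
                queue_work(system_wq, &wdata->work);
                return;
        }

        for (i = 0; i < wdata->nr_pages; i++) {
                struct page *page = wdata->pages[i];

                if (wdata->result < 0)
                        SetPageError(page);
                end_page_writeback(page);
                page_cache_release(page);
        }
        kref_put(&wdata->refcount, cifs_writedata_release);
}

With retries handled on the completion side, the writepages path below
only needs to issue cifs_async_writev() and unlock the pages.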
-rw-r--r--   fs/cifs/file.c   241
1 file changed, 99 insertions(+), 142 deletions(-)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c672afef0c09..00b926ce7935 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1092,58 +1092,20 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 static int cifs_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
 {
-        unsigned int bytes_to_write;
-        unsigned int bytes_written;
-        struct cifs_sb_info *cifs_sb;
-        int done = 0;
-        pgoff_t end;
-        pgoff_t index;
-        int range_whole = 0;
-        struct kvec *iov;
-        int len;
-        int n_iov = 0;
-        pgoff_t next;
-        int nr_pages;
-        __u64 offset = 0;
-        struct cifsFileInfo *open_file;
-        struct cifsTconInfo *tcon;
-        struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
+        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
+        bool done = false, scanned = false, range_whole = false;
+        pgoff_t end, index;
+        struct cifs_writedata *wdata;
         struct page *page;
-        struct pagevec pvec;
         int rc = 0;
-        int scanned = 0;
-        int xid;
-
-        cifs_sb = CIFS_SB(mapping->host->i_sb);
 
         /*
-         * If wsize is smaller that the page cache size, default to writing
+         * If wsize is smaller than the page cache size, default to writing
          * one page at a time via cifs_writepage
          */
         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
                 return generic_writepages(mapping, wbc);
 
-        iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
-        if (iov == NULL)
-                return generic_writepages(mapping, wbc);
-
-        /*
-         * if there's no open file, then this is likely to fail too,
-         * but it'll at least handle the return. Maybe it should be
-         * a BUG() instead?
-         */
-        open_file = find_writable_file(CIFS_I(mapping->host), false);
-        if (!open_file) {
-                kfree(iov);
-                return generic_writepages(mapping, wbc);
-        }
-
-        tcon = tlink_tcon(open_file->tlink);
-        cifsFileInfo_put(open_file);
-
-        xid = GetXid();
-
-        pagevec_init(&pvec, 0);
         if (wbc->range_cyclic) {
                 index = mapping->writeback_index; /* Start from prev offset */
                 end = -1;
@@ -1151,24 +1113,49 @@ static int cifs_writepages(struct address_space *mapping,
                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-                        range_whole = 1;
-                scanned = 1;
+                        range_whole = true;
+                scanned = true;
         }
 retry:
-        while (!done && (index <= end) &&
-               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                        PAGECACHE_TAG_DIRTY,
-                        min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
-                int first;
-                unsigned int i;
-
-                first = -1;
-                next = 0;
-                n_iov = 0;
-                bytes_to_write = 0;
-
-                for (i = 0; i < nr_pages; i++) {
-                        page = pvec.pages[i];
+        while (!done && index <= end) {
+                unsigned int i, nr_pages, found_pages;
+                pgoff_t next = 0, tofind;
+                struct page **pages;
+
+                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
+                                end - index) + 1;
+
+                wdata = cifs_writedata_alloc((unsigned int)tofind);
+                if (!wdata) {
+                        rc = -ENOMEM;
+                        break;
+                }
+
+                /*
+                 * find_get_pages_tag seems to return a max of 256 on each
+                 * iteration, so we must call it several times in order to
+                 * fill the array or the wsize is effectively limited to
+                 * 256 * PAGE_CACHE_SIZE.
+                 */
+                found_pages = 0;
+                pages = wdata->pages;
+                do {
+                        nr_pages = find_get_pages_tag(mapping, &index,
+                                                        PAGECACHE_TAG_DIRTY,
+                                                        tofind, pages);
+                        found_pages += nr_pages;
+                        tofind -= nr_pages;
+                        pages += nr_pages;
+                } while (nr_pages && tofind && index <= end);
+
+                if (found_pages == 0) {
+                        kref_put(&wdata->refcount, cifs_writedata_release);
+                        break;
+                }
+
+                nr_pages = 0;
+                for (i = 0; i < found_pages; i++) {
+                        page = wdata->pages[i];
                         /*
                          * At this point we hold neither mapping->tree_lock nor
                          * lock on the page itself: the page may be truncated or
@@ -1177,7 +1164,7 @@ retry:
                          * mapping
                          */
 
-                        if (first < 0)
+                        if (nr_pages == 0)
                                 lock_page(page);
                         else if (!trylock_page(page))
                                 break;
@@ -1188,7 +1175,7 @@ retry:
                         }
 
                         if (!wbc->range_cyclic && page->index > end) {
-                                done = 1;
+                                done = true;
                                 unlock_page(page);
                                 break;
                         }
@@ -1215,119 +1202,89 @@ retry:
                         set_page_writeback(page);
 
                         if (page_offset(page) >= mapping->host->i_size) {
-                                done = 1;
+                                done = true;
                                 unlock_page(page);
                                 end_page_writeback(page);
                                 break;
                         }
 
-                        /*
-                         * BB can we get rid of this? pages are held by pvec
-                         */
-                        page_cache_get(page);
+                        wdata->pages[i] = page;
+                        next = page->index + 1;
+                        ++nr_pages;
+                }
 
-                        len = min(mapping->host->i_size - page_offset(page),
-                                  (loff_t)PAGE_CACHE_SIZE);
+                /* reset index to refind any pages skipped */
+                if (nr_pages == 0)
+                        index = wdata->pages[0]->index + 1;
 
-                        /* reserve iov[0] for the smb header */
-                        n_iov++;
-                        iov[n_iov].iov_base = kmap(page);
-                        iov[n_iov].iov_len = len;
-                        bytes_to_write += len;
+                /* put any pages we aren't going to use */
+                for (i = nr_pages; i < found_pages; i++) {
+                        page_cache_release(wdata->pages[i]);
+                        wdata->pages[i] = NULL;
+                }
 
-                        if (first < 0) {
-                                first = i;
-                                offset = page_offset(page);
-                        }
-                        next = page->index + 1;
-                        if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
-                                break;
+                /* nothing to write? */
+                if (nr_pages == 0) {
+                        kref_put(&wdata->refcount, cifs_writedata_release);
+                        continue;
                 }
-                if (n_iov) {
-retry_write:
-                        open_file = find_writable_file(CIFS_I(mapping->host),
-                                                       false);
-                        if (!open_file) {
-                                cERROR(1, "No writable handles for inode");
-                                rc = -EBADF;
-                        } else {
-                                rc = CIFSSMBWrite2(xid, tcon, open_file->netfid,
-                                                   bytes_to_write, offset,
-                                                   &bytes_written, iov, n_iov,
-                                                   0);
-                                cifsFileInfo_put(open_file);
-                        }
 
-                        cFYI(1, "Write2 rc=%d, wrote=%u", rc, bytes_written);
+                wdata->sync_mode = wbc->sync_mode;
+                wdata->nr_pages = nr_pages;
+                wdata->offset = page_offset(wdata->pages[0]);
 
-                        /*
-                         * For now, treat a short write as if nothing got
-                         * written. A zero length write however indicates
-                         * ENOSPC or EFBIG. We have no way to know which
-                         * though, so call it ENOSPC for now. EFBIG would
-                         * get translated to AS_EIO anyway.
-                         *
-                         * FIXME: make it take into account the data that did
-                         * get written
-                         */
-                        if (rc == 0) {
-                                if (bytes_written == 0)
-                                        rc = -ENOSPC;
-                                else if (bytes_written < bytes_to_write)
-                                        rc = -EAGAIN;
+                do {
+                        if (wdata->cfile != NULL)
+                                cifsFileInfo_put(wdata->cfile);
+                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
+                                                          false);
+                        if (!wdata->cfile) {
+                                cERROR(1, "No writable handles for inode");
+                                rc = -EBADF;
+                                break;
                         }
+                        rc = cifs_async_writev(wdata);
+                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
 
-                        /* retry on data-integrity flush */
-                        if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
-                                goto retry_write;
+                for (i = 0; i < nr_pages; ++i)
+                        unlock_page(wdata->pages[i]);
 
-                        /* fix the stats and EOF */
-                        if (bytes_written > 0) {
-                                cifs_stats_bytes_written(tcon, bytes_written);
-                                cifs_update_eof(cifsi, offset, bytes_written);
-                        }
-
-                        for (i = 0; i < n_iov; i++) {
-                                page = pvec.pages[first + i];
-                                /* on retryable write error, redirty page */
+                /* send failure -- clean up the mess */
+                if (rc != 0) {
+                        for (i = 0; i < nr_pages; ++i) {
                                 if (rc == -EAGAIN)
-                                        redirty_page_for_writepage(wbc, page);
-                                else if (rc != 0)
-                                        SetPageError(page);
-                                kunmap(page);
-                                unlock_page(page);
-                                end_page_writeback(page);
-                                page_cache_release(page);
+                                        redirty_page_for_writepage(wbc,
+                                                           wdata->pages[i]);
+                                else
+                                        SetPageError(wdata->pages[i]);
+                                end_page_writeback(wdata->pages[i]);
+                                page_cache_release(wdata->pages[i]);
                         }
-
                         if (rc != -EAGAIN)
                                 mapping_set_error(mapping, rc);
-                        else
-                                rc = 0;
+                }
+                kref_put(&wdata->refcount, cifs_writedata_release);
 
-                        if ((wbc->nr_to_write -= n_iov) <= 0)
-                                done = 1;
-                        index = next;
-                } else
-                        /* Need to re-find the pages we skipped */
-                        index = pvec.pages[0]->index + 1;
+                wbc->nr_to_write -= nr_pages;
+                if (wbc->nr_to_write <= 0)
+                        done = true;
 
-                pagevec_release(&pvec);
+                index = next;
         }
+
         if (!scanned && !done) {
                 /*
                  * We hit the last page and there is more work to be done: wrap
                  * back to the start of the file
                  */
-                scanned = 1;
+                scanned = true;
                 index = 0;
                 goto retry;
         }
+
         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                 mapping->writeback_index = index;
 
-        FreeXid(xid);
-        kfree(iov);
         return rc;
 }
 