diff options
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- | fs/ceph/addr.c | 273 |
1 files changed, 262 insertions, 11 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 18c06bbaf136..f5013d92a7e6 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
192 | struct ceph_osd_client *osdc = | 192 | struct ceph_osd_client *osdc = |
193 | &ceph_inode_to_client(inode)->client->osdc; | 193 | &ceph_inode_to_client(inode)->client->osdc; |
194 | int err = 0; | 194 | int err = 0; |
195 | u64 off = page_offset(page); | ||
195 | u64 len = PAGE_CACHE_SIZE; | 196 | u64 len = PAGE_CACHE_SIZE; |
196 | 197 | ||
197 | err = ceph_readpage_from_fscache(inode, page); | 198 | if (off >= i_size_read(inode)) { |
199 | zero_user_segment(page, err, PAGE_CACHE_SIZE); | ||
200 | SetPageUptodate(page); | ||
201 | return 0; | ||
202 | } | ||
198 | 203 | ||
204 | /* | ||
205 | * Uptodate inline data should have been added into page cache | ||
206 | * while getting Fcr caps. | ||
207 | */ | ||
208 | if (ci->i_inline_version != CEPH_INLINE_NONE) | ||
209 | return -EINVAL; | ||
210 | |||
211 | err = ceph_readpage_from_fscache(inode, page); | ||
199 | if (err == 0) | 212 | if (err == 0) |
200 | goto out; | 213 | goto out; |
201 | 214 | ||
202 | dout("readpage inode %p file %p page %p index %lu\n", | 215 | dout("readpage inode %p file %p page %p index %lu\n", |
203 | inode, filp, page, page->index); | 216 | inode, filp, page, page->index); |
204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 217 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
205 | (u64) page_offset(page), &len, | 218 | off, &len, |
206 | ci->i_truncate_seq, ci->i_truncate_size, | 219 | ci->i_truncate_seq, ci->i_truncate_size, |
207 | &page, 1, 0); | 220 | &page, 1, 0); |
208 | if (err == -ENOENT) | 221 | if (err == -ENOENT) |
@@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
319 | off, len); | 332 | off, len); |
320 | vino = ceph_vino(inode); | 333 | vino = ceph_vino(inode); |
321 | req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, | 334 | req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, |
322 | 1, CEPH_OSD_OP_READ, | 335 | 0, 1, CEPH_OSD_OP_READ, |
323 | CEPH_OSD_FLAG_READ, NULL, | 336 | CEPH_OSD_FLAG_READ, NULL, |
324 | ci->i_truncate_seq, ci->i_truncate_size, | 337 | ci->i_truncate_seq, ci->i_truncate_size, |
325 | false); | 338 | false); |
@@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
384 | int rc = 0; | 397 | int rc = 0; |
385 | int max = 0; | 398 | int max = 0; |
386 | 399 | ||
400 | if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE) | ||
401 | return -EINVAL; | ||
402 | |||
387 | rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list, | 403 | rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list, |
388 | &nr_pages); | 404 | &nr_pages); |
389 | 405 | ||
@@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
673 | int rc = 0; | 689 | int rc = 0; |
674 | unsigned wsize = 1 << inode->i_blkbits; | 690 | unsigned wsize = 1 << inode->i_blkbits; |
675 | struct ceph_osd_request *req = NULL; | 691 | struct ceph_osd_request *req = NULL; |
676 | int do_sync; | 692 | int do_sync = 0; |
677 | u64 truncate_size, snap_size; | 693 | u64 truncate_size, snap_size; |
678 | u32 truncate_seq; | 694 | u32 truncate_seq; |
679 | 695 | ||
@@ -750,7 +766,6 @@ retry: | |||
750 | last_snapc = snapc; | 766 | last_snapc = snapc; |
751 | 767 | ||
752 | while (!done && index <= end) { | 768 | while (!done && index <= end) { |
753 | int num_ops = do_sync ? 2 : 1; | ||
754 | unsigned i; | 769 | unsigned i; |
755 | int first; | 770 | int first; |
756 | pgoff_t next; | 771 | pgoff_t next; |
@@ -850,7 +865,8 @@ get_more_pages: | |||
850 | len = wsize; | 865 | len = wsize; |
851 | req = ceph_osdc_new_request(&fsc->client->osdc, | 866 | req = ceph_osdc_new_request(&fsc->client->osdc, |
852 | &ci->i_layout, vino, | 867 | &ci->i_layout, vino, |
853 | offset, &len, num_ops, | 868 | offset, &len, 0, |
869 | do_sync ? 2 : 1, | ||
854 | CEPH_OSD_OP_WRITE, | 870 | CEPH_OSD_OP_WRITE, |
855 | CEPH_OSD_FLAG_WRITE | | 871 | CEPH_OSD_FLAG_WRITE | |
856 | CEPH_OSD_FLAG_ONDISK, | 872 | CEPH_OSD_FLAG_ONDISK, |
@@ -862,6 +878,9 @@ get_more_pages: | |||
862 | break; | 878 | break; |
863 | } | 879 | } |
864 | 880 | ||
881 | if (do_sync) | ||
882 | osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); | ||
883 | |||
865 | req->r_callback = writepages_finish; | 884 | req->r_callback = writepages_finish; |
866 | req->r_inode = inode; | 885 | req->r_inode = inode; |
867 | 886 | ||
@@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1204 | struct inode *inode = file_inode(vma->vm_file); | 1223 | struct inode *inode = file_inode(vma->vm_file); |
1205 | struct ceph_inode_info *ci = ceph_inode(inode); | 1224 | struct ceph_inode_info *ci = ceph_inode(inode); |
1206 | struct ceph_file_info *fi = vma->vm_file->private_data; | 1225 | struct ceph_file_info *fi = vma->vm_file->private_data; |
1226 | struct page *pinned_page = NULL; | ||
1207 | loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; | 1227 | loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; |
1208 | int want, got, ret; | 1228 | int want, got, ret; |
1209 | 1229 | ||
@@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1215 | want = CEPH_CAP_FILE_CACHE; | 1235 | want = CEPH_CAP_FILE_CACHE; |
1216 | while (1) { | 1236 | while (1) { |
1217 | got = 0; | 1237 | got = 0; |
1218 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); | 1238 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, |
1239 | -1, &got, &pinned_page); | ||
1219 | if (ret == 0) | 1240 | if (ret == 0) |
1220 | break; | 1241 | break; |
1221 | if (ret != -ERESTARTSYS) { | 1242 | if (ret != -ERESTARTSYS) { |
@@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1226 | dout("filemap_fault %p %llu~%zd got cap refs on %s\n", | 1247 | dout("filemap_fault %p %llu~%zd got cap refs on %s\n", |
1227 | inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got)); | 1248 | inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got)); |
1228 | 1249 | ||
1229 | ret = filemap_fault(vma, vmf); | 1250 | if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || |
1251 | ci->i_inline_version == CEPH_INLINE_NONE) | ||
1252 | ret = filemap_fault(vma, vmf); | ||
1253 | else | ||
1254 | ret = -EAGAIN; | ||
1230 | 1255 | ||
1231 | dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", | 1256 | dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", |
1232 | inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); | 1257 | inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); |
1258 | if (pinned_page) | ||
1259 | page_cache_release(pinned_page); | ||
1233 | ceph_put_cap_refs(ci, got); | 1260 | ceph_put_cap_refs(ci, got); |
1234 | 1261 | ||
1262 | if (ret != -EAGAIN) | ||
1263 | return ret; | ||
1264 | |||
1265 | /* read inline data */ | ||
1266 | if (off >= PAGE_CACHE_SIZE) { | ||
1267 | /* does not support inline data > PAGE_SIZE */ | ||
1268 | ret = VM_FAULT_SIGBUS; | ||
1269 | } else { | ||
1270 | int ret1; | ||
1271 | struct address_space *mapping = inode->i_mapping; | ||
1272 | struct page *page = find_or_create_page(mapping, 0, | ||
1273 | mapping_gfp_mask(mapping) & | ||
1274 | ~__GFP_FS); | ||
1275 | if (!page) { | ||
1276 | ret = VM_FAULT_OOM; | ||
1277 | goto out; | ||
1278 | } | ||
1279 | ret1 = __ceph_do_getattr(inode, page, | ||
1280 | CEPH_STAT_CAP_INLINE_DATA, true); | ||
1281 | if (ret1 < 0 || off >= i_size_read(inode)) { | ||
1282 | unlock_page(page); | ||
1283 | page_cache_release(page); | ||
1284 | ret = VM_FAULT_SIGBUS; | ||
1285 | goto out; | ||
1286 | } | ||
1287 | if (ret1 < PAGE_CACHE_SIZE) | ||
1288 | zero_user_segment(page, ret1, PAGE_CACHE_SIZE); | ||
1289 | else | ||
1290 | flush_dcache_page(page); | ||
1291 | SetPageUptodate(page); | ||
1292 | vmf->page = page; | ||
1293 | ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; | ||
1294 | } | ||
1295 | out: | ||
1296 | dout("filemap_fault %p %llu~%zd read inline data ret %d\n", | ||
1297 | inode, off, (size_t)PAGE_CACHE_SIZE, ret); | ||
1235 | return ret; | 1298 | return ret; |
1236 | } | 1299 | } |
1237 | 1300 | ||
@@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1250 | size_t len; | 1313 | size_t len; |
1251 | int want, got, ret; | 1314 | int want, got, ret; |
1252 | 1315 | ||
1316 | if (ci->i_inline_version != CEPH_INLINE_NONE) { | ||
1317 | struct page *locked_page = NULL; | ||
1318 | if (off == 0) { | ||
1319 | lock_page(page); | ||
1320 | locked_page = page; | ||
1321 | } | ||
1322 | ret = ceph_uninline_data(vma->vm_file, locked_page); | ||
1323 | if (locked_page) | ||
1324 | unlock_page(locked_page); | ||
1325 | if (ret < 0) | ||
1326 | return VM_FAULT_SIGBUS; | ||
1327 | } | ||
1328 | |||
1253 | if (off + PAGE_CACHE_SIZE <= size) | 1329 | if (off + PAGE_CACHE_SIZE <= size) |
1254 | len = PAGE_CACHE_SIZE; | 1330 | len = PAGE_CACHE_SIZE; |
1255 | else | 1331 | else |
@@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1263 | want = CEPH_CAP_FILE_BUFFER; | 1339 | want = CEPH_CAP_FILE_BUFFER; |
1264 | while (1) { | 1340 | while (1) { |
1265 | got = 0; | 1341 | got = 0; |
1266 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len); | 1342 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, |
1343 | &got, NULL); | ||
1267 | if (ret == 0) | 1344 | if (ret == 0) |
1268 | break; | 1345 | break; |
1269 | if (ret != -ERESTARTSYS) { | 1346 | if (ret != -ERESTARTSYS) { |
@@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1297 | ret = VM_FAULT_SIGBUS; | 1374 | ret = VM_FAULT_SIGBUS; |
1298 | } | 1375 | } |
1299 | out: | 1376 | out: |
1300 | if (ret != VM_FAULT_LOCKED) { | 1377 | if (ret != VM_FAULT_LOCKED) |
1301 | unlock_page(page); | 1378 | unlock_page(page); |
1302 | } else { | 1379 | if (ret == VM_FAULT_LOCKED || |
1380 | ci->i_inline_version != CEPH_INLINE_NONE) { | ||
1303 | int dirty; | 1381 | int dirty; |
1304 | spin_lock(&ci->i_ceph_lock); | 1382 | spin_lock(&ci->i_ceph_lock); |
1383 | ci->i_inline_version = CEPH_INLINE_NONE; | ||
1305 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 1384 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); |
1306 | spin_unlock(&ci->i_ceph_lock); | 1385 | spin_unlock(&ci->i_ceph_lock); |
1307 | if (dirty) | 1386 | if (dirty) |
@@ -1315,6 +1394,178 @@ out: | |||
1315 | return ret; | 1394 | return ret; |
1316 | } | 1395 | } |
1317 | 1396 | ||
1397 | void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, | ||
1398 | char *data, size_t len) | ||
1399 | { | ||
1400 | struct address_space *mapping = inode->i_mapping; | ||
1401 | struct page *page; | ||
1402 | |||
1403 | if (locked_page) { | ||
1404 | page = locked_page; | ||
1405 | } else { | ||
1406 | if (i_size_read(inode) == 0) | ||
1407 | return; | ||
1408 | page = find_or_create_page(mapping, 0, | ||
1409 | mapping_gfp_mask(mapping) & ~__GFP_FS); | ||
1410 | if (!page) | ||
1411 | return; | ||
1412 | if (PageUptodate(page)) { | ||
1413 | unlock_page(page); | ||
1414 | page_cache_release(page); | ||
1415 | return; | ||
1416 | } | ||
1417 | } | ||
1418 | |||
1419 | dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n", | ||
1420 | inode, ceph_vinop(inode), len, locked_page); | ||
1421 | |||
1422 | if (len > 0) { | ||
1423 | void *kaddr = kmap_atomic(page); | ||
1424 | memcpy(kaddr, data, len); | ||
1425 | kunmap_atomic(kaddr); | ||
1426 | } | ||
1427 | |||
1428 | if (page != locked_page) { | ||
1429 | if (len < PAGE_CACHE_SIZE) | ||
1430 | zero_user_segment(page, len, PAGE_CACHE_SIZE); | ||
1431 | else | ||
1432 | flush_dcache_page(page); | ||
1433 | |||
1434 | SetPageUptodate(page); | ||
1435 | unlock_page(page); | ||
1436 | page_cache_release(page); | ||
1437 | } | ||
1438 | } | ||
1439 | |||
1440 | int ceph_uninline_data(struct file *filp, struct page *locked_page) | ||
1441 | { | ||
1442 | struct inode *inode = file_inode(filp); | ||
1443 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1444 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||
1445 | struct ceph_osd_request *req; | ||
1446 | struct page *page = NULL; | ||
1447 | u64 len, inline_version; | ||
1448 | int err = 0; | ||
1449 | bool from_pagecache = false; | ||
1450 | |||
1451 | spin_lock(&ci->i_ceph_lock); | ||
1452 | inline_version = ci->i_inline_version; | ||
1453 | spin_unlock(&ci->i_ceph_lock); | ||
1454 | |||
1455 | dout("uninline_data %p %llx.%llx inline_version %llu\n", | ||
1456 | inode, ceph_vinop(inode), inline_version); | ||
1457 | |||
1458 | if (inline_version == 1 || /* initial version, no data */ | ||
1459 | inline_version == CEPH_INLINE_NONE) | ||
1460 | goto out; | ||
1461 | |||
1462 | if (locked_page) { | ||
1463 | page = locked_page; | ||
1464 | WARN_ON(!PageUptodate(page)); | ||
1465 | } else if (ceph_caps_issued(ci) & | ||
1466 | (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) { | ||
1467 | page = find_get_page(inode->i_mapping, 0); | ||
1468 | if (page) { | ||
1469 | if (PageUptodate(page)) { | ||
1470 | from_pagecache = true; | ||
1471 | lock_page(page); | ||
1472 | } else { | ||
1473 | page_cache_release(page); | ||
1474 | page = NULL; | ||
1475 | } | ||
1476 | } | ||
1477 | } | ||
1478 | |||
1479 | if (page) { | ||
1480 | len = i_size_read(inode); | ||
1481 | if (len > PAGE_CACHE_SIZE) | ||
1482 | len = PAGE_CACHE_SIZE; | ||
1483 | } else { | ||
1484 | page = __page_cache_alloc(GFP_NOFS); | ||
1485 | if (!page) { | ||
1486 | err = -ENOMEM; | ||
1487 | goto out; | ||
1488 | } | ||
1489 | err = __ceph_do_getattr(inode, page, | ||
1490 | CEPH_STAT_CAP_INLINE_DATA, true); | ||
1491 | if (err < 0) { | ||
1492 | /* no inline data */ | ||
1493 | if (err == -ENODATA) | ||
1494 | err = 0; | ||
1495 | goto out; | ||
1496 | } | ||
1497 | len = err; | ||
1498 | } | ||
1499 | |||
1500 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||
1501 | ceph_vino(inode), 0, &len, 0, 1, | ||
1502 | CEPH_OSD_OP_CREATE, | ||
1503 | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, | ||
1504 | ci->i_snap_realm->cached_context, | ||
1505 | 0, 0, false); | ||
1506 | if (IS_ERR(req)) { | ||
1507 | err = PTR_ERR(req); | ||
1508 | goto out; | ||
1509 | } | ||
1510 | |||
1511 | ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); | ||
1512 | err = ceph_osdc_start_request(&fsc->client->osdc, req, false); | ||
1513 | if (!err) | ||
1514 | err = ceph_osdc_wait_request(&fsc->client->osdc, req); | ||
1515 | ceph_osdc_put_request(req); | ||
1516 | if (err < 0) | ||
1517 | goto out; | ||
1518 | |||
1519 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||
1520 | ceph_vino(inode), 0, &len, 1, 3, | ||
1521 | CEPH_OSD_OP_WRITE, | ||
1522 | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, | ||
1523 | ci->i_snap_realm->cached_context, | ||
1524 | ci->i_truncate_seq, ci->i_truncate_size, | ||
1525 | false); | ||
1526 | if (IS_ERR(req)) { | ||
1527 | err = PTR_ERR(req); | ||
1528 | goto out; | ||
1529 | } | ||
1530 | |||
1531 | osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); | ||
1532 | |||
1533 | err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, | ||
1534 | "inline_version", &inline_version, | ||
1535 | sizeof(inline_version), | ||
1536 | CEPH_OSD_CMPXATTR_OP_GT, | ||
1537 | CEPH_OSD_CMPXATTR_MODE_U64); | ||
1538 | if (err) | ||
1539 | goto out_put; | ||
1540 | |||
1541 | err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, | ||
1542 | "inline_version", &inline_version, | ||
1543 | sizeof(inline_version), 0, 0); | ||
1544 | if (err) | ||
1545 | goto out_put; | ||
1546 | |||
1547 | ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); | ||
1548 | err = ceph_osdc_start_request(&fsc->client->osdc, req, false); | ||
1549 | if (!err) | ||
1550 | err = ceph_osdc_wait_request(&fsc->client->osdc, req); | ||
1551 | out_put: | ||
1552 | ceph_osdc_put_request(req); | ||
1553 | if (err == -ECANCELED) | ||
1554 | err = 0; | ||
1555 | out: | ||
1556 | if (page && page != locked_page) { | ||
1557 | if (from_pagecache) { | ||
1558 | unlock_page(page); | ||
1559 | page_cache_release(page); | ||
1560 | } else | ||
1561 | __free_pages(page, 0); | ||
1562 | } | ||
1563 | |||
1564 | dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", | ||
1565 | inode, ceph_vinop(inode), inline_version, err); | ||
1566 | return err; | ||
1567 | } | ||
1568 | |||
1318 | static struct vm_operations_struct ceph_vmops = { | 1569 | static struct vm_operations_struct ceph_vmops = { |
1319 | .fault = ceph_filemap_fault, | 1570 | .fault = ceph_filemap_fault, |
1320 | .page_mkwrite = ceph_page_mkwrite, | 1571 | .page_mkwrite = ceph_page_mkwrite, |