summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph/addr.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- fs/ceph/addr.c 273
1 files changed, 262 insertions, 11 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 18c06bbaf136..f5013d92a7e6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page)
192 struct ceph_osd_client *osdc = 192 struct ceph_osd_client *osdc =
193 &ceph_inode_to_client(inode)->client->osdc; 193 &ceph_inode_to_client(inode)->client->osdc;
194 int err = 0; 194 int err = 0;
195 u64 off = page_offset(page);
195 u64 len = PAGE_CACHE_SIZE; 196 u64 len = PAGE_CACHE_SIZE;
196 197
197 err = ceph_readpage_from_fscache(inode, page); 198 if (off >= i_size_read(inode)) {
199 zero_user_segment(page, err, PAGE_CACHE_SIZE);
200 SetPageUptodate(page);
201 return 0;
202 }
198 203
204 /*
205 * Uptodate inline data should have been added into page cache
206 * while getting Fcr caps.
207 */
208 if (ci->i_inline_version != CEPH_INLINE_NONE)
209 return -EINVAL;
210
211 err = ceph_readpage_from_fscache(inode, page);
199 if (err == 0) 212 if (err == 0)
200 goto out; 213 goto out;
201 214
202 dout("readpage inode %p file %p page %p index %lu\n", 215 dout("readpage inode %p file %p page %p index %lu\n",
203 inode, filp, page, page->index); 216 inode, filp, page, page->index);
204 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 217 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
205 (u64) page_offset(page), &len, 218 off, &len,
206 ci->i_truncate_seq, ci->i_truncate_size, 219 ci->i_truncate_seq, ci->i_truncate_size,
207 &page, 1, 0); 220 &page, 1, 0);
208 if (err == -ENOENT) 221 if (err == -ENOENT)
@@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
319 off, len); 332 off, len);
320 vino = ceph_vino(inode); 333 vino = ceph_vino(inode);
321 req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 334 req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
322 1, CEPH_OSD_OP_READ, 335 0, 1, CEPH_OSD_OP_READ,
323 CEPH_OSD_FLAG_READ, NULL, 336 CEPH_OSD_FLAG_READ, NULL,
324 ci->i_truncate_seq, ci->i_truncate_size, 337 ci->i_truncate_seq, ci->i_truncate_size,
325 false); 338 false);
@@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
384 int rc = 0; 397 int rc = 0;
385 int max = 0; 398 int max = 0;
386 399
400 if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
401 return -EINVAL;
402
387 rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list, 403 rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
388 &nr_pages); 404 &nr_pages);
389 405
@@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping,
673 int rc = 0; 689 int rc = 0;
674 unsigned wsize = 1 << inode->i_blkbits; 690 unsigned wsize = 1 << inode->i_blkbits;
675 struct ceph_osd_request *req = NULL; 691 struct ceph_osd_request *req = NULL;
676 int do_sync; 692 int do_sync = 0;
677 u64 truncate_size, snap_size; 693 u64 truncate_size, snap_size;
678 u32 truncate_seq; 694 u32 truncate_seq;
679 695
@@ -750,7 +766,6 @@ retry:
750 last_snapc = snapc; 766 last_snapc = snapc;
751 767
752 while (!done && index <= end) { 768 while (!done && index <= end) {
753 int num_ops = do_sync ? 2 : 1;
754 unsigned i; 769 unsigned i;
755 int first; 770 int first;
756 pgoff_t next; 771 pgoff_t next;
@@ -850,7 +865,8 @@ get_more_pages:
850 len = wsize; 865 len = wsize;
851 req = ceph_osdc_new_request(&fsc->client->osdc, 866 req = ceph_osdc_new_request(&fsc->client->osdc,
852 &ci->i_layout, vino, 867 &ci->i_layout, vino,
853 offset, &len, num_ops, 868 offset, &len, 0,
869 do_sync ? 2 : 1,
854 CEPH_OSD_OP_WRITE, 870 CEPH_OSD_OP_WRITE,
855 CEPH_OSD_FLAG_WRITE | 871 CEPH_OSD_FLAG_WRITE |
856 CEPH_OSD_FLAG_ONDISK, 872 CEPH_OSD_FLAG_ONDISK,
@@ -862,6 +878,9 @@ get_more_pages:
862 break; 878 break;
863 } 879 }
864 880
881 if (do_sync)
882 osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
883
865 req->r_callback = writepages_finish; 884 req->r_callback = writepages_finish;
866 req->r_inode = inode; 885 req->r_inode = inode;
867 886
@@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1204 struct inode *inode = file_inode(vma->vm_file); 1223 struct inode *inode = file_inode(vma->vm_file);
1205 struct ceph_inode_info *ci = ceph_inode(inode); 1224 struct ceph_inode_info *ci = ceph_inode(inode);
1206 struct ceph_file_info *fi = vma->vm_file->private_data; 1225 struct ceph_file_info *fi = vma->vm_file->private_data;
1226 struct page *pinned_page = NULL;
1207 loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; 1227 loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
1208 int want, got, ret; 1228 int want, got, ret;
1209 1229
@@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1215 want = CEPH_CAP_FILE_CACHE; 1235 want = CEPH_CAP_FILE_CACHE;
1216 while (1) { 1236 while (1) {
1217 got = 0; 1237 got = 0;
1218 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); 1238 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want,
1239 -1, &got, &pinned_page);
1219 if (ret == 0) 1240 if (ret == 0)
1220 break; 1241 break;
1221 if (ret != -ERESTARTSYS) { 1242 if (ret != -ERESTARTSYS) {
@@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1226 dout("filemap_fault %p %llu~%zd got cap refs on %s\n", 1247 dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
1227 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got)); 1248 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
1228 1249
1229 ret = filemap_fault(vma, vmf); 1250 if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
1251 ci->i_inline_version == CEPH_INLINE_NONE)
1252 ret = filemap_fault(vma, vmf);
1253 else
1254 ret = -EAGAIN;
1230 1255
1231 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", 1256 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
1232 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); 1257 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
1258 if (pinned_page)
1259 page_cache_release(pinned_page);
1233 ceph_put_cap_refs(ci, got); 1260 ceph_put_cap_refs(ci, got);
1234 1261
1262 if (ret != -EAGAIN)
1263 return ret;
1264
1265 /* read inline data */
1266 if (off >= PAGE_CACHE_SIZE) {
1267 /* does not support inline data > PAGE_SIZE */
1268 ret = VM_FAULT_SIGBUS;
1269 } else {
1270 int ret1;
1271 struct address_space *mapping = inode->i_mapping;
1272 struct page *page = find_or_create_page(mapping, 0,
1273 mapping_gfp_mask(mapping) &
1274 ~__GFP_FS);
1275 if (!page) {
1276 ret = VM_FAULT_OOM;
1277 goto out;
1278 }
1279 ret1 = __ceph_do_getattr(inode, page,
1280 CEPH_STAT_CAP_INLINE_DATA, true);
1281 if (ret1 < 0 || off >= i_size_read(inode)) {
1282 unlock_page(page);
1283 page_cache_release(page);
1284 ret = VM_FAULT_SIGBUS;
1285 goto out;
1286 }
1287 if (ret1 < PAGE_CACHE_SIZE)
1288 zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
1289 else
1290 flush_dcache_page(page);
1291 SetPageUptodate(page);
1292 vmf->page = page;
1293 ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
1294 }
1295out:
1296 dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
1297 inode, off, (size_t)PAGE_CACHE_SIZE, ret);
1235 return ret; 1298 return ret;
1236} 1299}
1237 1300
@@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1250 size_t len; 1313 size_t len;
1251 int want, got, ret; 1314 int want, got, ret;
1252 1315
1316 if (ci->i_inline_version != CEPH_INLINE_NONE) {
1317 struct page *locked_page = NULL;
1318 if (off == 0) {
1319 lock_page(page);
1320 locked_page = page;
1321 }
1322 ret = ceph_uninline_data(vma->vm_file, locked_page);
1323 if (locked_page)
1324 unlock_page(locked_page);
1325 if (ret < 0)
1326 return VM_FAULT_SIGBUS;
1327 }
1328
1253 if (off + PAGE_CACHE_SIZE <= size) 1329 if (off + PAGE_CACHE_SIZE <= size)
1254 len = PAGE_CACHE_SIZE; 1330 len = PAGE_CACHE_SIZE;
1255 else 1331 else
@@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1263 want = CEPH_CAP_FILE_BUFFER; 1339 want = CEPH_CAP_FILE_BUFFER;
1264 while (1) { 1340 while (1) {
1265 got = 0; 1341 got = 0;
1266 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len); 1342 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
1343 &got, NULL);
1267 if (ret == 0) 1344 if (ret == 0)
1268 break; 1345 break;
1269 if (ret != -ERESTARTSYS) { 1346 if (ret != -ERESTARTSYS) {
@@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1297 ret = VM_FAULT_SIGBUS; 1374 ret = VM_FAULT_SIGBUS;
1298 } 1375 }
1299out: 1376out:
1300 if (ret != VM_FAULT_LOCKED) { 1377 if (ret != VM_FAULT_LOCKED)
1301 unlock_page(page); 1378 unlock_page(page);
1302 } else { 1379 if (ret == VM_FAULT_LOCKED ||
1380 ci->i_inline_version != CEPH_INLINE_NONE) {
1303 int dirty; 1381 int dirty;
1304 spin_lock(&ci->i_ceph_lock); 1382 spin_lock(&ci->i_ceph_lock);
1383 ci->i_inline_version = CEPH_INLINE_NONE;
1305 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1384 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
1306 spin_unlock(&ci->i_ceph_lock); 1385 spin_unlock(&ci->i_ceph_lock);
1307 if (dirty) 1386 if (dirty)
@@ -1315,6 +1394,178 @@ out:
1315 return ret; 1394 return ret;
1316} 1395}
1317 1396
1397void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
1398 char *data, size_t len)
1399{
1400 struct address_space *mapping = inode->i_mapping;
1401 struct page *page;
1402
1403 if (locked_page) {
1404 page = locked_page;
1405 } else {
1406 if (i_size_read(inode) == 0)
1407 return;
1408 page = find_or_create_page(mapping, 0,
1409 mapping_gfp_mask(mapping) & ~__GFP_FS);
1410 if (!page)
1411 return;
1412 if (PageUptodate(page)) {
1413 unlock_page(page);
1414 page_cache_release(page);
1415 return;
1416 }
1417 }
1418
1419 dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n",
1420 inode, ceph_vinop(inode), len, locked_page);
1421
1422 if (len > 0) {
1423 void *kaddr = kmap_atomic(page);
1424 memcpy(kaddr, data, len);
1425 kunmap_atomic(kaddr);
1426 }
1427
1428 if (page != locked_page) {
1429 if (len < PAGE_CACHE_SIZE)
1430 zero_user_segment(page, len, PAGE_CACHE_SIZE);
1431 else
1432 flush_dcache_page(page);
1433
1434 SetPageUptodate(page);
1435 unlock_page(page);
1436 page_cache_release(page);
1437 }
1438}
1439
1440int ceph_uninline_data(struct file *filp, struct page *locked_page)
1441{
1442 struct inode *inode = file_inode(filp);
1443 struct ceph_inode_info *ci = ceph_inode(inode);
1444 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
1445 struct ceph_osd_request *req;
1446 struct page *page = NULL;
1447 u64 len, inline_version;
1448 int err = 0;
1449 bool from_pagecache = false;
1450
1451 spin_lock(&ci->i_ceph_lock);
1452 inline_version = ci->i_inline_version;
1453 spin_unlock(&ci->i_ceph_lock);
1454
1455 dout("uninline_data %p %llx.%llx inline_version %llu\n",
1456 inode, ceph_vinop(inode), inline_version);
1457
1458 if (inline_version == 1 || /* initial version, no data */
1459 inline_version == CEPH_INLINE_NONE)
1460 goto out;
1461
1462 if (locked_page) {
1463 page = locked_page;
1464 WARN_ON(!PageUptodate(page));
1465 } else if (ceph_caps_issued(ci) &
1466 (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
1467 page = find_get_page(inode->i_mapping, 0);
1468 if (page) {
1469 if (PageUptodate(page)) {
1470 from_pagecache = true;
1471 lock_page(page);
1472 } else {
1473 page_cache_release(page);
1474 page = NULL;
1475 }
1476 }
1477 }
1478
1479 if (page) {
1480 len = i_size_read(inode);
1481 if (len > PAGE_CACHE_SIZE)
1482 len = PAGE_CACHE_SIZE;
1483 } else {
1484 page = __page_cache_alloc(GFP_NOFS);
1485 if (!page) {
1486 err = -ENOMEM;
1487 goto out;
1488 }
1489 err = __ceph_do_getattr(inode, page,
1490 CEPH_STAT_CAP_INLINE_DATA, true);
1491 if (err < 0) {
1492 /* no inline data */
1493 if (err == -ENODATA)
1494 err = 0;
1495 goto out;
1496 }
1497 len = err;
1498 }
1499
1500 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
1501 ceph_vino(inode), 0, &len, 0, 1,
1502 CEPH_OSD_OP_CREATE,
1503 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
1504 ci->i_snap_realm->cached_context,
1505 0, 0, false);
1506 if (IS_ERR(req)) {
1507 err = PTR_ERR(req);
1508 goto out;
1509 }
1510
1511 ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
1512 err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
1513 if (!err)
1514 err = ceph_osdc_wait_request(&fsc->client->osdc, req);
1515 ceph_osdc_put_request(req);
1516 if (err < 0)
1517 goto out;
1518
1519 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
1520 ceph_vino(inode), 0, &len, 1, 3,
1521 CEPH_OSD_OP_WRITE,
1522 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
1523 ci->i_snap_realm->cached_context,
1524 ci->i_truncate_seq, ci->i_truncate_size,
1525 false);
1526 if (IS_ERR(req)) {
1527 err = PTR_ERR(req);
1528 goto out;
1529 }
1530
1531 osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
1532
1533 err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
1534 "inline_version", &inline_version,
1535 sizeof(inline_version),
1536 CEPH_OSD_CMPXATTR_OP_GT,
1537 CEPH_OSD_CMPXATTR_MODE_U64);
1538 if (err)
1539 goto out_put;
1540
1541 err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
1542 "inline_version", &inline_version,
1543 sizeof(inline_version), 0, 0);
1544 if (err)
1545 goto out_put;
1546
1547 ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
1548 err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
1549 if (!err)
1550 err = ceph_osdc_wait_request(&fsc->client->osdc, req);
1551out_put:
1552 ceph_osdc_put_request(req);
1553 if (err == -ECANCELED)
1554 err = 0;
1555out:
1556 if (page && page != locked_page) {
1557 if (from_pagecache) {
1558 unlock_page(page);
1559 page_cache_release(page);
1560 } else
1561 __free_pages(page, 0);
1562 }
1563
1564 dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
1565 inode, ceph_vinop(inode), inline_version, err);
1566 return err;
1567}
1568
1318static struct vm_operations_struct ceph_vmops = { 1569static struct vm_operations_struct ceph_vmops = {
1319 .fault = ceph_filemap_fault, 1570 .fault = ceph_filemap_fault,
1320 .page_mkwrite = ceph_page_mkwrite, 1571 .page_mkwrite = ceph_page_mkwrite,