diff options
49 files changed, 2544 insertions, 2033 deletions
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index d2c6a5ccf0f5..b19b6a03f91c 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt | |||
@@ -158,6 +158,20 @@ Mount Options | |||
158 | copies. Currently, it's only used in copy_file_range, which will revert | 158 | copies. Currently, it's only used in copy_file_range, which will revert |
159 | to the default VFS implementation if this option is used. | 159 | to the default VFS implementation if this option is used. |
160 | 160 | ||
161 | recover_session=<no|clean> | ||
162 | Set auto reconnect mode in the case where the client is blacklisted. The | ||
163 | available modes are "no" and "clean". The default is "no". | ||
164 | |||
165 | * no: never attempt to reconnect when client detects that it has been | ||
166 | blacklisted. Operations will generally fail after being blacklisted. | ||
167 | |||
168 | * clean: client reconnects to the ceph cluster automatically when it | ||
169 | detects that it has been blacklisted. During reconnect, client drops | ||
170 | dirty data/metadata, invalidates page caches and writable file handles. | ||
171 | After reconnect, file locks become stale because the MDS loses track | ||
172 | of them. If an inode contains any stale file locks, read/write on the | ||
173 | inode is not allowed until applications release all stale file locks. | ||
174 | |||
161 | More Information | 175 | More Information |
162 | ================ | 176 | ================ |
163 | 177 | ||
diff --git a/MAINTAINERS b/MAINTAINERS index a8d193a74692..a97f1be63b9d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -9056,7 +9056,7 @@ S: Supported | |||
9056 | F: Documentation/security/keys/trusted-encrypted.rst | 9056 | F: Documentation/security/keys/trusted-encrypted.rst |
9057 | F: include/keys/trusted-type.h | 9057 | F: include/keys/trusted-type.h |
9058 | F: security/keys/trusted.c | 9058 | F: security/keys/trusted.c |
9059 | F: security/keys/trusted.h | 9059 | F: include/keys/trusted.h |
9060 | 9060 | ||
9061 | KEYS/KEYRINGS: | 9061 | KEYS/KEYRINGS: |
9062 | M: David Howells <dhowells@redhat.com> | 9062 | M: David Howells <dhowells@redhat.com> |
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c8fb886aebd4..7c4350c0fb77 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -1754,8 +1754,6 @@ static struct rbd_img_request *rbd_img_request_create( | |||
1754 | mutex_init(&img_request->state_mutex); | 1754 | mutex_init(&img_request->state_mutex); |
1755 | kref_init(&img_request->kref); | 1755 | kref_init(&img_request->kref); |
1756 | 1756 | ||
1757 | dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev, | ||
1758 | obj_op_name(op_type), img_request); | ||
1759 | return img_request; | 1757 | return img_request; |
1760 | } | 1758 | } |
1761 | 1759 | ||
@@ -2944,6 +2942,9 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req) | |||
2944 | __set_bit(IMG_REQ_CHILD, &child_img_req->flags); | 2942 | __set_bit(IMG_REQ_CHILD, &child_img_req->flags); |
2945 | child_img_req->obj_request = obj_req; | 2943 | child_img_req->obj_request = obj_req; |
2946 | 2944 | ||
2945 | dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req, | ||
2946 | obj_req); | ||
2947 | |||
2947 | if (!rbd_img_is_write(img_req)) { | 2948 | if (!rbd_img_is_write(img_req)) { |
2948 | switch (img_req->data_type) { | 2949 | switch (img_req->data_type) { |
2949 | case OBJ_REQUEST_BIO: | 2950 | case OBJ_REQUEST_BIO: |
@@ -4877,6 +4878,9 @@ static void rbd_queue_workfn(struct work_struct *work) | |||
4877 | img_request->rq = rq; | 4878 | img_request->rq = rq; |
4878 | snapc = NULL; /* img_request consumes a ref */ | 4879 | snapc = NULL; /* img_request consumes a ref */ |
4879 | 4880 | ||
4881 | dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev, | ||
4882 | img_request, obj_op_name(op_type), offset, length); | ||
4883 | |||
4880 | if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT) | 4884 | if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT) |
4881 | result = rbd_img_fill_nodata(img_request, offset, length); | 4885 | result = rbd_img_fill_nodata(img_request, offset, length); |
4882 | else | 4886 | else |
@@ -5669,17 +5673,20 @@ static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) | |||
5669 | 5673 | ||
5670 | static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) | 5674 | static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) |
5671 | { | 5675 | { |
5676 | size_t size; | ||
5672 | void *reply_buf; | 5677 | void *reply_buf; |
5673 | int ret; | 5678 | int ret; |
5674 | void *p; | 5679 | void *p; |
5675 | 5680 | ||
5676 | reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL); | 5681 | /* Response will be an encoded string, which includes a length */ |
5682 | size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX; | ||
5683 | reply_buf = kzalloc(size, GFP_KERNEL); | ||
5677 | if (!reply_buf) | 5684 | if (!reply_buf) |
5678 | return -ENOMEM; | 5685 | return -ENOMEM; |
5679 | 5686 | ||
5680 | ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, | 5687 | ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, |
5681 | &rbd_dev->header_oloc, "get_object_prefix", | 5688 | &rbd_dev->header_oloc, "get_object_prefix", |
5682 | NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX); | 5689 | NULL, 0, reply_buf, size); |
5683 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); | 5690 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
5684 | if (ret < 0) | 5691 | if (ret < 0) |
5685 | goto out; | 5692 | goto out; |
@@ -6696,7 +6703,6 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
6696 | dout("rbd id object name is %s\n", oid.name); | 6703 | dout("rbd id object name is %s\n", oid.name); |
6697 | 6704 | ||
6698 | /* Response will be an encoded string, which includes a length */ | 6705 | /* Response will be an encoded string, which includes a length */ |
6699 | |||
6700 | size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX; | 6706 | size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX; |
6701 | response = kzalloc(size, GFP_NOIO); | 6707 | response = kzalloc(size, GFP_NOIO); |
6702 | if (!response) { | 6708 | if (!response) { |
@@ -6708,7 +6714,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
6708 | 6714 | ||
6709 | ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, | 6715 | ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, |
6710 | "get_id", NULL, 0, | 6716 | "get_id", NULL, 0, |
6711 | response, RBD_IMAGE_ID_LEN_MAX); | 6717 | response, size); |
6712 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); | 6718 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
6713 | if (ret == -ENOENT) { | 6719 | if (ret == -ENOENT) { |
6714 | image_id = kstrdup("", GFP_KERNEL); | 6720 | image_id = kstrdup("", GFP_KERNEL); |
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 1b4f95c13e00..d7a3888ad80f 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c | |||
@@ -320,18 +320,22 @@ int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, | |||
320 | if (!chip) | 320 | if (!chip) |
321 | return -ENODEV; | 321 | return -ENODEV; |
322 | 322 | ||
323 | for (i = 0; i < chip->nr_allocated_banks; i++) | 323 | for (i = 0; i < chip->nr_allocated_banks; i++) { |
324 | if (digests[i].alg_id != chip->allocated_banks[i].alg_id) | 324 | if (digests[i].alg_id != chip->allocated_banks[i].alg_id) { |
325 | return -EINVAL; | 325 | rc = -EINVAL; |
326 | goto out; | ||
327 | } | ||
328 | } | ||
326 | 329 | ||
327 | if (chip->flags & TPM_CHIP_FLAG_TPM2) { | 330 | if (chip->flags & TPM_CHIP_FLAG_TPM2) { |
328 | rc = tpm2_pcr_extend(chip, pcr_idx, digests); | 331 | rc = tpm2_pcr_extend(chip, pcr_idx, digests); |
329 | tpm_put_ops(chip); | 332 | goto out; |
330 | return rc; | ||
331 | } | 333 | } |
332 | 334 | ||
333 | rc = tpm1_pcr_extend(chip, pcr_idx, digests[0].digest, | 335 | rc = tpm1_pcr_extend(chip, pcr_idx, digests[0].digest, |
334 | "attempting extend a PCR value"); | 336 | "attempting extend a PCR value"); |
337 | |||
338 | out: | ||
335 | tpm_put_ops(chip); | 339 | tpm_put_ops(chip); |
336 | return rc; | 340 | return rc; |
337 | } | 341 | } |
@@ -354,14 +358,9 @@ int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen) | |||
354 | if (!chip) | 358 | if (!chip) |
355 | return -ENODEV; | 359 | return -ENODEV; |
356 | 360 | ||
357 | rc = tpm_buf_init(&buf, 0, 0); | 361 | buf.data = cmd; |
358 | if (rc) | ||
359 | goto out; | ||
360 | |||
361 | memcpy(buf.data, cmd, buflen); | ||
362 | rc = tpm_transmit_cmd(chip, &buf, 0, "attempting to a send a command"); | 362 | rc = tpm_transmit_cmd(chip, &buf, 0, "attempting to a send a command"); |
363 | tpm_buf_destroy(&buf); | 363 | |
364 | out: | ||
365 | tpm_put_ops(chip); | 364 | tpm_put_ops(chip); |
366 | return rc; | 365 | return rc; |
367 | } | 366 | } |
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index a699e320393f..c1da294418d1 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -6,7 +6,7 @@ | |||
6 | obj-$(CONFIG_CEPH_FS) += ceph.o | 6 | obj-$(CONFIG_CEPH_FS) += ceph.o |
7 | 7 | ||
8 | ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ | 8 | ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
9 | export.o caps.o snap.o xattr.o quota.o \ | 9 | export.o caps.o snap.o xattr.o quota.o io.o \ |
10 | mds_client.o mdsmap.o strings.o ceph_frag.o \ | 10 | mds_client.o mdsmap.o strings.o ceph_frag.o \ |
11 | debugfs.o | 11 | debugfs.o |
12 | 12 | ||
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b3c8b886bf64..7ab616601141 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -189,8 +189,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page) | |||
189 | { | 189 | { |
190 | struct inode *inode = file_inode(filp); | 190 | struct inode *inode = file_inode(filp); |
191 | struct ceph_inode_info *ci = ceph_inode(inode); | 191 | struct ceph_inode_info *ci = ceph_inode(inode); |
192 | struct ceph_osd_client *osdc = | 192 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
193 | &ceph_inode_to_client(inode)->client->osdc; | ||
194 | int err = 0; | 193 | int err = 0; |
195 | u64 off = page_offset(page); | 194 | u64 off = page_offset(page); |
196 | u64 len = PAGE_SIZE; | 195 | u64 len = PAGE_SIZE; |
@@ -219,8 +218,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page) | |||
219 | 218 | ||
220 | dout("readpage inode %p file %p page %p index %lu\n", | 219 | dout("readpage inode %p file %p page %p index %lu\n", |
221 | inode, filp, page, page->index); | 220 | inode, filp, page, page->index); |
222 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 221 | err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
223 | off, &len, | 222 | &ci->i_layout, off, &len, |
224 | ci->i_truncate_seq, ci->i_truncate_size, | 223 | ci->i_truncate_seq, ci->i_truncate_size, |
225 | &page, 1, 0); | 224 | &page, 1, 0); |
226 | if (err == -ENOENT) | 225 | if (err == -ENOENT) |
@@ -228,6 +227,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page) | |||
228 | if (err < 0) { | 227 | if (err < 0) { |
229 | SetPageError(page); | 228 | SetPageError(page); |
230 | ceph_fscache_readpage_cancel(inode, page); | 229 | ceph_fscache_readpage_cancel(inode, page); |
230 | if (err == -EBLACKLISTED) | ||
231 | fsc->blacklisted = true; | ||
231 | goto out; | 232 | goto out; |
232 | } | 233 | } |
233 | if (err < PAGE_SIZE) | 234 | if (err < PAGE_SIZE) |
@@ -266,6 +267,8 @@ static void finish_read(struct ceph_osd_request *req) | |||
266 | int i; | 267 | int i; |
267 | 268 | ||
268 | dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); | 269 | dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); |
270 | if (rc == -EBLACKLISTED) | ||
271 | ceph_inode_to_client(inode)->blacklisted = true; | ||
269 | 272 | ||
270 | /* unlock all pages, zeroing any data we didn't read */ | 273 | /* unlock all pages, zeroing any data we didn't read */ |
271 | osd_data = osd_req_op_extent_osd_data(req, 0); | 274 | osd_data = osd_req_op_extent_osd_data(req, 0); |
@@ -323,7 +326,8 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx, | |||
323 | /* caller of readpages does not hold buffer and read caps | 326 | /* caller of readpages does not hold buffer and read caps |
324 | * (fadvise, madvise and readahead cases) */ | 327 | * (fadvise, madvise and readahead cases) */ |
325 | int want = CEPH_CAP_FILE_CACHE; | 328 | int want = CEPH_CAP_FILE_CACHE; |
326 | ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, true, &got); | 329 | ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, |
330 | true, &got); | ||
327 | if (ret < 0) { | 331 | if (ret < 0) { |
328 | dout("start_read %p, error getting cap\n", inode); | 332 | dout("start_read %p, error getting cap\n", inode); |
329 | } else if (!(got & want)) { | 333 | } else if (!(got & want)) { |
@@ -569,7 +573,7 @@ static u64 get_writepages_data_length(struct inode *inode, | |||
569 | /* | 573 | /* |
570 | * Write a single page, but leave the page locked. | 574 | * Write a single page, but leave the page locked. |
571 | * | 575 | * |
572 | * If we get a write error, set the page error bit, but still adjust the | 576 | * If we get a write error, mark the mapping for error, but still adjust the |
573 | * dirty page accounting (i.e., page is no longer dirty). | 577 | * dirty page accounting (i.e., page is no longer dirty). |
574 | */ | 578 | */ |
575 | static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | 579 | static int writepage_nounlock(struct page *page, struct writeback_control *wbc) |
@@ -640,9 +644,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
640 | end_page_writeback(page); | 644 | end_page_writeback(page); |
641 | return err; | 645 | return err; |
642 | } | 646 | } |
647 | if (err == -EBLACKLISTED) | ||
648 | fsc->blacklisted = true; | ||
643 | dout("writepage setting page/mapping error %d %p\n", | 649 | dout("writepage setting page/mapping error %d %p\n", |
644 | err, page); | 650 | err, page); |
645 | SetPageError(page); | ||
646 | mapping_set_error(&inode->i_data, err); | 651 | mapping_set_error(&inode->i_data, err); |
647 | wbc->pages_skipped++; | 652 | wbc->pages_skipped++; |
648 | } else { | 653 | } else { |
@@ -680,23 +685,6 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) | |||
680 | } | 685 | } |
681 | 686 | ||
682 | /* | 687 | /* |
683 | * lame release_pages helper. release_pages() isn't exported to | ||
684 | * modules. | ||
685 | */ | ||
686 | static void ceph_release_pages(struct page **pages, int num) | ||
687 | { | ||
688 | struct pagevec pvec; | ||
689 | int i; | ||
690 | |||
691 | pagevec_init(&pvec); | ||
692 | for (i = 0; i < num; i++) { | ||
693 | if (pagevec_add(&pvec, pages[i]) == 0) | ||
694 | pagevec_release(&pvec); | ||
695 | } | ||
696 | pagevec_release(&pvec); | ||
697 | } | ||
698 | |||
699 | /* | ||
700 | * async writeback completion handler. | 688 | * async writeback completion handler. |
701 | * | 689 | * |
702 | * If we get an error, set the mapping error bit, but not the individual | 690 | * If we get an error, set the mapping error bit, but not the individual |
@@ -720,6 +708,8 @@ static void writepages_finish(struct ceph_osd_request *req) | |||
720 | if (rc < 0) { | 708 | if (rc < 0) { |
721 | mapping_set_error(mapping, rc); | 709 | mapping_set_error(mapping, rc); |
722 | ceph_set_error_write(ci); | 710 | ceph_set_error_write(ci); |
711 | if (rc == -EBLACKLISTED) | ||
712 | fsc->blacklisted = true; | ||
723 | } else { | 713 | } else { |
724 | ceph_clear_error_write(ci); | 714 | ceph_clear_error_write(ci); |
725 | } | 715 | } |
@@ -769,7 +759,7 @@ static void writepages_finish(struct ceph_osd_request *req) | |||
769 | dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n", | 759 | dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n", |
770 | inode, osd_data->length, rc >= 0 ? num_pages : 0); | 760 | inode, osd_data->length, rc >= 0 ? num_pages : 0); |
771 | 761 | ||
772 | ceph_release_pages(osd_data->pages, num_pages); | 762 | release_pages(osd_data->pages, num_pages); |
773 | } | 763 | } |
774 | 764 | ||
775 | ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc); | 765 | ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc); |
@@ -1452,7 +1442,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) | |||
1452 | want = CEPH_CAP_FILE_CACHE; | 1442 | want = CEPH_CAP_FILE_CACHE; |
1453 | 1443 | ||
1454 | got = 0; | 1444 | got = 0; |
1455 | err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); | 1445 | err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1, |
1446 | &got, &pinned_page); | ||
1456 | if (err < 0) | 1447 | if (err < 0) |
1457 | goto out_restore; | 1448 | goto out_restore; |
1458 | 1449 | ||
@@ -1540,6 +1531,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) | |||
1540 | if (!prealloc_cf) | 1531 | if (!prealloc_cf) |
1541 | return VM_FAULT_OOM; | 1532 | return VM_FAULT_OOM; |
1542 | 1533 | ||
1534 | sb_start_pagefault(inode->i_sb); | ||
1543 | ceph_block_sigs(&oldset); | 1535 | ceph_block_sigs(&oldset); |
1544 | 1536 | ||
1545 | if (ci->i_inline_version != CEPH_INLINE_NONE) { | 1537 | if (ci->i_inline_version != CEPH_INLINE_NONE) { |
@@ -1568,7 +1560,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) | |||
1568 | want = CEPH_CAP_FILE_BUFFER; | 1560 | want = CEPH_CAP_FILE_BUFFER; |
1569 | 1561 | ||
1570 | got = 0; | 1562 | got = 0; |
1571 | err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, | 1563 | err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len, |
1572 | &got, NULL); | 1564 | &got, NULL); |
1573 | if (err < 0) | 1565 | if (err < 0) |
1574 | goto out_free; | 1566 | goto out_free; |
@@ -1614,6 +1606,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) | |||
1614 | ceph_put_cap_refs(ci, got); | 1606 | ceph_put_cap_refs(ci, got); |
1615 | out_free: | 1607 | out_free: |
1616 | ceph_restore_sigs(&oldset); | 1608 | ceph_restore_sigs(&oldset); |
1609 | sb_end_pagefault(inode->i_sb); | ||
1617 | ceph_free_cap_flush(prealloc_cf); | 1610 | ceph_free_cap_flush(prealloc_cf); |
1618 | if (err < 0) | 1611 | if (err < 0) |
1619 | ret = vmf_error(err); | 1612 | ret = vmf_error(err); |
@@ -1946,12 +1939,17 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, | |||
1946 | 1939 | ||
1947 | if (err >= 0 || err == -ENOENT) | 1940 | if (err >= 0 || err == -ENOENT) |
1948 | have |= POOL_READ; | 1941 | have |= POOL_READ; |
1949 | else if (err != -EPERM) | 1942 | else if (err != -EPERM) { |
1943 | if (err == -EBLACKLISTED) | ||
1944 | fsc->blacklisted = true; | ||
1950 | goto out_unlock; | 1945 | goto out_unlock; |
1946 | } | ||
1951 | 1947 | ||
1952 | if (err2 == 0 || err2 == -EEXIST) | 1948 | if (err2 == 0 || err2 == -EEXIST) |
1953 | have |= POOL_WRITE; | 1949 | have |= POOL_WRITE; |
1954 | else if (err2 != -EPERM) { | 1950 | else if (err2 != -EPERM) { |
1951 | if (err2 == -EBLACKLISTED) | ||
1952 | fsc->blacklisted = true; | ||
1955 | err = err2; | 1953 | err = err2; |
1956 | goto out_unlock; | 1954 | goto out_unlock; |
1957 | } | 1955 | } |
@@ -1989,10 +1987,11 @@ out: | |||
1989 | return err; | 1987 | return err; |
1990 | } | 1988 | } |
1991 | 1989 | ||
1992 | int ceph_pool_perm_check(struct ceph_inode_info *ci, int need) | 1990 | int ceph_pool_perm_check(struct inode *inode, int need) |
1993 | { | 1991 | { |
1994 | s64 pool; | 1992 | struct ceph_inode_info *ci = ceph_inode(inode); |
1995 | struct ceph_string *pool_ns; | 1993 | struct ceph_string *pool_ns; |
1994 | s64 pool; | ||
1996 | int ret, flags; | 1995 | int ret, flags; |
1997 | 1996 | ||
1998 | if (ci->i_vino.snap != CEPH_NOSNAP) { | 1997 | if (ci->i_vino.snap != CEPH_NOSNAP) { |
@@ -2004,7 +2003,7 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need) | |||
2004 | return 0; | 2003 | return 0; |
2005 | } | 2004 | } |
2006 | 2005 | ||
2007 | if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), | 2006 | if (ceph_test_mount_opt(ceph_inode_to_client(inode), |
2008 | NOPOOLPERM)) | 2007 | NOPOOLPERM)) |
2009 | return 0; | 2008 | return 0; |
2010 | 2009 | ||
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index bc90cf6ad7ed..b2ec29eeb4c4 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c | |||
@@ -6,6 +6,8 @@ | |||
6 | * Written by Milosz Tanski (milosz@adfin.com) | 6 | * Written by Milosz Tanski (milosz@adfin.com) |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/ceph/ceph_debug.h> | ||
10 | |||
9 | #include "super.h" | 11 | #include "super.h" |
10 | #include "cache.h" | 12 | #include "cache.h" |
11 | 13 | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ce0f5658720a..d3b9c9d5c1bd 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -458,37 +458,6 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds) | |||
458 | } | 458 | } |
459 | 459 | ||
460 | /* | 460 | /* |
461 | * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1. | ||
462 | */ | ||
463 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci) | ||
464 | { | ||
465 | struct ceph_cap *cap; | ||
466 | int mds = -1; | ||
467 | struct rb_node *p; | ||
468 | |||
469 | /* prefer mds with WR|BUFFER|EXCL caps */ | ||
470 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { | ||
471 | cap = rb_entry(p, struct ceph_cap, ci_node); | ||
472 | mds = cap->mds; | ||
473 | if (cap->issued & (CEPH_CAP_FILE_WR | | ||
474 | CEPH_CAP_FILE_BUFFER | | ||
475 | CEPH_CAP_FILE_EXCL)) | ||
476 | break; | ||
477 | } | ||
478 | return mds; | ||
479 | } | ||
480 | |||
481 | int ceph_get_cap_mds(struct inode *inode) | ||
482 | { | ||
483 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
484 | int mds; | ||
485 | spin_lock(&ci->i_ceph_lock); | ||
486 | mds = __ceph_get_cap_mds(ceph_inode(inode)); | ||
487 | spin_unlock(&ci->i_ceph_lock); | ||
488 | return mds; | ||
489 | } | ||
490 | |||
491 | /* | ||
492 | * Called under i_ceph_lock. | 461 | * Called under i_ceph_lock. |
493 | */ | 462 | */ |
494 | static void __insert_cap_node(struct ceph_inode_info *ci, | 463 | static void __insert_cap_node(struct ceph_inode_info *ci, |
@@ -628,7 +597,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, | |||
628 | /* | 597 | /* |
629 | * Add a capability under the given MDS session. | 598 | * Add a capability under the given MDS session. |
630 | * | 599 | * |
631 | * Caller should hold session snap_rwsem (read) and s_mutex. | 600 | * Caller should hold session snap_rwsem (read) and ci->i_ceph_lock |
632 | * | 601 | * |
633 | * @fmode is the open file mode, if we are opening a file, otherwise | 602 | * @fmode is the open file mode, if we are opening a file, otherwise |
634 | * it is < 0. (This is so we can atomically add the cap and add an | 603 | * it is < 0. (This is so we can atomically add the cap and add an |
@@ -645,6 +614,9 @@ void ceph_add_cap(struct inode *inode, | |||
645 | struct ceph_cap *cap; | 614 | struct ceph_cap *cap; |
646 | int mds = session->s_mds; | 615 | int mds = session->s_mds; |
647 | int actual_wanted; | 616 | int actual_wanted; |
617 | u32 gen; | ||
618 | |||
619 | lockdep_assert_held(&ci->i_ceph_lock); | ||
648 | 620 | ||
649 | dout("add_cap %p mds%d cap %llx %s seq %d\n", inode, | 621 | dout("add_cap %p mds%d cap %llx %s seq %d\n", inode, |
650 | session->s_mds, cap_id, ceph_cap_string(issued), seq); | 622 | session->s_mds, cap_id, ceph_cap_string(issued), seq); |
@@ -656,6 +628,10 @@ void ceph_add_cap(struct inode *inode, | |||
656 | if (fmode >= 0) | 628 | if (fmode >= 0) |
657 | wanted |= ceph_caps_for_mode(fmode); | 629 | wanted |= ceph_caps_for_mode(fmode); |
658 | 630 | ||
631 | spin_lock(&session->s_gen_ttl_lock); | ||
632 | gen = session->s_cap_gen; | ||
633 | spin_unlock(&session->s_gen_ttl_lock); | ||
634 | |||
659 | cap = __get_cap_for_mds(ci, mds); | 635 | cap = __get_cap_for_mds(ci, mds); |
660 | if (!cap) { | 636 | if (!cap) { |
661 | cap = *new_cap; | 637 | cap = *new_cap; |
@@ -681,7 +657,7 @@ void ceph_add_cap(struct inode *inode, | |||
681 | list_move_tail(&cap->session_caps, &session->s_caps); | 657 | list_move_tail(&cap->session_caps, &session->s_caps); |
682 | spin_unlock(&session->s_cap_lock); | 658 | spin_unlock(&session->s_cap_lock); |
683 | 659 | ||
684 | if (cap->cap_gen < session->s_cap_gen) | 660 | if (cap->cap_gen < gen) |
685 | cap->issued = cap->implemented = CEPH_CAP_PIN; | 661 | cap->issued = cap->implemented = CEPH_CAP_PIN; |
686 | 662 | ||
687 | /* | 663 | /* |
@@ -775,7 +751,7 @@ void ceph_add_cap(struct inode *inode, | |||
775 | cap->seq = seq; | 751 | cap->seq = seq; |
776 | cap->issue_seq = seq; | 752 | cap->issue_seq = seq; |
777 | cap->mseq = mseq; | 753 | cap->mseq = mseq; |
778 | cap->cap_gen = session->s_cap_gen; | 754 | cap->cap_gen = gen; |
779 | 755 | ||
780 | if (fmode >= 0) | 756 | if (fmode >= 0) |
781 | __ceph_get_fmode(ci, fmode); | 757 | __ceph_get_fmode(ci, fmode); |
@@ -1284,10 +1260,6 @@ void __ceph_remove_caps(struct ceph_inode_info *ci) | |||
1284 | * Make note of max_size reported/requested from mds, revoked caps | 1260 | * Make note of max_size reported/requested from mds, revoked caps |
1285 | * that have now been implemented. | 1261 | * that have now been implemented. |
1286 | * | 1262 | * |
1287 | * Make half-hearted attempt ot to invalidate page cache if we are | ||
1288 | * dropping RDCACHE. Note that this will leave behind locked pages | ||
1289 | * that we'll then need to deal with elsewhere. | ||
1290 | * | ||
1291 | * Return non-zero if delayed release, or we experienced an error | 1263 | * Return non-zero if delayed release, or we experienced an error |
1292 | * such that the caller should requeue + retry later. | 1264 | * such that the caller should requeue + retry later. |
1293 | * | 1265 | * |
@@ -1746,11 +1718,11 @@ static bool __finish_cap_flush(struct ceph_mds_client *mdsc, | |||
1746 | * Add dirty inode to the flushing list. Assigned a seq number so we | 1718 | * Add dirty inode to the flushing list. Assigned a seq number so we |
1747 | * can wait for caps to flush without starving. | 1719 | * can wait for caps to flush without starving. |
1748 | * | 1720 | * |
1749 | * Called under i_ceph_lock. | 1721 | * Called under i_ceph_lock. Returns the flush tid. |
1750 | */ | 1722 | */ |
1751 | static int __mark_caps_flushing(struct inode *inode, | 1723 | static u64 __mark_caps_flushing(struct inode *inode, |
1752 | struct ceph_mds_session *session, bool wake, | 1724 | struct ceph_mds_session *session, bool wake, |
1753 | u64 *flush_tid, u64 *oldest_flush_tid) | 1725 | u64 *oldest_flush_tid) |
1754 | { | 1726 | { |
1755 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 1727 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
1756 | struct ceph_inode_info *ci = ceph_inode(inode); | 1728 | struct ceph_inode_info *ci = ceph_inode(inode); |
@@ -1789,8 +1761,7 @@ static int __mark_caps_flushing(struct inode *inode, | |||
1789 | 1761 | ||
1790 | list_add_tail(&cf->i_list, &ci->i_cap_flush_list); | 1762 | list_add_tail(&cf->i_list, &ci->i_cap_flush_list); |
1791 | 1763 | ||
1792 | *flush_tid = cf->tid; | 1764 | return cf->tid; |
1793 | return flushing; | ||
1794 | } | 1765 | } |
1795 | 1766 | ||
1796 | /* | 1767 | /* |
@@ -2028,11 +1999,6 @@ retry_locked: | |||
2028 | } | 1999 | } |
2029 | 2000 | ||
2030 | ack: | 2001 | ack: |
2031 | if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { | ||
2032 | dout(" skipping %p I_NOFLUSH set\n", inode); | ||
2033 | continue; | ||
2034 | } | ||
2035 | |||
2036 | if (session && session != cap->session) { | 2002 | if (session && session != cap->session) { |
2037 | dout("oops, wrong session %p mutex\n", session); | 2003 | dout("oops, wrong session %p mutex\n", session); |
2038 | mutex_unlock(&session->s_mutex); | 2004 | mutex_unlock(&session->s_mutex); |
@@ -2080,9 +2046,9 @@ ack: | |||
2080 | } | 2046 | } |
2081 | 2047 | ||
2082 | if (cap == ci->i_auth_cap && ci->i_dirty_caps) { | 2048 | if (cap == ci->i_auth_cap && ci->i_dirty_caps) { |
2083 | flushing = __mark_caps_flushing(inode, session, false, | 2049 | flushing = ci->i_dirty_caps; |
2084 | &flush_tid, | 2050 | flush_tid = __mark_caps_flushing(inode, session, false, |
2085 | &oldest_flush_tid); | 2051 | &oldest_flush_tid); |
2086 | } else { | 2052 | } else { |
2087 | flushing = 0; | 2053 | flushing = 0; |
2088 | flush_tid = 0; | 2054 | flush_tid = 0; |
@@ -2130,16 +2096,11 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) | |||
2130 | retry: | 2096 | retry: |
2131 | spin_lock(&ci->i_ceph_lock); | 2097 | spin_lock(&ci->i_ceph_lock); |
2132 | retry_locked: | 2098 | retry_locked: |
2133 | if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { | ||
2134 | spin_unlock(&ci->i_ceph_lock); | ||
2135 | dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); | ||
2136 | goto out; | ||
2137 | } | ||
2138 | if (ci->i_dirty_caps && ci->i_auth_cap) { | 2099 | if (ci->i_dirty_caps && ci->i_auth_cap) { |
2139 | struct ceph_cap *cap = ci->i_auth_cap; | 2100 | struct ceph_cap *cap = ci->i_auth_cap; |
2140 | int delayed; | 2101 | int delayed; |
2141 | 2102 | ||
2142 | if (!session || session != cap->session) { | 2103 | if (session != cap->session) { |
2143 | spin_unlock(&ci->i_ceph_lock); | 2104 | spin_unlock(&ci->i_ceph_lock); |
2144 | if (session) | 2105 | if (session) |
2145 | mutex_unlock(&session->s_mutex); | 2106 | mutex_unlock(&session->s_mutex); |
@@ -2161,8 +2122,9 @@ retry_locked: | |||
2161 | goto retry_locked; | 2122 | goto retry_locked; |
2162 | } | 2123 | } |
2163 | 2124 | ||
2164 | flushing = __mark_caps_flushing(inode, session, true, | 2125 | flushing = ci->i_dirty_caps; |
2165 | &flush_tid, &oldest_flush_tid); | 2126 | flush_tid = __mark_caps_flushing(inode, session, true, |
2127 | &oldest_flush_tid); | ||
2166 | 2128 | ||
2167 | /* __send_cap drops i_ceph_lock */ | 2129 | /* __send_cap drops i_ceph_lock */ |
2168 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, | 2130 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, |
@@ -2261,35 +2223,45 @@ static int unsafe_request_wait(struct inode *inode) | |||
2261 | 2223 | ||
2262 | int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | 2224 | int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
2263 | { | 2225 | { |
2226 | struct ceph_file_info *fi = file->private_data; | ||
2264 | struct inode *inode = file->f_mapping->host; | 2227 | struct inode *inode = file->f_mapping->host; |
2265 | struct ceph_inode_info *ci = ceph_inode(inode); | 2228 | struct ceph_inode_info *ci = ceph_inode(inode); |
2266 | u64 flush_tid; | 2229 | u64 flush_tid; |
2267 | int ret; | 2230 | int ret, err; |
2268 | int dirty; | 2231 | int dirty; |
2269 | 2232 | ||
2270 | dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); | 2233 | dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); |
2271 | 2234 | ||
2272 | ret = file_write_and_wait_range(file, start, end); | 2235 | ret = file_write_and_wait_range(file, start, end); |
2273 | if (ret < 0) | ||
2274 | goto out; | ||
2275 | |||
2276 | if (datasync) | 2236 | if (datasync) |
2277 | goto out; | 2237 | goto out; |
2278 | 2238 | ||
2279 | dirty = try_flush_caps(inode, &flush_tid); | 2239 | dirty = try_flush_caps(inode, &flush_tid); |
2280 | dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); | 2240 | dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); |
2281 | 2241 | ||
2282 | ret = unsafe_request_wait(inode); | 2242 | err = unsafe_request_wait(inode); |
2283 | 2243 | ||
2284 | /* | 2244 | /* |
2285 | * only wait on non-file metadata writeback (the mds | 2245 | * only wait on non-file metadata writeback (the mds |
2286 | * can recover size and mtime, so we don't need to | 2246 | * can recover size and mtime, so we don't need to |
2287 | * wait for that) | 2247 | * wait for that) |
2288 | */ | 2248 | */ |
2289 | if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { | 2249 | if (!err && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { |
2290 | ret = wait_event_interruptible(ci->i_cap_wq, | 2250 | err = wait_event_interruptible(ci->i_cap_wq, |
2291 | caps_are_flushed(inode, flush_tid)); | 2251 | caps_are_flushed(inode, flush_tid)); |
2292 | } | 2252 | } |
2253 | |||
2254 | if (err < 0) | ||
2255 | ret = err; | ||
2256 | |||
2257 | if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) { | ||
2258 | spin_lock(&file->f_lock); | ||
2259 | err = errseq_check_and_advance(&ci->i_meta_err, | ||
2260 | &fi->meta_err); | ||
2261 | spin_unlock(&file->f_lock); | ||
2262 | if (err < 0) | ||
2263 | ret = err; | ||
2264 | } | ||
2293 | out: | 2265 | out: |
2294 | dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); | 2266 | dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); |
2295 | return ret; | 2267 | return ret; |
@@ -2560,10 +2532,15 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got, | |||
2560 | * | 2532 | * |
2561 | * FIXME: how does a 0 return differ from -EAGAIN? | 2533 | * FIXME: how does a 0 return differ from -EAGAIN? |
2562 | */ | 2534 | */ |
2563 | static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | 2535 | enum { |
2564 | loff_t endoff, bool nonblock, int *got) | 2536 | NON_BLOCKING = 1, |
2537 | CHECK_FILELOCK = 2, | ||
2538 | }; | ||
2539 | |||
2540 | static int try_get_cap_refs(struct inode *inode, int need, int want, | ||
2541 | loff_t endoff, int flags, int *got) | ||
2565 | { | 2542 | { |
2566 | struct inode *inode = &ci->vfs_inode; | 2543 | struct ceph_inode_info *ci = ceph_inode(inode); |
2567 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 2544 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
2568 | int ret = 0; | 2545 | int ret = 0; |
2569 | int have, implemented; | 2546 | int have, implemented; |
@@ -2576,6 +2553,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | |||
2576 | again: | 2553 | again: |
2577 | spin_lock(&ci->i_ceph_lock); | 2554 | spin_lock(&ci->i_ceph_lock); |
2578 | 2555 | ||
2556 | if ((flags & CHECK_FILELOCK) && | ||
2557 | (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) { | ||
2558 | dout("try_get_cap_refs %p error filelock\n", inode); | ||
2559 | ret = -EIO; | ||
2560 | goto out_unlock; | ||
2561 | } | ||
2562 | |||
2579 | /* make sure file is actually open */ | 2563 | /* make sure file is actually open */ |
2580 | file_wanted = __ceph_caps_file_wanted(ci); | 2564 | file_wanted = __ceph_caps_file_wanted(ci); |
2581 | if ((file_wanted & need) != need) { | 2565 | if ((file_wanted & need) != need) { |
@@ -2637,7 +2621,7 @@ again: | |||
2637 | * we can not call down_read() when | 2621 | * we can not call down_read() when |
2638 | * task isn't in TASK_RUNNING state | 2622 | * task isn't in TASK_RUNNING state |
2639 | */ | 2623 | */ |
2640 | if (nonblock) { | 2624 | if (flags & NON_BLOCKING) { |
2641 | ret = -EAGAIN; | 2625 | ret = -EAGAIN; |
2642 | goto out_unlock; | 2626 | goto out_unlock; |
2643 | } | 2627 | } |
@@ -2731,18 +2715,19 @@ static void check_max_size(struct inode *inode, loff_t endoff) | |||
2731 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); | 2715 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); |
2732 | } | 2716 | } |
2733 | 2717 | ||
2734 | int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, | 2718 | int ceph_try_get_caps(struct inode *inode, int need, int want, |
2735 | bool nonblock, int *got) | 2719 | bool nonblock, int *got) |
2736 | { | 2720 | { |
2737 | int ret; | 2721 | int ret; |
2738 | 2722 | ||
2739 | BUG_ON(need & ~CEPH_CAP_FILE_RD); | 2723 | BUG_ON(need & ~CEPH_CAP_FILE_RD); |
2740 | BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED)); | 2724 | BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED)); |
2741 | ret = ceph_pool_perm_check(ci, need); | 2725 | ret = ceph_pool_perm_check(inode, need); |
2742 | if (ret < 0) | 2726 | if (ret < 0) |
2743 | return ret; | 2727 | return ret; |
2744 | 2728 | ||
2745 | ret = try_get_cap_refs(ci, need, want, 0, nonblock, got); | 2729 | ret = try_get_cap_refs(inode, need, want, 0, |
2730 | (nonblock ? NON_BLOCKING : 0), got); | ||
2746 | return ret == -EAGAIN ? 0 : ret; | 2731 | return ret == -EAGAIN ? 0 : ret; |
2747 | } | 2732 | } |
2748 | 2733 | ||
@@ -2751,30 +2736,40 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
2751 | * due to a small max_size, make sure we check_max_size (and possibly | 2736 | * due to a small max_size, make sure we check_max_size (and possibly |
2752 | * ask the mds) so we don't get hung up indefinitely. | 2737 | * ask the mds) so we don't get hung up indefinitely. |
2753 | */ | 2738 | */ |
2754 | int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | 2739 | int ceph_get_caps(struct file *filp, int need, int want, |
2755 | loff_t endoff, int *got, struct page **pinned_page) | 2740 | loff_t endoff, int *got, struct page **pinned_page) |
2756 | { | 2741 | { |
2757 | int _got, ret; | 2742 | struct ceph_file_info *fi = filp->private_data; |
2743 | struct inode *inode = file_inode(filp); | ||
2744 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
2745 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||
2746 | int ret, _got, flags; | ||
2758 | 2747 | ||
2759 | ret = ceph_pool_perm_check(ci, need); | 2748 | ret = ceph_pool_perm_check(inode, need); |
2760 | if (ret < 0) | 2749 | if (ret < 0) |
2761 | return ret; | 2750 | return ret; |
2762 | 2751 | ||
2752 | if ((fi->fmode & CEPH_FILE_MODE_WR) && | ||
2753 | fi->filp_gen != READ_ONCE(fsc->filp_gen)) | ||
2754 | return -EBADF; | ||
2755 | |||
2763 | while (true) { | 2756 | while (true) { |
2764 | if (endoff > 0) | 2757 | if (endoff > 0) |
2765 | check_max_size(&ci->vfs_inode, endoff); | 2758 | check_max_size(inode, endoff); |
2766 | 2759 | ||
2760 | flags = atomic_read(&fi->num_locks) ? CHECK_FILELOCK : 0; | ||
2767 | _got = 0; | 2761 | _got = 0; |
2768 | ret = try_get_cap_refs(ci, need, want, endoff, | 2762 | ret = try_get_cap_refs(inode, need, want, endoff, |
2769 | false, &_got); | 2763 | flags, &_got); |
2770 | if (ret == -EAGAIN) | 2764 | if (ret == -EAGAIN) |
2771 | continue; | 2765 | continue; |
2772 | if (!ret) { | 2766 | if (!ret) { |
2773 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | 2767 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
2774 | add_wait_queue(&ci->i_cap_wq, &wait); | 2768 | add_wait_queue(&ci->i_cap_wq, &wait); |
2775 | 2769 | ||
2776 | while (!(ret = try_get_cap_refs(ci, need, want, endoff, | 2770 | flags |= NON_BLOCKING; |
2777 | true, &_got))) { | 2771 | while (!(ret = try_get_cap_refs(inode, need, want, |
2772 | endoff, flags, &_got))) { | ||
2778 | if (signal_pending(current)) { | 2773 | if (signal_pending(current)) { |
2779 | ret = -ERESTARTSYS; | 2774 | ret = -ERESTARTSYS; |
2780 | break; | 2775 | break; |
@@ -2786,10 +2781,18 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
2786 | if (ret == -EAGAIN) | 2781 | if (ret == -EAGAIN) |
2787 | continue; | 2782 | continue; |
2788 | } | 2783 | } |
2784 | |||
2785 | if ((fi->fmode & CEPH_FILE_MODE_WR) && | ||
2786 | fi->filp_gen != READ_ONCE(fsc->filp_gen)) { | ||
2787 | if (ret >= 0 && _got) | ||
2788 | ceph_put_cap_refs(ci, _got); | ||
2789 | return -EBADF; | ||
2790 | } | ||
2791 | |||
2789 | if (ret < 0) { | 2792 | if (ret < 0) { |
2790 | if (ret == -ESTALE) { | 2793 | if (ret == -ESTALE) { |
2791 | /* session was killed, try renew caps */ | 2794 | /* session was killed, try renew caps */ |
2792 | ret = ceph_renew_caps(&ci->vfs_inode); | 2795 | ret = ceph_renew_caps(inode); |
2793 | if (ret == 0) | 2796 | if (ret == 0) |
2794 | continue; | 2797 | continue; |
2795 | } | 2798 | } |
@@ -2798,9 +2801,9 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
2798 | 2801 | ||
2799 | if (ci->i_inline_version != CEPH_INLINE_NONE && | 2802 | if (ci->i_inline_version != CEPH_INLINE_NONE && |
2800 | (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && | 2803 | (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && |
2801 | i_size_read(&ci->vfs_inode) > 0) { | 2804 | i_size_read(inode) > 0) { |
2802 | struct page *page = | 2805 | struct page *page = |
2803 | find_get_page(ci->vfs_inode.i_mapping, 0); | 2806 | find_get_page(inode->i_mapping, 0); |
2804 | if (page) { | 2807 | if (page) { |
2805 | if (PageUptodate(page)) { | 2808 | if (PageUptodate(page)) { |
2806 | *pinned_page = page; | 2809 | *pinned_page = page; |
@@ -2819,7 +2822,7 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
2819 | * getattr request will bring inline data into | 2822 | * getattr request will bring inline data into |
2820 | * page cache | 2823 | * page cache |
2821 | */ | 2824 | */ |
2822 | ret = __ceph_do_getattr(&ci->vfs_inode, NULL, | 2825 | ret = __ceph_do_getattr(inode, NULL, |
2823 | CEPH_STAT_CAP_INLINE_DATA, | 2826 | CEPH_STAT_CAP_INLINE_DATA, |
2824 | true); | 2827 | true); |
2825 | if (ret < 0) | 2828 | if (ret < 0) |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 2eb88ed22993..facb387c2735 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -294,7 +294,6 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) | |||
294 | 294 | ||
295 | void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) | 295 | void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
296 | { | 296 | { |
297 | return 0; | ||
298 | } | 297 | } |
299 | 298 | ||
300 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) | 299 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 15ff1b09cfa2..b6bfa94332c3 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -35,7 +35,7 @@ struct ceph_nfs_snapfh { | |||
35 | static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, | 35 | static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, |
36 | struct inode *parent_inode) | 36 | struct inode *parent_inode) |
37 | { | 37 | { |
38 | const static int snap_handle_length = | 38 | static const int snap_handle_length = |
39 | sizeof(struct ceph_nfs_snapfh) >> 2; | 39 | sizeof(struct ceph_nfs_snapfh) >> 2; |
40 | struct ceph_nfs_snapfh *sfh = (void *)rawfh; | 40 | struct ceph_nfs_snapfh *sfh = (void *)rawfh; |
41 | u64 snapid = ceph_snap(inode); | 41 | u64 snapid = ceph_snap(inode); |
@@ -85,9 +85,9 @@ out: | |||
85 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | 85 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, |
86 | struct inode *parent_inode) | 86 | struct inode *parent_inode) |
87 | { | 87 | { |
88 | const static int handle_length = | 88 | static const int handle_length = |
89 | sizeof(struct ceph_nfs_fh) >> 2; | 89 | sizeof(struct ceph_nfs_fh) >> 2; |
90 | const static int connected_handle_length = | 90 | static const int connected_handle_length = |
91 | sizeof(struct ceph_nfs_confh) >> 2; | 91 | sizeof(struct ceph_nfs_confh) >> 2; |
92 | int type; | 92 | int type; |
93 | 93 | ||
@@ -458,33 +458,33 @@ static int __get_snap_name(struct dentry *parent, char *name, | |||
458 | if (err < 0) | 458 | if (err < 0) |
459 | goto out; | 459 | goto out; |
460 | 460 | ||
461 | rinfo = &req->r_reply_info; | 461 | rinfo = &req->r_reply_info; |
462 | for (i = 0; i < rinfo->dir_nr; i++) { | 462 | for (i = 0; i < rinfo->dir_nr; i++) { |
463 | rde = rinfo->dir_entries + i; | 463 | rde = rinfo->dir_entries + i; |
464 | BUG_ON(!rde->inode.in); | 464 | BUG_ON(!rde->inode.in); |
465 | if (ceph_snap(inode) == | 465 | if (ceph_snap(inode) == |
466 | le64_to_cpu(rde->inode.in->snapid)) { | 466 | le64_to_cpu(rde->inode.in->snapid)) { |
467 | memcpy(name, rde->name, rde->name_len); | 467 | memcpy(name, rde->name, rde->name_len); |
468 | name[rde->name_len] = '\0'; | 468 | name[rde->name_len] = '\0'; |
469 | err = 0; | 469 | err = 0; |
470 | goto out; | 470 | goto out; |
471 | } | 471 | } |
472 | } | 472 | } |
473 | 473 | ||
474 | if (rinfo->dir_end) | 474 | if (rinfo->dir_end) |
475 | break; | 475 | break; |
476 | 476 | ||
477 | BUG_ON(rinfo->dir_nr <= 0); | 477 | BUG_ON(rinfo->dir_nr <= 0); |
478 | rde = rinfo->dir_entries + (rinfo->dir_nr - 1); | 478 | rde = rinfo->dir_entries + (rinfo->dir_nr - 1); |
479 | next_offset += rinfo->dir_nr; | 479 | next_offset += rinfo->dir_nr; |
480 | last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL); | 480 | last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL); |
481 | if (!last_name) { | 481 | if (!last_name) { |
482 | err = -ENOMEM; | 482 | err = -ENOMEM; |
483 | goto out; | 483 | goto out; |
484 | } | 484 | } |
485 | 485 | ||
486 | ceph_mdsc_put_request(req); | 486 | ceph_mdsc_put_request(req); |
487 | req = NULL; | 487 | req = NULL; |
488 | } | 488 | } |
489 | err = -ENOENT; | 489 | err = -ENOENT; |
490 | out: | 490 | out: |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 685a03cc4b77..d277f71abe0b 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include "super.h" | 15 | #include "super.h" |
16 | #include "mds_client.h" | 16 | #include "mds_client.h" |
17 | #include "cache.h" | 17 | #include "cache.h" |
18 | #include "io.h" | ||
18 | 19 | ||
19 | static __le32 ceph_flags_sys2wire(u32 flags) | 20 | static __le32 ceph_flags_sys2wire(u32 flags) |
20 | { | 21 | { |
@@ -201,6 +202,7 @@ out: | |||
201 | static int ceph_init_file_info(struct inode *inode, struct file *file, | 202 | static int ceph_init_file_info(struct inode *inode, struct file *file, |
202 | int fmode, bool isdir) | 203 | int fmode, bool isdir) |
203 | { | 204 | { |
205 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
204 | struct ceph_file_info *fi; | 206 | struct ceph_file_info *fi; |
205 | 207 | ||
206 | dout("%s %p %p 0%o (%s)\n", __func__, inode, file, | 208 | dout("%s %p %p 0%o (%s)\n", __func__, inode, file, |
@@ -211,7 +213,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, | |||
211 | struct ceph_dir_file_info *dfi = | 213 | struct ceph_dir_file_info *dfi = |
212 | kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL); | 214 | kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL); |
213 | if (!dfi) { | 215 | if (!dfi) { |
214 | ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ | 216 | ceph_put_fmode(ci, fmode); /* clean up */ |
215 | return -ENOMEM; | 217 | return -ENOMEM; |
216 | } | 218 | } |
217 | 219 | ||
@@ -222,7 +224,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, | |||
222 | } else { | 224 | } else { |
223 | fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); | 225 | fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); |
224 | if (!fi) { | 226 | if (!fi) { |
225 | ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ | 227 | ceph_put_fmode(ci, fmode); /* clean up */ |
226 | return -ENOMEM; | 228 | return -ENOMEM; |
227 | } | 229 | } |
228 | 230 | ||
@@ -232,6 +234,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, | |||
232 | fi->fmode = fmode; | 234 | fi->fmode = fmode; |
233 | spin_lock_init(&fi->rw_contexts_lock); | 235 | spin_lock_init(&fi->rw_contexts_lock); |
234 | INIT_LIST_HEAD(&fi->rw_contexts); | 236 | INIT_LIST_HEAD(&fi->rw_contexts); |
237 | fi->meta_err = errseq_sample(&ci->i_meta_err); | ||
238 | fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); | ||
235 | 239 | ||
236 | return 0; | 240 | return 0; |
237 | } | 241 | } |
@@ -695,7 +699,13 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, | |||
695 | ceph_release_page_vector(pages, num_pages); | 699 | ceph_release_page_vector(pages, num_pages); |
696 | } | 700 | } |
697 | 701 | ||
698 | if (ret <= 0 || off >= i_size || !more) | 702 | if (ret < 0) { |
703 | if (ret == -EBLACKLISTED) | ||
704 | fsc->blacklisted = true; | ||
705 | break; | ||
706 | } | ||
707 | |||
708 | if (off >= i_size || !more) | ||
699 | break; | 709 | break; |
700 | } | 710 | } |
701 | 711 | ||
@@ -921,7 +931,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, | |||
921 | struct ceph_aio_request *aio_req = NULL; | 931 | struct ceph_aio_request *aio_req = NULL; |
922 | int num_pages = 0; | 932 | int num_pages = 0; |
923 | int flags; | 933 | int flags; |
924 | int ret; | 934 | int ret = 0; |
925 | struct timespec64 mtime = current_time(inode); | 935 | struct timespec64 mtime = current_time(inode); |
926 | size_t count = iov_iter_count(iter); | 936 | size_t count = iov_iter_count(iter); |
927 | loff_t pos = iocb->ki_pos; | 937 | loff_t pos = iocb->ki_pos; |
@@ -935,11 +945,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, | |||
935 | (write ? "write" : "read"), file, pos, (unsigned)count, | 945 | (write ? "write" : "read"), file, pos, (unsigned)count, |
936 | snapc, snapc ? snapc->seq : 0); | 946 | snapc, snapc ? snapc->seq : 0); |
937 | 947 | ||
938 | ret = filemap_write_and_wait_range(inode->i_mapping, | ||
939 | pos, pos + count - 1); | ||
940 | if (ret < 0) | ||
941 | return ret; | ||
942 | |||
943 | if (write) { | 948 | if (write) { |
944 | int ret2 = invalidate_inode_pages2_range(inode->i_mapping, | 949 | int ret2 = invalidate_inode_pages2_range(inode->i_mapping, |
945 | pos >> PAGE_SHIFT, | 950 | pos >> PAGE_SHIFT, |
@@ -1260,7 +1265,8 @@ again: | |||
1260 | want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; | 1265 | want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; |
1261 | else | 1266 | else |
1262 | want = CEPH_CAP_FILE_CACHE; | 1267 | want = CEPH_CAP_FILE_CACHE; |
1263 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); | 1268 | ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, |
1269 | &got, &pinned_page); | ||
1264 | if (ret < 0) | 1270 | if (ret < 0) |
1265 | return ret; | 1271 | return ret; |
1266 | 1272 | ||
@@ -1274,12 +1280,16 @@ again: | |||
1274 | 1280 | ||
1275 | if (ci->i_inline_version == CEPH_INLINE_NONE) { | 1281 | if (ci->i_inline_version == CEPH_INLINE_NONE) { |
1276 | if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) { | 1282 | if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) { |
1283 | ceph_start_io_direct(inode); | ||
1277 | ret = ceph_direct_read_write(iocb, to, | 1284 | ret = ceph_direct_read_write(iocb, to, |
1278 | NULL, NULL); | 1285 | NULL, NULL); |
1286 | ceph_end_io_direct(inode); | ||
1279 | if (ret >= 0 && ret < len) | 1287 | if (ret >= 0 && ret < len) |
1280 | retry_op = CHECK_EOF; | 1288 | retry_op = CHECK_EOF; |
1281 | } else { | 1289 | } else { |
1290 | ceph_start_io_read(inode); | ||
1282 | ret = ceph_sync_read(iocb, to, &retry_op); | 1291 | ret = ceph_sync_read(iocb, to, &retry_op); |
1292 | ceph_end_io_read(inode); | ||
1283 | } | 1293 | } |
1284 | } else { | 1294 | } else { |
1285 | retry_op = READ_INLINE; | 1295 | retry_op = READ_INLINE; |
@@ -1290,7 +1300,9 @@ again: | |||
1290 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, | 1300 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, |
1291 | ceph_cap_string(got)); | 1301 | ceph_cap_string(got)); |
1292 | ceph_add_rw_context(fi, &rw_ctx); | 1302 | ceph_add_rw_context(fi, &rw_ctx); |
1303 | ceph_start_io_read(inode); | ||
1293 | ret = generic_file_read_iter(iocb, to); | 1304 | ret = generic_file_read_iter(iocb, to); |
1305 | ceph_end_io_read(inode); | ||
1294 | ceph_del_rw_context(fi, &rw_ctx); | 1306 | ceph_del_rw_context(fi, &rw_ctx); |
1295 | } | 1307 | } |
1296 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", | 1308 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", |
@@ -1399,7 +1411,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
1399 | return -ENOMEM; | 1411 | return -ENOMEM; |
1400 | 1412 | ||
1401 | retry_snap: | 1413 | retry_snap: |
1402 | inode_lock(inode); | 1414 | if (iocb->ki_flags & IOCB_DIRECT) |
1415 | ceph_start_io_direct(inode); | ||
1416 | else | ||
1417 | ceph_start_io_write(inode); | ||
1403 | 1418 | ||
1404 | /* We can write back this queue in page reclaim */ | 1419 | /* We can write back this queue in page reclaim */ |
1405 | current->backing_dev_info = inode_to_bdi(inode); | 1420 | current->backing_dev_info = inode_to_bdi(inode); |
@@ -1457,7 +1472,7 @@ retry_snap: | |||
1457 | else | 1472 | else |
1458 | want = CEPH_CAP_FILE_BUFFER; | 1473 | want = CEPH_CAP_FILE_BUFFER; |
1459 | got = 0; | 1474 | got = 0; |
1460 | err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count, | 1475 | err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, |
1461 | &got, NULL); | 1476 | &got, NULL); |
1462 | if (err < 0) | 1477 | if (err < 0) |
1463 | goto out; | 1478 | goto out; |
@@ -1470,7 +1485,6 @@ retry_snap: | |||
1470 | (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) { | 1485 | (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) { |
1471 | struct ceph_snap_context *snapc; | 1486 | struct ceph_snap_context *snapc; |
1472 | struct iov_iter data; | 1487 | struct iov_iter data; |
1473 | inode_unlock(inode); | ||
1474 | 1488 | ||
1475 | spin_lock(&ci->i_ceph_lock); | 1489 | spin_lock(&ci->i_ceph_lock); |
1476 | if (__ceph_have_pending_cap_snap(ci)) { | 1490 | if (__ceph_have_pending_cap_snap(ci)) { |
@@ -1487,11 +1501,14 @@ retry_snap: | |||
1487 | 1501 | ||
1488 | /* we might need to revert back to that point */ | 1502 | /* we might need to revert back to that point */ |
1489 | data = *from; | 1503 | data = *from; |
1490 | if (iocb->ki_flags & IOCB_DIRECT) | 1504 | if (iocb->ki_flags & IOCB_DIRECT) { |
1491 | written = ceph_direct_read_write(iocb, &data, snapc, | 1505 | written = ceph_direct_read_write(iocb, &data, snapc, |
1492 | &prealloc_cf); | 1506 | &prealloc_cf); |
1493 | else | 1507 | ceph_end_io_direct(inode); |
1508 | } else { | ||
1494 | written = ceph_sync_write(iocb, &data, pos, snapc); | 1509 | written = ceph_sync_write(iocb, &data, pos, snapc); |
1510 | ceph_end_io_write(inode); | ||
1511 | } | ||
1495 | if (written > 0) | 1512 | if (written > 0) |
1496 | iov_iter_advance(from, written); | 1513 | iov_iter_advance(from, written); |
1497 | ceph_put_snap_context(snapc); | 1514 | ceph_put_snap_context(snapc); |
@@ -1506,7 +1523,7 @@ retry_snap: | |||
1506 | written = generic_perform_write(file, from, pos); | 1523 | written = generic_perform_write(file, from, pos); |
1507 | if (likely(written >= 0)) | 1524 | if (likely(written >= 0)) |
1508 | iocb->ki_pos = pos + written; | 1525 | iocb->ki_pos = pos + written; |
1509 | inode_unlock(inode); | 1526 | ceph_end_io_write(inode); |
1510 | } | 1527 | } |
1511 | 1528 | ||
1512 | if (written >= 0) { | 1529 | if (written >= 0) { |
@@ -1541,9 +1558,11 @@ retry_snap: | |||
1541 | } | 1558 | } |
1542 | 1559 | ||
1543 | goto out_unlocked; | 1560 | goto out_unlocked; |
1544 | |||
1545 | out: | 1561 | out: |
1546 | inode_unlock(inode); | 1562 | if (iocb->ki_flags & IOCB_DIRECT) |
1563 | ceph_end_io_direct(inode); | ||
1564 | else | ||
1565 | ceph_end_io_write(inode); | ||
1547 | out_unlocked: | 1566 | out_unlocked: |
1548 | ceph_free_cap_flush(prealloc_cf); | 1567 | ceph_free_cap_flush(prealloc_cf); |
1549 | current->backing_dev_info = NULL; | 1568 | current->backing_dev_info = NULL; |
@@ -1781,7 +1800,7 @@ static long ceph_fallocate(struct file *file, int mode, | |||
1781 | else | 1800 | else |
1782 | want = CEPH_CAP_FILE_BUFFER; | 1801 | want = CEPH_CAP_FILE_BUFFER; |
1783 | 1802 | ||
1784 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL); | 1803 | ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got, NULL); |
1785 | if (ret < 0) | 1804 | if (ret < 0) |
1786 | goto unlock; | 1805 | goto unlock; |
1787 | 1806 | ||
@@ -1810,16 +1829,15 @@ unlock: | |||
1810 | * src_ci. Two attempts are made to obtain both caps, and an error is return if | 1829 | * src_ci. Two attempts are made to obtain both caps, and an error is return if |
1811 | * this fails; zero is returned on success. | 1830 | * this fails; zero is returned on success. |
1812 | */ | 1831 | */ |
1813 | static int get_rd_wr_caps(struct ceph_inode_info *src_ci, | 1832 | static int get_rd_wr_caps(struct file *src_filp, int *src_got, |
1814 | loff_t src_endoff, int *src_got, | 1833 | struct file *dst_filp, |
1815 | struct ceph_inode_info *dst_ci, | ||
1816 | loff_t dst_endoff, int *dst_got) | 1834 | loff_t dst_endoff, int *dst_got) |
1817 | { | 1835 | { |
1818 | int ret = 0; | 1836 | int ret = 0; |
1819 | bool retrying = false; | 1837 | bool retrying = false; |
1820 | 1838 | ||
1821 | retry_caps: | 1839 | retry_caps: |
1822 | ret = ceph_get_caps(dst_ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, | 1840 | ret = ceph_get_caps(dst_filp, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, |
1823 | dst_endoff, dst_got, NULL); | 1841 | dst_endoff, dst_got, NULL); |
1824 | if (ret < 0) | 1842 | if (ret < 0) |
1825 | return ret; | 1843 | return ret; |
@@ -1829,24 +1847,24 @@ retry_caps: | |||
1829 | * we would risk a deadlock by using ceph_get_caps. Thus, we'll do some | 1847 | * we would risk a deadlock by using ceph_get_caps. Thus, we'll do some |
1830 | * retry dance instead to try to get both capabilities. | 1848 | * retry dance instead to try to get both capabilities. |
1831 | */ | 1849 | */ |
1832 | ret = ceph_try_get_caps(src_ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_SHARED, | 1850 | ret = ceph_try_get_caps(file_inode(src_filp), |
1851 | CEPH_CAP_FILE_RD, CEPH_CAP_FILE_SHARED, | ||
1833 | false, src_got); | 1852 | false, src_got); |
1834 | if (ret <= 0) { | 1853 | if (ret <= 0) { |
1835 | /* Start by dropping dst_ci caps and getting src_ci caps */ | 1854 | /* Start by dropping dst_ci caps and getting src_ci caps */ |
1836 | ceph_put_cap_refs(dst_ci, *dst_got); | 1855 | ceph_put_cap_refs(ceph_inode(file_inode(dst_filp)), *dst_got); |
1837 | if (retrying) { | 1856 | if (retrying) { |
1838 | if (!ret) | 1857 | if (!ret) |
1839 | /* ceph_try_get_caps masks EAGAIN */ | 1858 | /* ceph_try_get_caps masks EAGAIN */ |
1840 | ret = -EAGAIN; | 1859 | ret = -EAGAIN; |
1841 | return ret; | 1860 | return ret; |
1842 | } | 1861 | } |
1843 | ret = ceph_get_caps(src_ci, CEPH_CAP_FILE_RD, | 1862 | ret = ceph_get_caps(src_filp, CEPH_CAP_FILE_RD, |
1844 | CEPH_CAP_FILE_SHARED, src_endoff, | 1863 | CEPH_CAP_FILE_SHARED, -1, src_got, NULL); |
1845 | src_got, NULL); | ||
1846 | if (ret < 0) | 1864 | if (ret < 0) |
1847 | return ret; | 1865 | return ret; |
1848 | /*... drop src_ci caps too, and retry */ | 1866 | /*... drop src_ci caps too, and retry */ |
1849 | ceph_put_cap_refs(src_ci, *src_got); | 1867 | ceph_put_cap_refs(ceph_inode(file_inode(src_filp)), *src_got); |
1850 | retrying = true; | 1868 | retrying = true; |
1851 | goto retry_caps; | 1869 | goto retry_caps; |
1852 | } | 1870 | } |
@@ -1904,6 +1922,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, | |||
1904 | struct ceph_inode_info *src_ci = ceph_inode(src_inode); | 1922 | struct ceph_inode_info *src_ci = ceph_inode(src_inode); |
1905 | struct ceph_inode_info *dst_ci = ceph_inode(dst_inode); | 1923 | struct ceph_inode_info *dst_ci = ceph_inode(dst_inode); |
1906 | struct ceph_cap_flush *prealloc_cf; | 1924 | struct ceph_cap_flush *prealloc_cf; |
1925 | struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode); | ||
1907 | struct ceph_object_locator src_oloc, dst_oloc; | 1926 | struct ceph_object_locator src_oloc, dst_oloc; |
1908 | struct ceph_object_id src_oid, dst_oid; | 1927 | struct ceph_object_id src_oid, dst_oid; |
1909 | loff_t endoff = 0, size; | 1928 | loff_t endoff = 0, size; |
@@ -1913,10 +1932,16 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, | |||
1913 | int src_got = 0, dst_got = 0, err, dirty; | 1932 | int src_got = 0, dst_got = 0, err, dirty; |
1914 | bool do_final_copy = false; | 1933 | bool do_final_copy = false; |
1915 | 1934 | ||
1916 | if (src_inode == dst_inode) | 1935 | if (src_inode->i_sb != dst_inode->i_sb) { |
1917 | return -EINVAL; | 1936 | struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode); |
1918 | if (src_inode->i_sb != dst_inode->i_sb) | 1937 | |
1919 | return -EXDEV; | 1938 | if (ceph_fsid_compare(&src_fsc->client->fsid, |
1939 | &dst_fsc->client->fsid)) { | ||
1940 | dout("Copying files across clusters: src: %pU dst: %pU\n", | ||
1941 | &src_fsc->client->fsid, &dst_fsc->client->fsid); | ||
1942 | return -EXDEV; | ||
1943 | } | ||
1944 | } | ||
1920 | if (ceph_snap(dst_inode) != CEPH_NOSNAP) | 1945 | if (ceph_snap(dst_inode) != CEPH_NOSNAP) |
1921 | return -EROFS; | 1946 | return -EROFS; |
1922 | 1947 | ||
@@ -1928,7 +1953,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, | |||
1928 | * efficient). | 1953 | * efficient). |
1929 | */ | 1954 | */ |
1930 | 1955 | ||
1931 | if (ceph_test_mount_opt(ceph_inode_to_client(src_inode), NOCOPYFROM)) | 1956 | if (ceph_test_mount_opt(src_fsc, NOCOPYFROM)) |
1932 | return -EOPNOTSUPP; | 1957 | return -EOPNOTSUPP; |
1933 | 1958 | ||
1934 | if ((src_ci->i_layout.stripe_unit != dst_ci->i_layout.stripe_unit) || | 1959 | if ((src_ci->i_layout.stripe_unit != dst_ci->i_layout.stripe_unit) || |
@@ -1960,8 +1985,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, | |||
1960 | * clients may have dirty data in their caches. And OSDs know nothing | 1985 | * clients may have dirty data in their caches. And OSDs know nothing |
1961 | * about caps, so they can't safely do the remote object copies. | 1986 | * about caps, so they can't safely do the remote object copies. |
1962 | */ | 1987 | */ |
1963 | err = get_rd_wr_caps(src_ci, (src_off + len), &src_got, | 1988 | err = get_rd_wr_caps(src_file, &src_got, |
1964 | dst_ci, (dst_off + len), &dst_got); | 1989 | dst_file, (dst_off + len), &dst_got); |
1965 | if (err < 0) { | 1990 | if (err < 0) { |
1966 | dout("get_rd_wr_caps returned %d\n", err); | 1991 | dout("get_rd_wr_caps returned %d\n", err); |
1967 | ret = -EOPNOTSUPP; | 1992 | ret = -EOPNOTSUPP; |
@@ -2018,9 +2043,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, | |||
2018 | goto out; | 2043 | goto out; |
2019 | } | 2044 | } |
2020 | len -= ret; | 2045 | len -= ret; |
2021 | err = get_rd_wr_caps(src_ci, (src_off + len), | 2046 | err = get_rd_wr_caps(src_file, &src_got, |
2022 | &src_got, dst_ci, | 2047 | dst_file, (dst_off + len), &dst_got); |
2023 | (dst_off + len), &dst_got); | ||
2024 | if (err < 0) | 2048 | if (err < 0) |
2025 | goto out; | 2049 | goto out; |
2026 | err = is_file_size_ok(src_inode, dst_inode, | 2050 | err = is_file_size_ok(src_inode, dst_inode, |
@@ -2044,7 +2068,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, | |||
2044 | dst_ci->i_vino.ino, dst_objnum); | 2068 | dst_ci->i_vino.ino, dst_objnum); |
2045 | /* Do an object remote copy */ | 2069 | /* Do an object remote copy */ |
2046 | err = ceph_osdc_copy_from( | 2070 | err = ceph_osdc_copy_from( |
2047 | &ceph_inode_to_client(src_inode)->client->osdc, | 2071 | &src_fsc->client->osdc, |
2048 | src_ci->i_vino.snap, 0, | 2072 | src_ci->i_vino.snap, 0, |
2049 | &src_oid, &src_oloc, | 2073 | &src_oid, &src_oloc, |
2050 | CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | | 2074 | CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 18500edefc56..9f135624ae47 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -515,6 +515,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
515 | 515 | ||
516 | ceph_fscache_inode_init(ci); | 516 | ceph_fscache_inode_init(ci); |
517 | 517 | ||
518 | ci->i_meta_err = 0; | ||
519 | |||
518 | return &ci->vfs_inode; | 520 | return &ci->vfs_inode; |
519 | } | 521 | } |
520 | 522 | ||
@@ -801,7 +803,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
801 | 803 | ||
802 | /* update inode */ | 804 | /* update inode */ |
803 | inode->i_rdev = le32_to_cpu(info->rdev); | 805 | inode->i_rdev = le32_to_cpu(info->rdev); |
804 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 806 | /* directories have fl_stripe_unit set to zero */ |
807 | if (le32_to_cpu(info->layout.fl_stripe_unit)) | ||
808 | inode->i_blkbits = | ||
809 | fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | ||
810 | else | ||
811 | inode->i_blkbits = CEPH_BLOCK_SHIFT; | ||
805 | 812 | ||
806 | __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); | 813 | __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); |
807 | 814 | ||
@@ -1982,7 +1989,7 @@ static const struct inode_operations ceph_symlink_iops = { | |||
1982 | int __ceph_setattr(struct inode *inode, struct iattr *attr) | 1989 | int __ceph_setattr(struct inode *inode, struct iattr *attr) |
1983 | { | 1990 | { |
1984 | struct ceph_inode_info *ci = ceph_inode(inode); | 1991 | struct ceph_inode_info *ci = ceph_inode(inode); |
1985 | const unsigned int ia_valid = attr->ia_valid; | 1992 | unsigned int ia_valid = attr->ia_valid; |
1986 | struct ceph_mds_request *req; | 1993 | struct ceph_mds_request *req; |
1987 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 1994 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
1988 | struct ceph_cap_flush *prealloc_cf; | 1995 | struct ceph_cap_flush *prealloc_cf; |
@@ -2087,6 +2094,26 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) | |||
2087 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; | 2094 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; |
2088 | } | 2095 | } |
2089 | } | 2096 | } |
2097 | if (ia_valid & ATTR_SIZE) { | ||
2098 | dout("setattr %p size %lld -> %lld\n", inode, | ||
2099 | inode->i_size, attr->ia_size); | ||
2100 | if ((issued & CEPH_CAP_FILE_EXCL) && | ||
2101 | attr->ia_size > inode->i_size) { | ||
2102 | i_size_write(inode, attr->ia_size); | ||
2103 | inode->i_blocks = calc_inode_blocks(attr->ia_size); | ||
2104 | ci->i_reported_size = attr->ia_size; | ||
2105 | dirtied |= CEPH_CAP_FILE_EXCL; | ||
2106 | ia_valid |= ATTR_MTIME; | ||
2107 | } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || | ||
2108 | attr->ia_size != inode->i_size) { | ||
2109 | req->r_args.setattr.size = cpu_to_le64(attr->ia_size); | ||
2110 | req->r_args.setattr.old_size = | ||
2111 | cpu_to_le64(inode->i_size); | ||
2112 | mask |= CEPH_SETATTR_SIZE; | ||
2113 | release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | | ||
2114 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; | ||
2115 | } | ||
2116 | } | ||
2090 | if (ia_valid & ATTR_MTIME) { | 2117 | if (ia_valid & ATTR_MTIME) { |
2091 | dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode, | 2118 | dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode, |
2092 | inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, | 2119 | inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, |
@@ -2109,25 +2136,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) | |||
2109 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; | 2136 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; |
2110 | } | 2137 | } |
2111 | } | 2138 | } |
2112 | if (ia_valid & ATTR_SIZE) { | ||
2113 | dout("setattr %p size %lld -> %lld\n", inode, | ||
2114 | inode->i_size, attr->ia_size); | ||
2115 | if ((issued & CEPH_CAP_FILE_EXCL) && | ||
2116 | attr->ia_size > inode->i_size) { | ||
2117 | i_size_write(inode, attr->ia_size); | ||
2118 | inode->i_blocks = calc_inode_blocks(attr->ia_size); | ||
2119 | ci->i_reported_size = attr->ia_size; | ||
2120 | dirtied |= CEPH_CAP_FILE_EXCL; | ||
2121 | } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || | ||
2122 | attr->ia_size != inode->i_size) { | ||
2123 | req->r_args.setattr.size = cpu_to_le64(attr->ia_size); | ||
2124 | req->r_args.setattr.old_size = | ||
2125 | cpu_to_le64(inode->i_size); | ||
2126 | mask |= CEPH_SETATTR_SIZE; | ||
2127 | release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | | ||
2128 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; | ||
2129 | } | ||
2130 | } | ||
2131 | 2139 | ||
2132 | /* these do nothing */ | 2140 | /* these do nothing */ |
2133 | if (ia_valid & ATTR_CTIME) { | 2141 | if (ia_valid & ATTR_CTIME) { |
diff --git a/fs/ceph/io.c b/fs/ceph/io.c new file mode 100644 index 000000000000..97602ea92ff4 --- /dev/null +++ b/fs/ceph/io.c | |||
@@ -0,0 +1,163 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Copyright (c) 2016 Trond Myklebust | ||
4 | * Copyright (c) 2019 Jeff Layton | ||
5 | * | ||
6 | * I/O and data path helper functionality. | ||
7 | * | ||
8 | * Heavily borrowed from equivalent code in fs/nfs/io.c | ||
9 | */ | ||
10 | |||
11 | #include <linux/ceph/ceph_debug.h> | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/rwsem.h> | ||
16 | #include <linux/fs.h> | ||
17 | |||
18 | #include "super.h" | ||
19 | #include "io.h" | ||
20 | |||
21 | /* Call with exclusively locked inode->i_rwsem */ | ||
22 | static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) | ||
23 | { | ||
24 | lockdep_assert_held_write(&inode->i_rwsem); | ||
25 | |||
26 | if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) { | ||
27 | spin_lock(&ci->i_ceph_lock); | ||
28 | ci->i_ceph_flags &= ~CEPH_I_ODIRECT; | ||
29 | spin_unlock(&ci->i_ceph_lock); | ||
30 | inode_dio_wait(inode); | ||
31 | } | ||
32 | } | ||
33 | |||
34 | /** | ||
35 | * ceph_start_io_read - declare the file is being used for buffered reads | ||
36 | * @inode: file inode | ||
37 | * | ||
38 | * Declare that a buffered read operation is about to start, and ensure | ||
39 | * that we block all direct I/O. | ||
40 | * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset, | ||
41 | * and holds a shared lock on inode->i_rwsem to ensure that the flag | ||
42 | * cannot be changed. | ||
43 | * In practice, this means that buffered read operations are allowed to | ||
44 | * execute in parallel, thanks to the shared lock, whereas direct I/O | ||
45 | * operations need to wait to grab an exclusive lock in order to set | ||
46 | * CEPH_I_ODIRECT. | ||
47 | * Note that buffered writes and truncates both take a write lock on | ||
48 | * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. | ||
49 | */ | ||
50 | void | ||
51 | ceph_start_io_read(struct inode *inode) | ||
52 | { | ||
53 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
54 | |||
55 | /* Be an optimist! */ | ||
56 | down_read(&inode->i_rwsem); | ||
57 | if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) | ||
58 | return; | ||
59 | up_read(&inode->i_rwsem); | ||
60 | /* Slow path.... */ | ||
61 | down_write(&inode->i_rwsem); | ||
62 | ceph_block_o_direct(ci, inode); | ||
63 | downgrade_write(&inode->i_rwsem); | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * ceph_end_io_read - declare that the buffered read operation is done | ||
68 | * @inode: file inode | ||
69 | * | ||
70 | * Declare that a buffered read operation is done, and release the shared | ||
71 | * lock on inode->i_rwsem. | ||
72 | */ | ||
73 | void | ||
74 | ceph_end_io_read(struct inode *inode) | ||
75 | { | ||
76 | up_read(&inode->i_rwsem); | ||
77 | } | ||
78 | |||
79 | /** | ||
80 | * ceph_start_io_write - declare the file is being used for buffered writes | ||
81 | * @inode: file inode | ||
82 | * | ||
83 | * Declare that a buffered write operation is about to start, and ensure | ||
84 | * that we block all direct I/O. | ||
85 | */ | ||
86 | void | ||
87 | ceph_start_io_write(struct inode *inode) | ||
88 | { | ||
89 | down_write(&inode->i_rwsem); | ||
90 | ceph_block_o_direct(ceph_inode(inode), inode); | ||
91 | } | ||
92 | |||
93 | /** | ||
94 | * ceph_end_io_write - declare that the buffered write operation is done | ||
95 | * @inode: file inode | ||
96 | * | ||
97 | * Declare that a buffered write operation is done, and release the | ||
98 | * lock on inode->i_rwsem. | ||
99 | */ | ||
100 | void | ||
101 | ceph_end_io_write(struct inode *inode) | ||
102 | { | ||
103 | up_write(&inode->i_rwsem); | ||
104 | } | ||
105 | |||
106 | /* Call with exclusively locked inode->i_rwsem */ | ||
107 | static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) | ||
108 | { | ||
109 | lockdep_assert_held_write(&inode->i_rwsem); | ||
110 | |||
111 | if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) { | ||
112 | spin_lock(&ci->i_ceph_lock); | ||
113 | ci->i_ceph_flags |= CEPH_I_ODIRECT; | ||
114 | spin_unlock(&ci->i_ceph_lock); | ||
115 | /* FIXME: unmap_mapping_range? */ | ||
116 | filemap_write_and_wait(inode->i_mapping); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * ceph_start_io_direct - declare the file is being used for direct i/o | ||
122 | * @inode: file inode | ||
123 | * | ||
124 | * Declare that a direct I/O operation is about to start, and ensure | ||
125 | * that we block all buffered I/O. | ||
126 | * On exit, the function ensures that the CEPH_I_ODIRECT flag is set, | ||
127 | * and holds a shared lock on inode->i_rwsem to ensure that the flag | ||
128 | * cannot be changed. | ||
129 | * In practice, this means that direct I/O operations are allowed to | ||
130 | * execute in parallel, thanks to the shared lock, whereas buffered I/O | ||
131 | * operations need to wait to grab an exclusive lock in order to clear | ||
132 | * CEPH_I_ODIRECT. | ||
133 | * Note that buffered writes and truncates both take a write lock on | ||
134 | * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. | ||
135 | */ | ||
136 | void | ||
137 | ceph_start_io_direct(struct inode *inode) | ||
138 | { | ||
139 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
140 | |||
141 | /* Be an optimist! */ | ||
142 | down_read(&inode->i_rwsem); | ||
143 | if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) | ||
144 | return; | ||
145 | up_read(&inode->i_rwsem); | ||
146 | /* Slow path.... */ | ||
147 | down_write(&inode->i_rwsem); | ||
148 | ceph_block_buffered(ci, inode); | ||
149 | downgrade_write(&inode->i_rwsem); | ||
150 | } | ||
151 | |||
152 | /** | ||
153 | * ceph_end_io_direct - declare that the direct i/o operation is done | ||
154 | * @inode: file inode | ||
155 | * | ||
156 | * Declare that a direct I/O operation is done, and release the shared | ||
157 | * lock on inode->i_rwsem. | ||
158 | */ | ||
159 | void | ||
160 | ceph_end_io_direct(struct inode *inode) | ||
161 | { | ||
162 | up_read(&inode->i_rwsem); | ||
163 | } | ||
diff --git a/fs/ceph/io.h b/fs/ceph/io.h new file mode 100644 index 000000000000..fa594cd77348 --- /dev/null +++ b/fs/ceph/io.h | |||
@@ -0,0 +1,12 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef _FS_CEPH_IO_H | ||
3 | #define _FS_CEPH_IO_H | ||
4 | |||
5 | void ceph_start_io_read(struct inode *inode); | ||
6 | void ceph_end_io_read(struct inode *inode); | ||
7 | void ceph_start_io_write(struct inode *inode); | ||
8 | void ceph_end_io_write(struct inode *inode); | ||
9 | void ceph_start_io_direct(struct inode *inode); | ||
10 | void ceph_end_io_direct(struct inode *inode); | ||
11 | |||
12 | #endif /* _FS_CEPH_IO_H */ | ||
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 5083e238ad15..544e9e85b120 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -32,14 +32,18 @@ void __init ceph_flock_init(void) | |||
32 | 32 | ||
33 | static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) | 33 | static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) |
34 | { | 34 | { |
35 | struct inode *inode = file_inode(src->fl_file); | 35 | struct ceph_file_info *fi = dst->fl_file->private_data; |
36 | struct inode *inode = file_inode(dst->fl_file); | ||
36 | atomic_inc(&ceph_inode(inode)->i_filelock_ref); | 37 | atomic_inc(&ceph_inode(inode)->i_filelock_ref); |
38 | atomic_inc(&fi->num_locks); | ||
37 | } | 39 | } |
38 | 40 | ||
39 | static void ceph_fl_release_lock(struct file_lock *fl) | 41 | static void ceph_fl_release_lock(struct file_lock *fl) |
40 | { | 42 | { |
43 | struct ceph_file_info *fi = fl->fl_file->private_data; | ||
41 | struct inode *inode = file_inode(fl->fl_file); | 44 | struct inode *inode = file_inode(fl->fl_file); |
42 | struct ceph_inode_info *ci = ceph_inode(inode); | 45 | struct ceph_inode_info *ci = ceph_inode(inode); |
46 | atomic_dec(&fi->num_locks); | ||
43 | if (atomic_dec_and_test(&ci->i_filelock_ref)) { | 47 | if (atomic_dec_and_test(&ci->i_filelock_ref)) { |
44 | /* clear error when all locks are released */ | 48 | /* clear error when all locks are released */ |
45 | spin_lock(&ci->i_ceph_lock); | 49 | spin_lock(&ci->i_ceph_lock); |
@@ -73,7 +77,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, | |||
73 | * window. Caller function will decrease the counter. | 77 | * window. Caller function will decrease the counter. |
74 | */ | 78 | */ |
75 | fl->fl_ops = &ceph_fl_lock_ops; | 79 | fl->fl_ops = &ceph_fl_lock_ops; |
76 | atomic_inc(&ceph_inode(inode)->i_filelock_ref); | 80 | fl->fl_ops->fl_copy_lock(fl, NULL); |
77 | } | 81 | } |
78 | 82 | ||
79 | if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) | 83 | if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 920e9f048bd8..a8a8f84f3bbf 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -639,7 +639,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
639 | s->s_renew_seq = 0; | 639 | s->s_renew_seq = 0; |
640 | INIT_LIST_HEAD(&s->s_caps); | 640 | INIT_LIST_HEAD(&s->s_caps); |
641 | s->s_nr_caps = 0; | 641 | s->s_nr_caps = 0; |
642 | s->s_trim_caps = 0; | ||
643 | refcount_set(&s->s_ref, 1); | 642 | refcount_set(&s->s_ref, 1); |
644 | INIT_LIST_HEAD(&s->s_waiting); | 643 | INIT_LIST_HEAD(&s->s_waiting); |
645 | INIT_LIST_HEAD(&s->s_unsafe); | 644 | INIT_LIST_HEAD(&s->s_unsafe); |
@@ -1270,6 +1269,7 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, | |||
1270 | { | 1269 | { |
1271 | struct ceph_mds_request *req; | 1270 | struct ceph_mds_request *req; |
1272 | struct rb_node *p; | 1271 | struct rb_node *p; |
1272 | struct ceph_inode_info *ci; | ||
1273 | 1273 | ||
1274 | dout("cleanup_session_requests mds%d\n", session->s_mds); | 1274 | dout("cleanup_session_requests mds%d\n", session->s_mds); |
1275 | mutex_lock(&mdsc->mutex); | 1275 | mutex_lock(&mdsc->mutex); |
@@ -1278,6 +1278,16 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, | |||
1278 | struct ceph_mds_request, r_unsafe_item); | 1278 | struct ceph_mds_request, r_unsafe_item); |
1279 | pr_warn_ratelimited(" dropping unsafe request %llu\n", | 1279 | pr_warn_ratelimited(" dropping unsafe request %llu\n", |
1280 | req->r_tid); | 1280 | req->r_tid); |
1281 | if (req->r_target_inode) { | ||
1282 | /* dropping unsafe change of inode's attributes */ | ||
1283 | ci = ceph_inode(req->r_target_inode); | ||
1284 | errseq_set(&ci->i_meta_err, -EIO); | ||
1285 | } | ||
1286 | if (req->r_unsafe_dir) { | ||
1287 | /* dropping unsafe directory operation */ | ||
1288 | ci = ceph_inode(req->r_unsafe_dir); | ||
1289 | errseq_set(&ci->i_meta_err, -EIO); | ||
1290 | } | ||
1281 | __unregister_request(mdsc, req); | 1291 | __unregister_request(mdsc, req); |
1282 | } | 1292 | } |
1283 | /* zero r_attempts, so kick_requests() will re-send requests */ | 1293 | /* zero r_attempts, so kick_requests() will re-send requests */ |
@@ -1370,7 +1380,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1370 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg; | 1380 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg; |
1371 | struct ceph_inode_info *ci = ceph_inode(inode); | 1381 | struct ceph_inode_info *ci = ceph_inode(inode); |
1372 | LIST_HEAD(to_remove); | 1382 | LIST_HEAD(to_remove); |
1373 | bool drop = false; | 1383 | bool dirty_dropped = false; |
1374 | bool invalidate = false; | 1384 | bool invalidate = false; |
1375 | 1385 | ||
1376 | dout("removing cap %p, ci is %p, inode is %p\n", | 1386 | dout("removing cap %p, ci is %p, inode is %p\n", |
@@ -1383,9 +1393,12 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1383 | struct ceph_cap_flush *cf; | 1393 | struct ceph_cap_flush *cf; |
1384 | struct ceph_mds_client *mdsc = fsc->mdsc; | 1394 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1385 | 1395 | ||
1386 | if (ci->i_wrbuffer_ref > 0 && | 1396 | if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { |
1387 | READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) | 1397 | if (inode->i_data.nrpages > 0) |
1388 | invalidate = true; | 1398 | invalidate = true; |
1399 | if (ci->i_wrbuffer_ref > 0) | ||
1400 | mapping_set_error(&inode->i_data, -EIO); | ||
1401 | } | ||
1389 | 1402 | ||
1390 | while (!list_empty(&ci->i_cap_flush_list)) { | 1403 | while (!list_empty(&ci->i_cap_flush_list)) { |
1391 | cf = list_first_entry(&ci->i_cap_flush_list, | 1404 | cf = list_first_entry(&ci->i_cap_flush_list, |
@@ -1405,7 +1418,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1405 | inode, ceph_ino(inode)); | 1418 | inode, ceph_ino(inode)); |
1406 | ci->i_dirty_caps = 0; | 1419 | ci->i_dirty_caps = 0; |
1407 | list_del_init(&ci->i_dirty_item); | 1420 | list_del_init(&ci->i_dirty_item); |
1408 | drop = true; | 1421 | dirty_dropped = true; |
1409 | } | 1422 | } |
1410 | if (!list_empty(&ci->i_flushing_item)) { | 1423 | if (!list_empty(&ci->i_flushing_item)) { |
1411 | pr_warn_ratelimited( | 1424 | pr_warn_ratelimited( |
@@ -1415,10 +1428,22 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1415 | ci->i_flushing_caps = 0; | 1428 | ci->i_flushing_caps = 0; |
1416 | list_del_init(&ci->i_flushing_item); | 1429 | list_del_init(&ci->i_flushing_item); |
1417 | mdsc->num_cap_flushing--; | 1430 | mdsc->num_cap_flushing--; |
1418 | drop = true; | 1431 | dirty_dropped = true; |
1419 | } | 1432 | } |
1420 | spin_unlock(&mdsc->cap_dirty_lock); | 1433 | spin_unlock(&mdsc->cap_dirty_lock); |
1421 | 1434 | ||
1435 | if (dirty_dropped) { | ||
1436 | errseq_set(&ci->i_meta_err, -EIO); | ||
1437 | |||
1438 | if (ci->i_wrbuffer_ref_head == 0 && | ||
1439 | ci->i_wr_ref == 0 && | ||
1440 | ci->i_dirty_caps == 0 && | ||
1441 | ci->i_flushing_caps == 0) { | ||
1442 | ceph_put_snap_context(ci->i_head_snapc); | ||
1443 | ci->i_head_snapc = NULL; | ||
1444 | } | ||
1445 | } | ||
1446 | |||
1422 | if (atomic_read(&ci->i_filelock_ref) > 0) { | 1447 | if (atomic_read(&ci->i_filelock_ref) > 0) { |
1423 | /* make further file lock syscall return -EIO */ | 1448 | /* make further file lock syscall return -EIO */ |
1424 | ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; | 1449 | ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; |
@@ -1430,15 +1455,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1430 | list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); | 1455 | list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); |
1431 | ci->i_prealloc_cap_flush = NULL; | 1456 | ci->i_prealloc_cap_flush = NULL; |
1432 | } | 1457 | } |
1433 | |||
1434 | if (drop && | ||
1435 | ci->i_wrbuffer_ref_head == 0 && | ||
1436 | ci->i_wr_ref == 0 && | ||
1437 | ci->i_dirty_caps == 0 && | ||
1438 | ci->i_flushing_caps == 0) { | ||
1439 | ceph_put_snap_context(ci->i_head_snapc); | ||
1440 | ci->i_head_snapc = NULL; | ||
1441 | } | ||
1442 | } | 1458 | } |
1443 | spin_unlock(&ci->i_ceph_lock); | 1459 | spin_unlock(&ci->i_ceph_lock); |
1444 | while (!list_empty(&to_remove)) { | 1460 | while (!list_empty(&to_remove)) { |
@@ -1452,7 +1468,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1452 | wake_up_all(&ci->i_cap_wq); | 1468 | wake_up_all(&ci->i_cap_wq); |
1453 | if (invalidate) | 1469 | if (invalidate) |
1454 | ceph_queue_invalidate(inode); | 1470 | ceph_queue_invalidate(inode); |
1455 | if (drop) | 1471 | if (dirty_dropped) |
1456 | iput(inode); | 1472 | iput(inode); |
1457 | return 0; | 1473 | return 0; |
1458 | } | 1474 | } |
@@ -1705,11 +1721,11 @@ out: | |||
1705 | */ | 1721 | */ |
1706 | static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) | 1722 | static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) |
1707 | { | 1723 | { |
1708 | struct ceph_mds_session *session = arg; | 1724 | int *remaining = arg; |
1709 | struct ceph_inode_info *ci = ceph_inode(inode); | 1725 | struct ceph_inode_info *ci = ceph_inode(inode); |
1710 | int used, wanted, oissued, mine; | 1726 | int used, wanted, oissued, mine; |
1711 | 1727 | ||
1712 | if (session->s_trim_caps <= 0) | 1728 | if (*remaining <= 0) |
1713 | return -1; | 1729 | return -1; |
1714 | 1730 | ||
1715 | spin_lock(&ci->i_ceph_lock); | 1731 | spin_lock(&ci->i_ceph_lock); |
@@ -1746,7 +1762,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) | |||
1746 | if (oissued) { | 1762 | if (oissued) { |
1747 | /* we aren't the only cap.. just remove us */ | 1763 | /* we aren't the only cap.. just remove us */ |
1748 | __ceph_remove_cap(cap, true); | 1764 | __ceph_remove_cap(cap, true); |
1749 | session->s_trim_caps--; | 1765 | (*remaining)--; |
1750 | } else { | 1766 | } else { |
1751 | struct dentry *dentry; | 1767 | struct dentry *dentry; |
1752 | /* try dropping referring dentries */ | 1768 | /* try dropping referring dentries */ |
@@ -1758,7 +1774,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) | |||
1758 | d_prune_aliases(inode); | 1774 | d_prune_aliases(inode); |
1759 | count = atomic_read(&inode->i_count); | 1775 | count = atomic_read(&inode->i_count); |
1760 | if (count == 1) | 1776 | if (count == 1) |
1761 | session->s_trim_caps--; | 1777 | (*remaining)--; |
1762 | dout("trim_caps_cb %p cap %p pruned, count now %d\n", | 1778 | dout("trim_caps_cb %p cap %p pruned, count now %d\n", |
1763 | inode, cap, count); | 1779 | inode, cap, count); |
1764 | } else { | 1780 | } else { |
@@ -1784,12 +1800,12 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, | |||
1784 | dout("trim_caps mds%d start: %d / %d, trim %d\n", | 1800 | dout("trim_caps mds%d start: %d / %d, trim %d\n", |
1785 | session->s_mds, session->s_nr_caps, max_caps, trim_caps); | 1801 | session->s_mds, session->s_nr_caps, max_caps, trim_caps); |
1786 | if (trim_caps > 0) { | 1802 | if (trim_caps > 0) { |
1787 | session->s_trim_caps = trim_caps; | 1803 | int remaining = trim_caps; |
1788 | ceph_iterate_session_caps(session, trim_caps_cb, session); | 1804 | |
1805 | ceph_iterate_session_caps(session, trim_caps_cb, &remaining); | ||
1789 | dout("trim_caps mds%d done: %d / %d, trimmed %d\n", | 1806 | dout("trim_caps mds%d done: %d / %d, trimmed %d\n", |
1790 | session->s_mds, session->s_nr_caps, max_caps, | 1807 | session->s_mds, session->s_nr_caps, max_caps, |
1791 | trim_caps - session->s_trim_caps); | 1808 | trim_caps - remaining); |
1792 | session->s_trim_caps = 0; | ||
1793 | } | 1809 | } |
1794 | 1810 | ||
1795 | ceph_flush_cap_releases(mdsc, session); | 1811 | ceph_flush_cap_releases(mdsc, session); |
@@ -3015,18 +3031,23 @@ bad: | |||
3015 | pr_err("mdsc_handle_forward decode error err=%d\n", err); | 3031 | pr_err("mdsc_handle_forward decode error err=%d\n", err); |
3016 | } | 3032 | } |
3017 | 3033 | ||
3018 | static int __decode_and_drop_session_metadata(void **p, void *end) | 3034 | static int __decode_session_metadata(void **p, void *end, |
3035 | bool *blacklisted) | ||
3019 | { | 3036 | { |
3020 | /* map<string,string> */ | 3037 | /* map<string,string> */ |
3021 | u32 n; | 3038 | u32 n; |
3039 | bool err_str; | ||
3022 | ceph_decode_32_safe(p, end, n, bad); | 3040 | ceph_decode_32_safe(p, end, n, bad); |
3023 | while (n-- > 0) { | 3041 | while (n-- > 0) { |
3024 | u32 len; | 3042 | u32 len; |
3025 | ceph_decode_32_safe(p, end, len, bad); | 3043 | ceph_decode_32_safe(p, end, len, bad); |
3026 | ceph_decode_need(p, end, len, bad); | 3044 | ceph_decode_need(p, end, len, bad); |
3045 | err_str = !strncmp(*p, "error_string", len); | ||
3027 | *p += len; | 3046 | *p += len; |
3028 | ceph_decode_32_safe(p, end, len, bad); | 3047 | ceph_decode_32_safe(p, end, len, bad); |
3029 | ceph_decode_need(p, end, len, bad); | 3048 | ceph_decode_need(p, end, len, bad); |
3049 | if (err_str && strnstr(*p, "blacklisted", len)) | ||
3050 | *blacklisted = true; | ||
3030 | *p += len; | 3051 | *p += len; |
3031 | } | 3052 | } |
3032 | return 0; | 3053 | return 0; |
@@ -3050,6 +3071,7 @@ static void handle_session(struct ceph_mds_session *session, | |||
3050 | u64 seq; | 3071 | u64 seq; |
3051 | unsigned long features = 0; | 3072 | unsigned long features = 0; |
3052 | int wake = 0; | 3073 | int wake = 0; |
3074 | bool blacklisted = false; | ||
3053 | 3075 | ||
3054 | /* decode */ | 3076 | /* decode */ |
3055 | ceph_decode_need(&p, end, sizeof(*h), bad); | 3077 | ceph_decode_need(&p, end, sizeof(*h), bad); |
@@ -3062,7 +3084,7 @@ static void handle_session(struct ceph_mds_session *session, | |||
3062 | if (msg_version >= 3) { | 3084 | if (msg_version >= 3) { |
3063 | u32 len; | 3085 | u32 len; |
3064 | /* version >= 2, metadata */ | 3086 | /* version >= 2, metadata */ |
3065 | if (__decode_and_drop_session_metadata(&p, end) < 0) | 3087 | if (__decode_session_metadata(&p, end, &blacklisted) < 0) |
3066 | goto bad; | 3088 | goto bad; |
3067 | /* version >= 3, feature bits */ | 3089 | /* version >= 3, feature bits */ |
3068 | ceph_decode_32_safe(&p, end, len, bad); | 3090 | ceph_decode_32_safe(&p, end, len, bad); |
@@ -3149,6 +3171,8 @@ static void handle_session(struct ceph_mds_session *session, | |||
3149 | session->s_state = CEPH_MDS_SESSION_REJECTED; | 3171 | session->s_state = CEPH_MDS_SESSION_REJECTED; |
3150 | cleanup_session_requests(mdsc, session); | 3172 | cleanup_session_requests(mdsc, session); |
3151 | remove_session_caps(session); | 3173 | remove_session_caps(session); |
3174 | if (blacklisted) | ||
3175 | mdsc->fsc->blacklisted = true; | ||
3152 | wake = 2; /* for good measure */ | 3176 | wake = 2; /* for good measure */ |
3153 | break; | 3177 | break; |
3154 | 3178 | ||
@@ -3998,7 +4022,27 @@ static void lock_unlock_sessions(struct ceph_mds_client *mdsc) | |||
3998 | mutex_unlock(&mdsc->mutex); | 4022 | mutex_unlock(&mdsc->mutex); |
3999 | } | 4023 | } |
4000 | 4024 | ||
4025 | static void maybe_recover_session(struct ceph_mds_client *mdsc) | ||
4026 | { | ||
4027 | struct ceph_fs_client *fsc = mdsc->fsc; | ||
4028 | |||
4029 | if (!ceph_test_mount_opt(fsc, CLEANRECOVER)) | ||
4030 | return; | ||
4031 | |||
4032 | if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED) | ||
4033 | return; | ||
4034 | |||
4035 | if (!READ_ONCE(fsc->blacklisted)) | ||
4036 | return; | ||
4037 | |||
4038 | if (fsc->last_auto_reconnect && | ||
4039 | time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30)) | ||
4040 | return; | ||
4001 | 4041 | ||
4042 | pr_info("auto reconnect after blacklisted\n"); | ||
4043 | fsc->last_auto_reconnect = jiffies; | ||
4044 | ceph_force_reconnect(fsc->sb); | ||
4045 | } | ||
4002 | 4046 | ||
4003 | /* | 4047 | /* |
4004 | * delayed work -- periodically trim expired leases, renew caps with mds | 4048 | * delayed work -- periodically trim expired leases, renew caps with mds |
@@ -4044,7 +4088,9 @@ static void delayed_work(struct work_struct *work) | |||
4044 | pr_info("mds%d hung\n", s->s_mds); | 4088 | pr_info("mds%d hung\n", s->s_mds); |
4045 | } | 4089 | } |
4046 | } | 4090 | } |
4047 | if (s->s_state < CEPH_MDS_SESSION_OPEN) { | 4091 | if (s->s_state == CEPH_MDS_SESSION_NEW || |
4092 | s->s_state == CEPH_MDS_SESSION_RESTARTING || | ||
4093 | s->s_state == CEPH_MDS_SESSION_REJECTED) { | ||
4048 | /* this mds is failed or recovering, just wait */ | 4094 | /* this mds is failed or recovering, just wait */ |
4049 | ceph_put_mds_session(s); | 4095 | ceph_put_mds_session(s); |
4050 | continue; | 4096 | continue; |
@@ -4072,6 +4118,8 @@ static void delayed_work(struct work_struct *work) | |||
4072 | 4118 | ||
4073 | ceph_trim_snapid_map(mdsc); | 4119 | ceph_trim_snapid_map(mdsc); |
4074 | 4120 | ||
4121 | maybe_recover_session(mdsc); | ||
4122 | |||
4075 | schedule_delayed(mdsc); | 4123 | schedule_delayed(mdsc); |
4076 | } | 4124 | } |
4077 | 4125 | ||
@@ -4355,7 +4403,12 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) | |||
4355 | session = __ceph_lookup_mds_session(mdsc, mds); | 4403 | session = __ceph_lookup_mds_session(mdsc, mds); |
4356 | if (!session) | 4404 | if (!session) |
4357 | continue; | 4405 | continue; |
4406 | |||
4407 | if (session->s_state == CEPH_MDS_SESSION_REJECTED) | ||
4408 | __unregister_session(mdsc, session); | ||
4409 | __wake_requests(mdsc, &session->s_waiting); | ||
4358 | mutex_unlock(&mdsc->mutex); | 4410 | mutex_unlock(&mdsc->mutex); |
4411 | |||
4359 | mutex_lock(&session->s_mutex); | 4412 | mutex_lock(&session->s_mutex); |
4360 | __close_session(mdsc, session); | 4413 | __close_session(mdsc, session); |
4361 | if (session->s_state == CEPH_MDS_SESSION_CLOSING) { | 4414 | if (session->s_state == CEPH_MDS_SESSION_CLOSING) { |
@@ -4364,6 +4417,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) | |||
4364 | } | 4417 | } |
4365 | mutex_unlock(&session->s_mutex); | 4418 | mutex_unlock(&session->s_mutex); |
4366 | ceph_put_mds_session(session); | 4419 | ceph_put_mds_session(session); |
4420 | |||
4367 | mutex_lock(&mdsc->mutex); | 4421 | mutex_lock(&mdsc->mutex); |
4368 | kick_requests(mdsc, mds); | 4422 | kick_requests(mdsc, mds); |
4369 | } | 4423 | } |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index f7c8603484fe..5cd131b41d84 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -148,9 +148,9 @@ enum { | |||
148 | CEPH_MDS_SESSION_OPENING = 2, | 148 | CEPH_MDS_SESSION_OPENING = 2, |
149 | CEPH_MDS_SESSION_OPEN = 3, | 149 | CEPH_MDS_SESSION_OPEN = 3, |
150 | CEPH_MDS_SESSION_HUNG = 4, | 150 | CEPH_MDS_SESSION_HUNG = 4, |
151 | CEPH_MDS_SESSION_CLOSING = 5, | 151 | CEPH_MDS_SESSION_RESTARTING = 5, |
152 | CEPH_MDS_SESSION_RESTARTING = 6, | 152 | CEPH_MDS_SESSION_RECONNECTING = 6, |
153 | CEPH_MDS_SESSION_RECONNECTING = 7, | 153 | CEPH_MDS_SESSION_CLOSING = 7, |
154 | CEPH_MDS_SESSION_REJECTED = 8, | 154 | CEPH_MDS_SESSION_REJECTED = 8, |
155 | }; | 155 | }; |
156 | 156 | ||
@@ -176,7 +176,7 @@ struct ceph_mds_session { | |||
176 | spinlock_t s_cap_lock; | 176 | spinlock_t s_cap_lock; |
177 | struct list_head s_caps; /* all caps issued by this session */ | 177 | struct list_head s_caps; /* all caps issued by this session */ |
178 | struct ceph_cap *s_cap_iterator; | 178 | struct ceph_cap *s_cap_iterator; |
179 | int s_nr_caps, s_trim_caps; | 179 | int s_nr_caps; |
180 | int s_num_cap_releases; | 180 | int s_num_cap_releases; |
181 | int s_cap_reconnect; | 181 | int s_cap_reconnect; |
182 | int s_readonly; | 182 | int s_readonly; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 377fafc76f20..edfd643a8205 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -143,6 +143,7 @@ enum { | |||
143 | Opt_snapdirname, | 143 | Opt_snapdirname, |
144 | Opt_mds_namespace, | 144 | Opt_mds_namespace, |
145 | Opt_fscache_uniq, | 145 | Opt_fscache_uniq, |
146 | Opt_recover_session, | ||
146 | Opt_last_string, | 147 | Opt_last_string, |
147 | /* string args above */ | 148 | /* string args above */ |
148 | Opt_dirstat, | 149 | Opt_dirstat, |
@@ -184,6 +185,7 @@ static match_table_t fsopt_tokens = { | |||
184 | /* int args above */ | 185 | /* int args above */ |
185 | {Opt_snapdirname, "snapdirname=%s"}, | 186 | {Opt_snapdirname, "snapdirname=%s"}, |
186 | {Opt_mds_namespace, "mds_namespace=%s"}, | 187 | {Opt_mds_namespace, "mds_namespace=%s"}, |
188 | {Opt_recover_session, "recover_session=%s"}, | ||
187 | {Opt_fscache_uniq, "fsc=%s"}, | 189 | {Opt_fscache_uniq, "fsc=%s"}, |
188 | /* string args above */ | 190 | /* string args above */ |
189 | {Opt_dirstat, "dirstat"}, | 191 | {Opt_dirstat, "dirstat"}, |
@@ -254,6 +256,17 @@ static int parse_fsopt_token(char *c, void *private) | |||
254 | if (!fsopt->mds_namespace) | 256 | if (!fsopt->mds_namespace) |
255 | return -ENOMEM; | 257 | return -ENOMEM; |
256 | break; | 258 | break; |
259 | case Opt_recover_session: | ||
260 | if (!strncmp(argstr[0].from, "no", | ||
261 | argstr[0].to - argstr[0].from)) { | ||
262 | fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; | ||
263 | } else if (!strncmp(argstr[0].from, "clean", | ||
264 | argstr[0].to - argstr[0].from)) { | ||
265 | fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; | ||
266 | } else { | ||
267 | return -EINVAL; | ||
268 | } | ||
269 | break; | ||
257 | case Opt_fscache_uniq: | 270 | case Opt_fscache_uniq: |
258 | kfree(fsopt->fscache_uniq); | 271 | kfree(fsopt->fscache_uniq); |
259 | fsopt->fscache_uniq = kstrndup(argstr[0].from, | 272 | fsopt->fscache_uniq = kstrndup(argstr[0].from, |
@@ -576,6 +589,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
576 | 589 | ||
577 | if (fsopt->mds_namespace) | 590 | if (fsopt->mds_namespace) |
578 | seq_show_option(m, "mds_namespace", fsopt->mds_namespace); | 591 | seq_show_option(m, "mds_namespace", fsopt->mds_namespace); |
592 | |||
593 | if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) | ||
594 | seq_show_option(m, "recover_session", "clean"); | ||
595 | |||
579 | if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) | 596 | if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) |
580 | seq_printf(m, ",wsize=%d", fsopt->wsize); | 597 | seq_printf(m, ",wsize=%d", fsopt->wsize); |
581 | if (fsopt->rsize != CEPH_MAX_READ_SIZE) | 598 | if (fsopt->rsize != CEPH_MAX_READ_SIZE) |
@@ -664,6 +681,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | |||
664 | 681 | ||
665 | fsc->sb = NULL; | 682 | fsc->sb = NULL; |
666 | fsc->mount_state = CEPH_MOUNT_MOUNTING; | 683 | fsc->mount_state = CEPH_MOUNT_MOUNTING; |
684 | fsc->filp_gen = 1; | ||
667 | 685 | ||
668 | atomic_long_set(&fsc->writeback_count, 0); | 686 | atomic_long_set(&fsc->writeback_count, 0); |
669 | 687 | ||
@@ -713,6 +731,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) | |||
713 | { | 731 | { |
714 | dout("destroy_fs_client %p\n", fsc); | 732 | dout("destroy_fs_client %p\n", fsc); |
715 | 733 | ||
734 | ceph_mdsc_destroy(fsc); | ||
716 | destroy_workqueue(fsc->inode_wq); | 735 | destroy_workqueue(fsc->inode_wq); |
717 | destroy_workqueue(fsc->cap_wq); | 736 | destroy_workqueue(fsc->cap_wq); |
718 | 737 | ||
@@ -829,7 +848,7 @@ static void ceph_umount_begin(struct super_block *sb) | |||
829 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; | 848 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; |
830 | ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); | 849 | ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); |
831 | ceph_mdsc_force_umount(fsc->mdsc); | 850 | ceph_mdsc_force_umount(fsc->mdsc); |
832 | return; | 851 | fsc->filp_gen++; // invalidate open files |
833 | } | 852 | } |
834 | 853 | ||
835 | static int ceph_remount(struct super_block *sb, int *flags, char *data) | 854 | static int ceph_remount(struct super_block *sb, int *flags, char *data) |
@@ -1089,7 +1108,6 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, | |||
1089 | } | 1108 | } |
1090 | 1109 | ||
1091 | if (ceph_sb_to_client(sb) != fsc) { | 1110 | if (ceph_sb_to_client(sb) != fsc) { |
1092 | ceph_mdsc_destroy(fsc); | ||
1093 | destroy_fs_client(fsc); | 1111 | destroy_fs_client(fsc); |
1094 | fsc = ceph_sb_to_client(sb); | 1112 | fsc = ceph_sb_to_client(sb); |
1095 | dout("get_sb got existing client %p\n", fsc); | 1113 | dout("get_sb got existing client %p\n", fsc); |
@@ -1115,7 +1133,6 @@ out_splat: | |||
1115 | goto out_final; | 1133 | goto out_final; |
1116 | 1134 | ||
1117 | out: | 1135 | out: |
1118 | ceph_mdsc_destroy(fsc); | ||
1119 | destroy_fs_client(fsc); | 1136 | destroy_fs_client(fsc); |
1120 | out_final: | 1137 | out_final: |
1121 | dout("ceph_mount fail %ld\n", PTR_ERR(res)); | 1138 | dout("ceph_mount fail %ld\n", PTR_ERR(res)); |
@@ -1139,8 +1156,6 @@ static void ceph_kill_sb(struct super_block *s) | |||
1139 | 1156 | ||
1140 | ceph_fscache_unregister_fs(fsc); | 1157 | ceph_fscache_unregister_fs(fsc); |
1141 | 1158 | ||
1142 | ceph_mdsc_destroy(fsc); | ||
1143 | |||
1144 | destroy_fs_client(fsc); | 1159 | destroy_fs_client(fsc); |
1145 | free_anon_bdev(dev); | 1160 | free_anon_bdev(dev); |
1146 | } | 1161 | } |
@@ -1154,6 +1169,33 @@ static struct file_system_type ceph_fs_type = { | |||
1154 | }; | 1169 | }; |
1155 | MODULE_ALIAS_FS("ceph"); | 1170 | MODULE_ALIAS_FS("ceph"); |
1156 | 1171 | ||
1172 | int ceph_force_reconnect(struct super_block *sb) | ||
1173 | { | ||
1174 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); | ||
1175 | int err = 0; | ||
1176 | |||
1177 | ceph_umount_begin(sb); | ||
1178 | |||
1179 | /* Make sure all page caches get invalidated. | ||
1180 | * see remove_session_caps_cb() */ | ||
1181 | flush_workqueue(fsc->inode_wq); | ||
1182 | |||
1183 | /* In case that we were blacklisted. This also reset | ||
1184 | * all mon/osd connections */ | ||
1185 | ceph_reset_client_addr(fsc->client); | ||
1186 | |||
1187 | ceph_osdc_clear_abort_err(&fsc->client->osdc); | ||
1188 | |||
1189 | fsc->blacklisted = false; | ||
1190 | fsc->mount_state = CEPH_MOUNT_MOUNTED; | ||
1191 | |||
1192 | if (sb->s_root) { | ||
1193 | err = __ceph_do_getattr(d_inode(sb->s_root), NULL, | ||
1194 | CEPH_STAT_CAP_INODE, true); | ||
1195 | } | ||
1196 | return err; | ||
1197 | } | ||
1198 | |||
1157 | static int __init init_ceph(void) | 1199 | static int __init init_ceph(void) |
1158 | { | 1200 | { |
1159 | int ret = init_caches(); | 1201 | int ret = init_caches(); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6b9f1ee7de85..f98d9247f9cb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/posix_acl.h> | 17 | #include <linux/posix_acl.h> |
18 | #include <linux/refcount.h> | 18 | #include <linux/refcount.h> |
19 | #include <linux/security.h> | ||
19 | 20 | ||
20 | #include <linux/ceph/libceph.h> | 21 | #include <linux/ceph/libceph.h> |
21 | 22 | ||
@@ -31,6 +32,7 @@ | |||
31 | #define CEPH_BLOCK_SHIFT 22 /* 4 MB */ | 32 | #define CEPH_BLOCK_SHIFT 22 /* 4 MB */ |
32 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 33 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
33 | 34 | ||
35 | #define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blacklisted */ | ||
34 | #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ | 36 | #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ |
35 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ | 37 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ |
36 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ | 38 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ |
@@ -101,6 +103,11 @@ struct ceph_fs_client { | |||
101 | struct ceph_client *client; | 103 | struct ceph_client *client; |
102 | 104 | ||
103 | unsigned long mount_state; | 105 | unsigned long mount_state; |
106 | |||
107 | unsigned long last_auto_reconnect; | ||
108 | bool blacklisted; | ||
109 | |||
110 | u32 filp_gen; | ||
104 | loff_t max_file_size; | 111 | loff_t max_file_size; |
105 | 112 | ||
106 | struct ceph_mds_client *mdsc; | 113 | struct ceph_mds_client *mdsc; |
@@ -395,6 +402,8 @@ struct ceph_inode_info { | |||
395 | struct fscache_cookie *fscache; | 402 | struct fscache_cookie *fscache; |
396 | u32 i_fscache_gen; | 403 | u32 i_fscache_gen; |
397 | #endif | 404 | #endif |
405 | errseq_t i_meta_err; | ||
406 | |||
398 | struct inode vfs_inode; /* at end */ | 407 | struct inode vfs_inode; /* at end */ |
399 | }; | 408 | }; |
400 | 409 | ||
@@ -499,17 +508,16 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, | |||
499 | #define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */ | 508 | #define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */ |
500 | #define CEPH_I_NODELAY (1 << 1) /* do not delay cap release */ | 509 | #define CEPH_I_NODELAY (1 << 1) /* do not delay cap release */ |
501 | #define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */ | 510 | #define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */ |
502 | #define CEPH_I_NOFLUSH (1 << 3) /* do not flush dirty caps */ | 511 | #define CEPH_I_POOL_PERM (1 << 3) /* pool rd/wr bits are valid */ |
503 | #define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */ | 512 | #define CEPH_I_POOL_RD (1 << 4) /* can read from pool */ |
504 | #define CEPH_I_POOL_RD (1 << 5) /* can read from pool */ | 513 | #define CEPH_I_POOL_WR (1 << 5) /* can write to pool */ |
505 | #define CEPH_I_POOL_WR (1 << 6) /* can write to pool */ | 514 | #define CEPH_I_SEC_INITED (1 << 6) /* security initialized */ |
506 | #define CEPH_I_SEC_INITED (1 << 7) /* security initialized */ | 515 | #define CEPH_I_CAP_DROPPED (1 << 7) /* caps were forcibly dropped */ |
507 | #define CEPH_I_CAP_DROPPED (1 << 8) /* caps were forcibly dropped */ | 516 | #define CEPH_I_KICK_FLUSH (1 << 8) /* kick flushing caps */ |
508 | #define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */ | 517 | #define CEPH_I_FLUSH_SNAPS (1 << 9) /* need flush snapss */ |
509 | #define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */ | 518 | #define CEPH_I_ERROR_WRITE (1 << 10) /* have seen write errors */ |
510 | #define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */ | 519 | #define CEPH_I_ERROR_FILELOCK (1 << 11) /* have seen file lock errors */ |
511 | #define CEPH_I_ERROR_FILELOCK (1 << 12) /* have seen file lock errors */ | 520 | #define CEPH_I_ODIRECT (1 << 12) /* inode in direct I/O mode */ |
512 | |||
513 | 521 | ||
514 | /* | 522 | /* |
515 | * Masks of ceph inode work. | 523 | * Masks of ceph inode work. |
@@ -703,6 +711,10 @@ struct ceph_file_info { | |||
703 | 711 | ||
704 | spinlock_t rw_contexts_lock; | 712 | spinlock_t rw_contexts_lock; |
705 | struct list_head rw_contexts; | 713 | struct list_head rw_contexts; |
714 | |||
715 | errseq_t meta_err; | ||
716 | u32 filp_gen; | ||
717 | atomic_t num_locks; | ||
706 | }; | 718 | }; |
707 | 719 | ||
708 | struct ceph_dir_file_info { | 720 | struct ceph_dir_file_info { |
@@ -842,7 +854,8 @@ static inline int default_congestion_kb(void) | |||
842 | } | 854 | } |
843 | 855 | ||
844 | 856 | ||
845 | 857 | /* super.c */ | |
858 | extern int ceph_force_reconnect(struct super_block *sb); | ||
846 | /* snap.c */ | 859 | /* snap.c */ |
847 | struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, | 860 | struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, |
848 | u64 ino); | 861 | u64 ino); |
@@ -959,7 +972,10 @@ static inline bool ceph_security_xattr_wanted(struct inode *in) | |||
959 | #ifdef CONFIG_CEPH_FS_SECURITY_LABEL | 972 | #ifdef CONFIG_CEPH_FS_SECURITY_LABEL |
960 | extern int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, | 973 | extern int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, |
961 | struct ceph_acl_sec_ctx *ctx); | 974 | struct ceph_acl_sec_ctx *ctx); |
962 | extern void ceph_security_invalidate_secctx(struct inode *inode); | 975 | static inline void ceph_security_invalidate_secctx(struct inode *inode) |
976 | { | ||
977 | security_inode_invalidate_secctx(inode); | ||
978 | } | ||
963 | #else | 979 | #else |
964 | static inline int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, | 980 | static inline int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, |
965 | struct ceph_acl_sec_ctx *ctx) | 981 | struct ceph_acl_sec_ctx *ctx) |
@@ -1039,7 +1055,6 @@ extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | |||
1039 | struct ceph_mds_session *session); | 1055 | struct ceph_mds_session *session); |
1040 | extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, | 1056 | extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, |
1041 | int mds); | 1057 | int mds); |
1042 | extern int ceph_get_cap_mds(struct inode *inode); | ||
1043 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); | 1058 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); |
1044 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | 1059 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); |
1045 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | 1060 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, |
@@ -1058,9 +1073,9 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn, | |||
1058 | struct inode *dir, | 1073 | struct inode *dir, |
1059 | int mds, int drop, int unless); | 1074 | int mds, int drop, int unless); |
1060 | 1075 | ||
1061 | extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | 1076 | extern int ceph_get_caps(struct file *filp, int need, int want, |
1062 | loff_t endoff, int *got, struct page **pinned_page); | 1077 | loff_t endoff, int *got, struct page **pinned_page); |
1063 | extern int ceph_try_get_caps(struct ceph_inode_info *ci, | 1078 | extern int ceph_try_get_caps(struct inode *inode, |
1064 | int need, int want, bool nonblock, int *got); | 1079 | int need, int want, bool nonblock, int *got); |
1065 | 1080 | ||
1066 | /* for counting open files by mode */ | 1081 | /* for counting open files by mode */ |
@@ -1071,7 +1086,7 @@ extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode); | |||
1071 | extern const struct address_space_operations ceph_aops; | 1086 | extern const struct address_space_operations ceph_aops; |
1072 | extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | 1087 | extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); |
1073 | extern int ceph_uninline_data(struct file *filp, struct page *locked_page); | 1088 | extern int ceph_uninline_data(struct file *filp, struct page *locked_page); |
1074 | extern int ceph_pool_perm_check(struct ceph_inode_info *ci, int need); | 1089 | extern int ceph_pool_perm_check(struct inode *inode, int need); |
1075 | extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); | 1090 | extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); |
1076 | 1091 | ||
1077 | /* file.c */ | 1092 | /* file.c */ |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 939eab7aa219..cb18ee637cb7 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -20,7 +20,8 @@ static int __remove_xattr(struct ceph_inode_info *ci, | |||
20 | 20 | ||
21 | static bool ceph_is_valid_xattr(const char *name) | 21 | static bool ceph_is_valid_xattr(const char *name) |
22 | { | 22 | { |
23 | return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) || | 23 | return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || |
24 | !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) || | ||
24 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | 25 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || |
25 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); | 26 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); |
26 | } | 27 | } |
@@ -892,7 +893,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, | |||
892 | memcpy(value, xattr->val, xattr->val_len); | 893 | memcpy(value, xattr->val, xattr->val_len); |
893 | 894 | ||
894 | if (current->journal_info && | 895 | if (current->journal_info && |
895 | !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) | 896 | !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && |
897 | security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN)) | ||
896 | ci->i_ceph_flags |= CEPH_I_SEC_INITED; | 898 | ci->i_ceph_flags |= CEPH_I_SEC_INITED; |
897 | out: | 899 | out: |
898 | spin_unlock(&ci->i_ceph_lock); | 900 | spin_unlock(&ci->i_ceph_lock); |
@@ -903,11 +905,9 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) | |||
903 | { | 905 | { |
904 | struct inode *inode = d_inode(dentry); | 906 | struct inode *inode = d_inode(dentry); |
905 | struct ceph_inode_info *ci = ceph_inode(inode); | 907 | struct ceph_inode_info *ci = ceph_inode(inode); |
906 | struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode); | ||
907 | bool len_only = (size == 0); | 908 | bool len_only = (size == 0); |
908 | u32 namelen; | 909 | u32 namelen; |
909 | int err; | 910 | int err; |
910 | int i; | ||
911 | 911 | ||
912 | spin_lock(&ci->i_ceph_lock); | 912 | spin_lock(&ci->i_ceph_lock); |
913 | dout("listxattr %p ver=%lld index_ver=%lld\n", inode, | 913 | dout("listxattr %p ver=%lld index_ver=%lld\n", inode, |
@@ -936,33 +936,6 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) | |||
936 | names = __copy_xattr_names(ci, names); | 936 | names = __copy_xattr_names(ci, names); |
937 | size -= namelen; | 937 | size -= namelen; |
938 | } | 938 | } |
939 | |||
940 | |||
941 | /* virtual xattr names, too */ | ||
942 | if (vxattrs) { | ||
943 | for (i = 0; vxattrs[i].name; i++) { | ||
944 | size_t this_len; | ||
945 | |||
946 | if (vxattrs[i].flags & VXATTR_FLAG_HIDDEN) | ||
947 | continue; | ||
948 | if (vxattrs[i].exists_cb && !vxattrs[i].exists_cb(ci)) | ||
949 | continue; | ||
950 | |||
951 | this_len = strlen(vxattrs[i].name) + 1; | ||
952 | namelen += this_len; | ||
953 | if (len_only) | ||
954 | continue; | ||
955 | |||
956 | if (this_len > size) { | ||
957 | err = -ERANGE; | ||
958 | goto out; | ||
959 | } | ||
960 | |||
961 | memcpy(names, vxattrs[i].name, this_len); | ||
962 | names += this_len; | ||
963 | size -= this_len; | ||
964 | } | ||
965 | } | ||
966 | err = namelen; | 939 | err = namelen; |
967 | out: | 940 | out: |
968 | spin_unlock(&ci->i_ceph_lock); | 941 | spin_unlock(&ci->i_ceph_lock); |
@@ -1293,42 +1266,8 @@ out: | |||
1293 | ceph_pagelist_release(pagelist); | 1266 | ceph_pagelist_release(pagelist); |
1294 | return err; | 1267 | return err; |
1295 | } | 1268 | } |
1296 | 1269 | #endif /* CONFIG_CEPH_FS_SECURITY_LABEL */ | |
1297 | void ceph_security_invalidate_secctx(struct inode *inode) | 1270 | #endif /* CONFIG_SECURITY */ |
1298 | { | ||
1299 | security_inode_invalidate_secctx(inode); | ||
1300 | } | ||
1301 | |||
1302 | static int ceph_xattr_set_security_label(const struct xattr_handler *handler, | ||
1303 | struct dentry *unused, struct inode *inode, | ||
1304 | const char *key, const void *buf, | ||
1305 | size_t buflen, int flags) | ||
1306 | { | ||
1307 | if (security_ismaclabel(key)) { | ||
1308 | const char *name = xattr_full_name(handler, key); | ||
1309 | return __ceph_setxattr(inode, name, buf, buflen, flags); | ||
1310 | } | ||
1311 | return -EOPNOTSUPP; | ||
1312 | } | ||
1313 | |||
1314 | static int ceph_xattr_get_security_label(const struct xattr_handler *handler, | ||
1315 | struct dentry *unused, struct inode *inode, | ||
1316 | const char *key, void *buf, size_t buflen) | ||
1317 | { | ||
1318 | if (security_ismaclabel(key)) { | ||
1319 | const char *name = xattr_full_name(handler, key); | ||
1320 | return __ceph_getxattr(inode, name, buf, buflen); | ||
1321 | } | ||
1322 | return -EOPNOTSUPP; | ||
1323 | } | ||
1324 | |||
1325 | static const struct xattr_handler ceph_security_label_handler = { | ||
1326 | .prefix = XATTR_SECURITY_PREFIX, | ||
1327 | .get = ceph_xattr_get_security_label, | ||
1328 | .set = ceph_xattr_set_security_label, | ||
1329 | }; | ||
1330 | #endif | ||
1331 | #endif | ||
1332 | 1271 | ||
1333 | void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx) | 1272 | void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx) |
1334 | { | 1273 | { |
@@ -1352,9 +1291,6 @@ const struct xattr_handler *ceph_xattr_handlers[] = { | |||
1352 | &posix_acl_access_xattr_handler, | 1291 | &posix_acl_access_xattr_handler, |
1353 | &posix_acl_default_xattr_handler, | 1292 | &posix_acl_default_xattr_handler, |
1354 | #endif | 1293 | #endif |
1355 | #ifdef CONFIG_CEPH_FS_SECURITY_LABEL | ||
1356 | &ceph_security_label_handler, | ||
1357 | #endif | ||
1358 | &ceph_other_xattr_handler, | 1294 | &ceph_other_xattr_handler, |
1359 | NULL, | 1295 | NULL, |
1360 | }; | 1296 | }; |
diff --git a/fs/fs_context.c b/fs/fs_context.c index 87c2c9687d90..138b5b4d621d 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c | |||
@@ -504,7 +504,6 @@ void put_fs_context(struct fs_context *fc) | |||
504 | put_net(fc->net_ns); | 504 | put_net(fc->net_ns); |
505 | put_user_ns(fc->user_ns); | 505 | put_user_ns(fc->user_ns); |
506 | put_cred(fc->cred); | 506 | put_cred(fc->cred); |
507 | kfree(fc->subtype); | ||
508 | put_fc_log(fc); | 507 | put_fc_log(fc); |
509 | put_filesystem(fc->fs_type); | 508 | put_filesystem(fc->fs_type); |
510 | kfree(fc->source); | 509 | kfree(fc->source); |
@@ -571,17 +570,6 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) | |||
571 | return 0; | 570 | return 0; |
572 | } | 571 | } |
573 | 572 | ||
574 | if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) && | ||
575 | strcmp(param->key, "subtype") == 0) { | ||
576 | if (param->type != fs_value_is_string) | ||
577 | return invalf(fc, "VFS: Legacy: Non-string subtype"); | ||
578 | if (fc->subtype) | ||
579 | return invalf(fc, "VFS: Legacy: Multiple subtype"); | ||
580 | fc->subtype = param->string; | ||
581 | param->string = NULL; | ||
582 | return 0; | ||
583 | } | ||
584 | |||
585 | if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) | 573 | if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) |
586 | return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); | 574 | return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); |
587 | 575 | ||
@@ -738,8 +726,6 @@ void vfs_clean_context(struct fs_context *fc) | |||
738 | fc->s_fs_info = NULL; | 726 | fc->s_fs_info = NULL; |
739 | fc->sb_flags = 0; | 727 | fc->sb_flags = 0; |
740 | security_free_mnt_opts(&fc->security); | 728 | security_free_mnt_opts(&fc->security); |
741 | kfree(fc->subtype); | ||
742 | fc->subtype = NULL; | ||
743 | kfree(fc->source); | 729 | kfree(fc->source); |
744 | fc->source = NULL; | 730 | fc->source = NULL; |
745 | 731 | ||
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index bab7a0db81dd..00015d851382 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c | |||
@@ -142,11 +142,10 @@ static int cuse_open(struct inode *inode, struct file *file) | |||
142 | 142 | ||
143 | static int cuse_release(struct inode *inode, struct file *file) | 143 | static int cuse_release(struct inode *inode, struct file *file) |
144 | { | 144 | { |
145 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
146 | struct fuse_file *ff = file->private_data; | 145 | struct fuse_file *ff = file->private_data; |
147 | struct fuse_conn *fc = ff->fc; | 146 | struct fuse_conn *fc = ff->fc; |
148 | 147 | ||
149 | fuse_sync_release(fi, ff, file->f_flags); | 148 | fuse_sync_release(NULL, ff, file->f_flags); |
150 | fuse_conn_put(fc); | 149 | fuse_conn_put(fc); |
151 | 150 | ||
152 | return 0; | 151 | return 0; |
@@ -299,6 +298,14 @@ static void cuse_gendev_release(struct device *dev) | |||
299 | kfree(dev); | 298 | kfree(dev); |
300 | } | 299 | } |
301 | 300 | ||
301 | struct cuse_init_args { | ||
302 | struct fuse_args_pages ap; | ||
303 | struct cuse_init_in in; | ||
304 | struct cuse_init_out out; | ||
305 | struct page *page; | ||
306 | struct fuse_page_desc desc; | ||
307 | }; | ||
308 | |||
302 | /** | 309 | /** |
303 | * cuse_process_init_reply - finish initializing CUSE channel | 310 | * cuse_process_init_reply - finish initializing CUSE channel |
304 | * | 311 | * |
@@ -306,21 +313,22 @@ static void cuse_gendev_release(struct device *dev) | |||
306 | * required data structures for it. Please read the comment at the | 313 | * required data structures for it. Please read the comment at the |
307 | * top of this file for high level overview. | 314 | * top of this file for high level overview. |
308 | */ | 315 | */ |
309 | static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | 316 | static void cuse_process_init_reply(struct fuse_conn *fc, |
317 | struct fuse_args *args, int error) | ||
310 | { | 318 | { |
319 | struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args); | ||
320 | struct fuse_args_pages *ap = &ia->ap; | ||
311 | struct cuse_conn *cc = fc_to_cc(fc), *pos; | 321 | struct cuse_conn *cc = fc_to_cc(fc), *pos; |
312 | struct cuse_init_out *arg = req->out.args[0].value; | 322 | struct cuse_init_out *arg = &ia->out; |
313 | struct page *page = req->pages[0]; | 323 | struct page *page = ap->pages[0]; |
314 | struct cuse_devinfo devinfo = { }; | 324 | struct cuse_devinfo devinfo = { }; |
315 | struct device *dev; | 325 | struct device *dev; |
316 | struct cdev *cdev; | 326 | struct cdev *cdev; |
317 | dev_t devt; | 327 | dev_t devt; |
318 | int rc, i; | 328 | int rc, i; |
319 | 329 | ||
320 | if (req->out.h.error || | 330 | if (error || arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) |
321 | arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { | ||
322 | goto err; | 331 | goto err; |
323 | } | ||
324 | 332 | ||
325 | fc->minor = arg->minor; | 333 | fc->minor = arg->minor; |
326 | fc->max_read = max_t(unsigned, arg->max_read, 4096); | 334 | fc->max_read = max_t(unsigned, arg->max_read, 4096); |
@@ -329,7 +337,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
329 | /* parse init reply */ | 337 | /* parse init reply */ |
330 | cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL; | 338 | cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL; |
331 | 339 | ||
332 | rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size, | 340 | rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size, |
333 | &devinfo); | 341 | &devinfo); |
334 | if (rc) | 342 | if (rc) |
335 | goto err; | 343 | goto err; |
@@ -396,7 +404,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
396 | dev_set_uevent_suppress(dev, 0); | 404 | dev_set_uevent_suppress(dev, 0); |
397 | kobject_uevent(&dev->kobj, KOBJ_ADD); | 405 | kobject_uevent(&dev->kobj, KOBJ_ADD); |
398 | out: | 406 | out: |
399 | kfree(arg); | 407 | kfree(ia); |
400 | __free_page(page); | 408 | __free_page(page); |
401 | return; | 409 | return; |
402 | 410 | ||
@@ -415,55 +423,49 @@ err: | |||
415 | static int cuse_send_init(struct cuse_conn *cc) | 423 | static int cuse_send_init(struct cuse_conn *cc) |
416 | { | 424 | { |
417 | int rc; | 425 | int rc; |
418 | struct fuse_req *req; | ||
419 | struct page *page; | 426 | struct page *page; |
420 | struct fuse_conn *fc = &cc->fc; | 427 | struct fuse_conn *fc = &cc->fc; |
421 | struct cuse_init_in *arg; | 428 | struct cuse_init_args *ia; |
422 | void *outarg; | 429 | struct fuse_args_pages *ap; |
423 | 430 | ||
424 | BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); | 431 | BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); |
425 | 432 | ||
426 | req = fuse_get_req_for_background(fc, 1); | ||
427 | if (IS_ERR(req)) { | ||
428 | rc = PTR_ERR(req); | ||
429 | goto err; | ||
430 | } | ||
431 | |||
432 | rc = -ENOMEM; | 433 | rc = -ENOMEM; |
433 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 434 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
434 | if (!page) | 435 | if (!page) |
435 | goto err_put_req; | 436 | goto err; |
436 | 437 | ||
437 | outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL); | 438 | ia = kzalloc(sizeof(*ia), GFP_KERNEL); |
438 | if (!outarg) | 439 | if (!ia) |
439 | goto err_free_page; | 440 | goto err_free_page; |
440 | 441 | ||
441 | arg = &req->misc.cuse_init_in; | 442 | ap = &ia->ap; |
442 | arg->major = FUSE_KERNEL_VERSION; | 443 | ia->in.major = FUSE_KERNEL_VERSION; |
443 | arg->minor = FUSE_KERNEL_MINOR_VERSION; | 444 | ia->in.minor = FUSE_KERNEL_MINOR_VERSION; |
444 | arg->flags |= CUSE_UNRESTRICTED_IOCTL; | 445 | ia->in.flags |= CUSE_UNRESTRICTED_IOCTL; |
445 | req->in.h.opcode = CUSE_INIT; | 446 | ap->args.opcode = CUSE_INIT; |
446 | req->in.numargs = 1; | 447 | ap->args.in_numargs = 1; |
447 | req->in.args[0].size = sizeof(struct cuse_init_in); | 448 | ap->args.in_args[0].size = sizeof(ia->in); |
448 | req->in.args[0].value = arg; | 449 | ap->args.in_args[0].value = &ia->in; |
449 | req->out.numargs = 2; | 450 | ap->args.out_numargs = 2; |
450 | req->out.args[0].size = sizeof(struct cuse_init_out); | 451 | ap->args.out_args[0].size = sizeof(ia->out); |
451 | req->out.args[0].value = outarg; | 452 | ap->args.out_args[0].value = &ia->out; |
452 | req->out.args[1].size = CUSE_INIT_INFO_MAX; | 453 | ap->args.out_args[1].size = CUSE_INIT_INFO_MAX; |
453 | req->out.argvar = 1; | 454 | ap->args.out_argvar = 1; |
454 | req->out.argpages = 1; | 455 | ap->args.out_pages = 1; |
455 | req->pages[0] = page; | 456 | ap->num_pages = 1; |
456 | req->page_descs[0].length = req->out.args[1].size; | 457 | ap->pages = &ia->page; |
457 | req->num_pages = 1; | 458 | ap->descs = &ia->desc; |
458 | req->end = cuse_process_init_reply; | 459 | ia->page = page; |
459 | fuse_request_send_background(fc, req); | 460 | ia->desc.length = ap->args.out_args[1].size; |
460 | 461 | ap->args.end = cuse_process_init_reply; | |
461 | return 0; | 462 | |
462 | 463 | rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL); | |
464 | if (rc) { | ||
465 | kfree(ia); | ||
463 | err_free_page: | 466 | err_free_page: |
464 | __free_page(page); | 467 | __free_page(page); |
465 | err_put_req: | 468 | } |
466 | fuse_put_request(fc, req); | ||
467 | err: | 469 | err: |
468 | return rc; | 470 | return rc; |
469 | } | 471 | } |
@@ -504,9 +506,9 @@ static int cuse_channel_open(struct inode *inode, struct file *file) | |||
504 | * Limit the cuse channel to requests that can | 506 | * Limit the cuse channel to requests that can |
505 | * be represented in file->f_cred->user_ns. | 507 | * be represented in file->f_cred->user_ns. |
506 | */ | 508 | */ |
507 | fuse_conn_init(&cc->fc, file->f_cred->user_ns); | 509 | fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL); |
508 | 510 | ||
509 | fud = fuse_dev_alloc(&cc->fc); | 511 | fud = fuse_dev_alloc_install(&cc->fc); |
510 | if (!fud) { | 512 | if (!fud) { |
511 | kfree(cc); | 513 | kfree(cc); |
512 | return -ENOMEM; | 514 | return -ENOMEM; |
@@ -519,6 +521,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) | |||
519 | rc = cuse_send_init(cc); | 521 | rc = cuse_send_init(cc); |
520 | if (rc) { | 522 | if (rc) { |
521 | fuse_dev_free(fud); | 523 | fuse_dev_free(fud); |
524 | fuse_conn_put(&cc->fc); | ||
522 | return rc; | 525 | return rc; |
523 | } | 526 | } |
524 | file->private_data = fud; | 527 | file->private_data = fud; |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ea8237513dfa..dadd617d826c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -40,107 +40,30 @@ static struct fuse_dev *fuse_get_dev(struct file *file) | |||
40 | return READ_ONCE(file->private_data); | 40 | return READ_ONCE(file->private_data); |
41 | } | 41 | } |
42 | 42 | ||
43 | static void fuse_request_init(struct fuse_req *req, struct page **pages, | 43 | static void fuse_request_init(struct fuse_req *req) |
44 | struct fuse_page_desc *page_descs, | ||
45 | unsigned npages) | ||
46 | { | 44 | { |
47 | INIT_LIST_HEAD(&req->list); | 45 | INIT_LIST_HEAD(&req->list); |
48 | INIT_LIST_HEAD(&req->intr_entry); | 46 | INIT_LIST_HEAD(&req->intr_entry); |
49 | init_waitqueue_head(&req->waitq); | 47 | init_waitqueue_head(&req->waitq); |
50 | refcount_set(&req->count, 1); | 48 | refcount_set(&req->count, 1); |
51 | req->pages = pages; | ||
52 | req->page_descs = page_descs; | ||
53 | req->max_pages = npages; | ||
54 | __set_bit(FR_PENDING, &req->flags); | 49 | __set_bit(FR_PENDING, &req->flags); |
55 | } | 50 | } |
56 | 51 | ||
57 | static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags, | 52 | static struct fuse_req *fuse_request_alloc(gfp_t flags) |
58 | struct fuse_page_desc **desc) | ||
59 | { | ||
60 | struct page **pages; | ||
61 | |||
62 | pages = kzalloc(npages * (sizeof(struct page *) + | ||
63 | sizeof(struct fuse_page_desc)), flags); | ||
64 | *desc = (void *) pages + npages * sizeof(struct page *); | ||
65 | |||
66 | return pages; | ||
67 | } | ||
68 | |||
69 | static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) | ||
70 | { | 53 | { |
71 | struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags); | 54 | struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags); |
72 | if (req) { | 55 | if (req) |
73 | struct page **pages = NULL; | 56 | fuse_request_init(req); |
74 | struct fuse_page_desc *page_descs = NULL; | ||
75 | |||
76 | WARN_ON(npages > FUSE_MAX_MAX_PAGES); | ||
77 | if (npages > FUSE_REQ_INLINE_PAGES) { | ||
78 | pages = fuse_req_pages_alloc(npages, flags, | ||
79 | &page_descs); | ||
80 | if (!pages) { | ||
81 | kmem_cache_free(fuse_req_cachep, req); | ||
82 | return NULL; | ||
83 | } | ||
84 | } else if (npages) { | ||
85 | pages = req->inline_pages; | ||
86 | page_descs = req->inline_page_descs; | ||
87 | } | ||
88 | 57 | ||
89 | fuse_request_init(req, pages, page_descs, npages); | ||
90 | } | ||
91 | return req; | 58 | return req; |
92 | } | 59 | } |
93 | 60 | ||
94 | struct fuse_req *fuse_request_alloc(unsigned npages) | 61 | static void fuse_request_free(struct fuse_req *req) |
95 | { | ||
96 | return __fuse_request_alloc(npages, GFP_KERNEL); | ||
97 | } | ||
98 | EXPORT_SYMBOL_GPL(fuse_request_alloc); | ||
99 | |||
100 | struct fuse_req *fuse_request_alloc_nofs(unsigned npages) | ||
101 | { | ||
102 | return __fuse_request_alloc(npages, GFP_NOFS); | ||
103 | } | ||
104 | |||
105 | static void fuse_req_pages_free(struct fuse_req *req) | ||
106 | { | ||
107 | if (req->pages != req->inline_pages) | ||
108 | kfree(req->pages); | ||
109 | } | ||
110 | |||
111 | bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req, | ||
112 | gfp_t flags) | ||
113 | { | ||
114 | struct page **pages; | ||
115 | struct fuse_page_desc *page_descs; | ||
116 | unsigned int npages = min_t(unsigned int, | ||
117 | max_t(unsigned int, req->max_pages * 2, | ||
118 | FUSE_DEFAULT_MAX_PAGES_PER_REQ), | ||
119 | fc->max_pages); | ||
120 | WARN_ON(npages <= req->max_pages); | ||
121 | |||
122 | pages = fuse_req_pages_alloc(npages, flags, &page_descs); | ||
123 | if (!pages) | ||
124 | return false; | ||
125 | |||
126 | memcpy(pages, req->pages, sizeof(struct page *) * req->max_pages); | ||
127 | memcpy(page_descs, req->page_descs, | ||
128 | sizeof(struct fuse_page_desc) * req->max_pages); | ||
129 | fuse_req_pages_free(req); | ||
130 | req->pages = pages; | ||
131 | req->page_descs = page_descs; | ||
132 | req->max_pages = npages; | ||
133 | |||
134 | return true; | ||
135 | } | ||
136 | |||
137 | void fuse_request_free(struct fuse_req *req) | ||
138 | { | 62 | { |
139 | fuse_req_pages_free(req); | ||
140 | kmem_cache_free(fuse_req_cachep, req); | 63 | kmem_cache_free(fuse_req_cachep, req); |
141 | } | 64 | } |
142 | 65 | ||
143 | void __fuse_get_request(struct fuse_req *req) | 66 | static void __fuse_get_request(struct fuse_req *req) |
144 | { | 67 | { |
145 | refcount_inc(&req->count); | 68 | refcount_inc(&req->count); |
146 | } | 69 | } |
@@ -177,8 +100,9 @@ static void fuse_drop_waiting(struct fuse_conn *fc) | |||
177 | } | 100 | } |
178 | } | 101 | } |
179 | 102 | ||
180 | static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, | 103 | static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); |
181 | bool for_background) | 104 | |
105 | static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background) | ||
182 | { | 106 | { |
183 | struct fuse_req *req; | 107 | struct fuse_req *req; |
184 | int err; | 108 | int err; |
@@ -201,7 +125,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, | |||
201 | if (fc->conn_error) | 125 | if (fc->conn_error) |
202 | goto out; | 126 | goto out; |
203 | 127 | ||
204 | req = fuse_request_alloc(npages); | 128 | req = fuse_request_alloc(GFP_KERNEL); |
205 | err = -ENOMEM; | 129 | err = -ENOMEM; |
206 | if (!req) { | 130 | if (!req) { |
207 | if (for_background) | 131 | if (for_background) |
@@ -229,101 +153,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, | |||
229 | return ERR_PTR(err); | 153 | return ERR_PTR(err); |
230 | } | 154 | } |
231 | 155 | ||
232 | struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) | 156 | static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) |
233 | { | ||
234 | return __fuse_get_req(fc, npages, false); | ||
235 | } | ||
236 | EXPORT_SYMBOL_GPL(fuse_get_req); | ||
237 | |||
238 | struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, | ||
239 | unsigned npages) | ||
240 | { | ||
241 | return __fuse_get_req(fc, npages, true); | ||
242 | } | ||
243 | EXPORT_SYMBOL_GPL(fuse_get_req_for_background); | ||
244 | |||
245 | /* | ||
246 | * Return request in fuse_file->reserved_req. However that may | ||
247 | * currently be in use. If that is the case, wait for it to become | ||
248 | * available. | ||
249 | */ | ||
250 | static struct fuse_req *get_reserved_req(struct fuse_conn *fc, | ||
251 | struct file *file) | ||
252 | { | ||
253 | struct fuse_req *req = NULL; | ||
254 | struct fuse_inode *fi = get_fuse_inode(file_inode(file)); | ||
255 | struct fuse_file *ff = file->private_data; | ||
256 | |||
257 | do { | ||
258 | wait_event(fc->reserved_req_waitq, ff->reserved_req); | ||
259 | spin_lock(&fi->lock); | ||
260 | if (ff->reserved_req) { | ||
261 | req = ff->reserved_req; | ||
262 | ff->reserved_req = NULL; | ||
263 | req->stolen_file = get_file(file); | ||
264 | } | ||
265 | spin_unlock(&fi->lock); | ||
266 | } while (!req); | ||
267 | |||
268 | return req; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Put stolen request back into fuse_file->reserved_req | ||
273 | */ | ||
274 | static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) | ||
275 | { | ||
276 | struct file *file = req->stolen_file; | ||
277 | struct fuse_inode *fi = get_fuse_inode(file_inode(file)); | ||
278 | struct fuse_file *ff = file->private_data; | ||
279 | |||
280 | WARN_ON(req->max_pages); | ||
281 | spin_lock(&fi->lock); | ||
282 | memset(req, 0, sizeof(*req)); | ||
283 | fuse_request_init(req, NULL, NULL, 0); | ||
284 | BUG_ON(ff->reserved_req); | ||
285 | ff->reserved_req = req; | ||
286 | wake_up_all(&fc->reserved_req_waitq); | ||
287 | spin_unlock(&fi->lock); | ||
288 | fput(file); | ||
289 | } | ||
290 | |||
291 | /* | ||
292 | * Gets a requests for a file operation, always succeeds | ||
293 | * | ||
294 | * This is used for sending the FLUSH request, which must get to | ||
295 | * userspace, due to POSIX locks which may need to be unlocked. | ||
296 | * | ||
297 | * If allocation fails due to OOM, use the reserved request in | ||
298 | * fuse_file. | ||
299 | * | ||
300 | * This is very unlikely to deadlock accidentally, since the | ||
301 | * filesystem should not have it's own file open. If deadlock is | ||
302 | * intentional, it can still be broken by "aborting" the filesystem. | ||
303 | */ | ||
304 | struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, | ||
305 | struct file *file) | ||
306 | { | ||
307 | struct fuse_req *req; | ||
308 | |||
309 | atomic_inc(&fc->num_waiting); | ||
310 | wait_event(fc->blocked_waitq, fc->initialized); | ||
311 | /* Matches smp_wmb() in fuse_set_initialized() */ | ||
312 | smp_rmb(); | ||
313 | req = fuse_request_alloc(0); | ||
314 | if (!req) | ||
315 | req = get_reserved_req(fc, file); | ||
316 | |||
317 | req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); | ||
318 | req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); | ||
319 | req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); | ||
320 | |||
321 | __set_bit(FR_WAITING, &req->flags); | ||
322 | __clear_bit(FR_BACKGROUND, &req->flags); | ||
323 | return req; | ||
324 | } | ||
325 | |||
326 | void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) | ||
327 | { | 157 | { |
328 | if (refcount_dec_and_test(&req->count)) { | 158 | if (refcount_dec_and_test(&req->count)) { |
329 | if (test_bit(FR_BACKGROUND, &req->flags)) { | 159 | if (test_bit(FR_BACKGROUND, &req->flags)) { |
@@ -342,15 +172,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) | |||
342 | fuse_drop_waiting(fc); | 172 | fuse_drop_waiting(fc); |
343 | } | 173 | } |
344 | 174 | ||
345 | if (req->stolen_file) | 175 | fuse_request_free(req); |
346 | put_reserved_req(fc, req); | ||
347 | else | ||
348 | fuse_request_free(req); | ||
349 | } | 176 | } |
350 | } | 177 | } |
351 | EXPORT_SYMBOL_GPL(fuse_put_request); | ||
352 | 178 | ||
353 | static unsigned len_args(unsigned numargs, struct fuse_arg *args) | 179 | unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args) |
354 | { | 180 | { |
355 | unsigned nbytes = 0; | 181 | unsigned nbytes = 0; |
356 | unsigned i; | 182 | unsigned i; |
@@ -360,25 +186,47 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) | |||
360 | 186 | ||
361 | return nbytes; | 187 | return nbytes; |
362 | } | 188 | } |
189 | EXPORT_SYMBOL_GPL(fuse_len_args); | ||
363 | 190 | ||
364 | static u64 fuse_get_unique(struct fuse_iqueue *fiq) | 191 | u64 fuse_get_unique(struct fuse_iqueue *fiq) |
365 | { | 192 | { |
366 | fiq->reqctr += FUSE_REQ_ID_STEP; | 193 | fiq->reqctr += FUSE_REQ_ID_STEP; |
367 | return fiq->reqctr; | 194 | return fiq->reqctr; |
368 | } | 195 | } |
196 | EXPORT_SYMBOL_GPL(fuse_get_unique); | ||
369 | 197 | ||
370 | static unsigned int fuse_req_hash(u64 unique) | 198 | static unsigned int fuse_req_hash(u64 unique) |
371 | { | 199 | { |
372 | return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); | 200 | return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); |
373 | } | 201 | } |
374 | 202 | ||
375 | static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) | 203 | /** |
204 | * A new request is available, wake fiq->waitq | ||
205 | */ | ||
206 | static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) | ||
207 | __releases(fiq->lock) | ||
208 | { | ||
209 | wake_up(&fiq->waitq); | ||
210 | kill_fasync(&fiq->fasync, SIGIO, POLL_IN); | ||
211 | spin_unlock(&fiq->lock); | ||
212 | } | ||
213 | |||
214 | const struct fuse_iqueue_ops fuse_dev_fiq_ops = { | ||
215 | .wake_forget_and_unlock = fuse_dev_wake_and_unlock, | ||
216 | .wake_interrupt_and_unlock = fuse_dev_wake_and_unlock, | ||
217 | .wake_pending_and_unlock = fuse_dev_wake_and_unlock, | ||
218 | }; | ||
219 | EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); | ||
220 | |||
221 | static void queue_request_and_unlock(struct fuse_iqueue *fiq, | ||
222 | struct fuse_req *req) | ||
223 | __releases(fiq->lock) | ||
376 | { | 224 | { |
377 | req->in.h.len = sizeof(struct fuse_in_header) + | 225 | req->in.h.len = sizeof(struct fuse_in_header) + |
378 | len_args(req->in.numargs, (struct fuse_arg *) req->in.args); | 226 | fuse_len_args(req->args->in_numargs, |
227 | (struct fuse_arg *) req->args->in_args); | ||
379 | list_add_tail(&req->list, &fiq->pending); | 228 | list_add_tail(&req->list, &fiq->pending); |
380 | wake_up_locked(&fiq->waitq); | 229 | fiq->ops->wake_pending_and_unlock(fiq); |
381 | kill_fasync(&fiq->fasync, SIGIO, POLL_IN); | ||
382 | } | 230 | } |
383 | 231 | ||
384 | void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, | 232 | void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, |
@@ -389,16 +237,15 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, | |||
389 | forget->forget_one.nodeid = nodeid; | 237 | forget->forget_one.nodeid = nodeid; |
390 | forget->forget_one.nlookup = nlookup; | 238 | forget->forget_one.nlookup = nlookup; |
391 | 239 | ||
392 | spin_lock(&fiq->waitq.lock); | 240 | spin_lock(&fiq->lock); |
393 | if (fiq->connected) { | 241 | if (fiq->connected) { |
394 | fiq->forget_list_tail->next = forget; | 242 | fiq->forget_list_tail->next = forget; |
395 | fiq->forget_list_tail = forget; | 243 | fiq->forget_list_tail = forget; |
396 | wake_up_locked(&fiq->waitq); | 244 | fiq->ops->wake_forget_and_unlock(fiq); |
397 | kill_fasync(&fiq->fasync, SIGIO, POLL_IN); | ||
398 | } else { | 245 | } else { |
399 | kfree(forget); | 246 | kfree(forget); |
247 | spin_unlock(&fiq->lock); | ||
400 | } | 248 | } |
401 | spin_unlock(&fiq->waitq.lock); | ||
402 | } | 249 | } |
403 | 250 | ||
404 | static void flush_bg_queue(struct fuse_conn *fc) | 251 | static void flush_bg_queue(struct fuse_conn *fc) |
@@ -412,10 +259,9 @@ static void flush_bg_queue(struct fuse_conn *fc) | |||
412 | req = list_first_entry(&fc->bg_queue, struct fuse_req, list); | 259 | req = list_first_entry(&fc->bg_queue, struct fuse_req, list); |
413 | list_del(&req->list); | 260 | list_del(&req->list); |
414 | fc->active_background++; | 261 | fc->active_background++; |
415 | spin_lock(&fiq->waitq.lock); | 262 | spin_lock(&fiq->lock); |
416 | req->in.h.unique = fuse_get_unique(fiq); | 263 | req->in.h.unique = fuse_get_unique(fiq); |
417 | queue_request(fiq, req); | 264 | queue_request_and_unlock(fiq, req); |
418 | spin_unlock(&fiq->waitq.lock); | ||
419 | } | 265 | } |
420 | } | 266 | } |
421 | 267 | ||
@@ -427,9 +273,10 @@ static void flush_bg_queue(struct fuse_conn *fc) | |||
427 | * the 'end' callback is called if given, else the reference to the | 273 | * the 'end' callback is called if given, else the reference to the |
428 | * request is released | 274 | * request is released |
429 | */ | 275 | */ |
430 | static void request_end(struct fuse_conn *fc, struct fuse_req *req) | 276 | void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) |
431 | { | 277 | { |
432 | struct fuse_iqueue *fiq = &fc->iq; | 278 | struct fuse_iqueue *fiq = &fc->iq; |
279 | bool async = req->args->end; | ||
433 | 280 | ||
434 | if (test_and_set_bit(FR_FINISHED, &req->flags)) | 281 | if (test_and_set_bit(FR_FINISHED, &req->flags)) |
435 | goto put_request; | 282 | goto put_request; |
@@ -439,9 +286,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) | |||
439 | * smp_mb() from queue_interrupt(). | 286 | * smp_mb() from queue_interrupt(). |
440 | */ | 287 | */ |
441 | if (!list_empty(&req->intr_entry)) { | 288 | if (!list_empty(&req->intr_entry)) { |
442 | spin_lock(&fiq->waitq.lock); | 289 | spin_lock(&fiq->lock); |
443 | list_del_init(&req->intr_entry); | 290 | list_del_init(&req->intr_entry); |
444 | spin_unlock(&fiq->waitq.lock); | 291 | spin_unlock(&fiq->lock); |
445 | } | 292 | } |
446 | WARN_ON(test_bit(FR_PENDING, &req->flags)); | 293 | WARN_ON(test_bit(FR_PENDING, &req->flags)); |
447 | WARN_ON(test_bit(FR_SENT, &req->flags)); | 294 | WARN_ON(test_bit(FR_SENT, &req->flags)); |
@@ -475,18 +322,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) | |||
475 | wake_up(&req->waitq); | 322 | wake_up(&req->waitq); |
476 | } | 323 | } |
477 | 324 | ||
478 | if (req->end) | 325 | if (async) |
479 | req->end(fc, req); | 326 | req->args->end(fc, req->args, req->out.h.error); |
480 | put_request: | 327 | put_request: |
481 | fuse_put_request(fc, req); | 328 | fuse_put_request(fc, req); |
482 | } | 329 | } |
330 | EXPORT_SYMBOL_GPL(fuse_request_end); | ||
483 | 331 | ||
484 | static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) | 332 | static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) |
485 | { | 333 | { |
486 | spin_lock(&fiq->waitq.lock); | 334 | spin_lock(&fiq->lock); |
487 | /* Check for we've sent request to interrupt this req */ | 335 | /* Check for we've sent request to interrupt this req */ |
488 | if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { | 336 | if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { |
489 | spin_unlock(&fiq->waitq.lock); | 337 | spin_unlock(&fiq->lock); |
490 | return -EINVAL; | 338 | return -EINVAL; |
491 | } | 339 | } |
492 | 340 | ||
@@ -499,13 +347,13 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) | |||
499 | smp_mb(); | 347 | smp_mb(); |
500 | if (test_bit(FR_FINISHED, &req->flags)) { | 348 | if (test_bit(FR_FINISHED, &req->flags)) { |
501 | list_del_init(&req->intr_entry); | 349 | list_del_init(&req->intr_entry); |
502 | spin_unlock(&fiq->waitq.lock); | 350 | spin_unlock(&fiq->lock); |
503 | return 0; | 351 | return 0; |
504 | } | 352 | } |
505 | wake_up_locked(&fiq->waitq); | 353 | fiq->ops->wake_interrupt_and_unlock(fiq); |
506 | kill_fasync(&fiq->fasync, SIGIO, POLL_IN); | 354 | } else { |
355 | spin_unlock(&fiq->lock); | ||
507 | } | 356 | } |
508 | spin_unlock(&fiq->waitq.lock); | ||
509 | return 0; | 357 | return 0; |
510 | } | 358 | } |
511 | 359 | ||
@@ -535,16 +383,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) | |||
535 | if (!err) | 383 | if (!err) |
536 | return; | 384 | return; |
537 | 385 | ||
538 | spin_lock(&fiq->waitq.lock); | 386 | spin_lock(&fiq->lock); |
539 | /* Request is not yet in userspace, bail out */ | 387 | /* Request is not yet in userspace, bail out */ |
540 | if (test_bit(FR_PENDING, &req->flags)) { | 388 | if (test_bit(FR_PENDING, &req->flags)) { |
541 | list_del(&req->list); | 389 | list_del(&req->list); |
542 | spin_unlock(&fiq->waitq.lock); | 390 | spin_unlock(&fiq->lock); |
543 | __fuse_put_request(req); | 391 | __fuse_put_request(req); |
544 | req->out.h.error = -EINTR; | 392 | req->out.h.error = -EINTR; |
545 | return; | 393 | return; |
546 | } | 394 | } |
547 | spin_unlock(&fiq->waitq.lock); | 395 | spin_unlock(&fiq->lock); |
548 | } | 396 | } |
549 | 397 | ||
550 | /* | 398 | /* |
@@ -559,101 +407,110 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) | |||
559 | struct fuse_iqueue *fiq = &fc->iq; | 407 | struct fuse_iqueue *fiq = &fc->iq; |
560 | 408 | ||
561 | BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); | 409 | BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); |
562 | spin_lock(&fiq->waitq.lock); | 410 | spin_lock(&fiq->lock); |
563 | if (!fiq->connected) { | 411 | if (!fiq->connected) { |
564 | spin_unlock(&fiq->waitq.lock); | 412 | spin_unlock(&fiq->lock); |
565 | req->out.h.error = -ENOTCONN; | 413 | req->out.h.error = -ENOTCONN; |
566 | } else { | 414 | } else { |
567 | req->in.h.unique = fuse_get_unique(fiq); | 415 | req->in.h.unique = fuse_get_unique(fiq); |
568 | queue_request(fiq, req); | ||
569 | /* acquire extra reference, since request is still needed | 416 | /* acquire extra reference, since request is still needed |
570 | after request_end() */ | 417 | after fuse_request_end() */ |
571 | __fuse_get_request(req); | 418 | __fuse_get_request(req); |
572 | spin_unlock(&fiq->waitq.lock); | 419 | queue_request_and_unlock(fiq, req); |
573 | 420 | ||
574 | request_wait_answer(fc, req); | 421 | request_wait_answer(fc, req); |
575 | /* Pairs with smp_wmb() in request_end() */ | 422 | /* Pairs with smp_wmb() in fuse_request_end() */ |
576 | smp_rmb(); | 423 | smp_rmb(); |
577 | } | 424 | } |
578 | } | 425 | } |
579 | 426 | ||
580 | void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) | ||
581 | { | ||
582 | __set_bit(FR_ISREPLY, &req->flags); | ||
583 | if (!test_bit(FR_WAITING, &req->flags)) { | ||
584 | __set_bit(FR_WAITING, &req->flags); | ||
585 | atomic_inc(&fc->num_waiting); | ||
586 | } | ||
587 | __fuse_request_send(fc, req); | ||
588 | } | ||
589 | EXPORT_SYMBOL_GPL(fuse_request_send); | ||
590 | |||
591 | static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) | 427 | static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) |
592 | { | 428 | { |
593 | if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS) | 429 | if (fc->minor < 4 && args->opcode == FUSE_STATFS) |
594 | args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE; | 430 | args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE; |
595 | 431 | ||
596 | if (fc->minor < 9) { | 432 | if (fc->minor < 9) { |
597 | switch (args->in.h.opcode) { | 433 | switch (args->opcode) { |
598 | case FUSE_LOOKUP: | 434 | case FUSE_LOOKUP: |
599 | case FUSE_CREATE: | 435 | case FUSE_CREATE: |
600 | case FUSE_MKNOD: | 436 | case FUSE_MKNOD: |
601 | case FUSE_MKDIR: | 437 | case FUSE_MKDIR: |
602 | case FUSE_SYMLINK: | 438 | case FUSE_SYMLINK: |
603 | case FUSE_LINK: | 439 | case FUSE_LINK: |
604 | args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; | 440 | args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; |
605 | break; | 441 | break; |
606 | case FUSE_GETATTR: | 442 | case FUSE_GETATTR: |
607 | case FUSE_SETATTR: | 443 | case FUSE_SETATTR: |
608 | args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; | 444 | args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; |
609 | break; | 445 | break; |
610 | } | 446 | } |
611 | } | 447 | } |
612 | if (fc->minor < 12) { | 448 | if (fc->minor < 12) { |
613 | switch (args->in.h.opcode) { | 449 | switch (args->opcode) { |
614 | case FUSE_CREATE: | 450 | case FUSE_CREATE: |
615 | args->in.args[0].size = sizeof(struct fuse_open_in); | 451 | args->in_args[0].size = sizeof(struct fuse_open_in); |
616 | break; | 452 | break; |
617 | case FUSE_MKNOD: | 453 | case FUSE_MKNOD: |
618 | args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE; | 454 | args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE; |
619 | break; | 455 | break; |
620 | } | 456 | } |
621 | } | 457 | } |
622 | } | 458 | } |
623 | 459 | ||
460 | static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req) | ||
461 | { | ||
462 | req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); | ||
463 | req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); | ||
464 | req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); | ||
465 | } | ||
466 | |||
467 | static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) | ||
468 | { | ||
469 | req->in.h.opcode = args->opcode; | ||
470 | req->in.h.nodeid = args->nodeid; | ||
471 | req->args = args; | ||
472 | } | ||
473 | |||
624 | ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) | 474 | ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) |
625 | { | 475 | { |
626 | struct fuse_req *req; | 476 | struct fuse_req *req; |
627 | ssize_t ret; | 477 | ssize_t ret; |
628 | 478 | ||
629 | req = fuse_get_req(fc, 0); | 479 | if (args->force) { |
630 | if (IS_ERR(req)) | 480 | atomic_inc(&fc->num_waiting); |
631 | return PTR_ERR(req); | 481 | req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL); |
482 | |||
483 | if (!args->nocreds) | ||
484 | fuse_force_creds(fc, req); | ||
485 | |||
486 | __set_bit(FR_WAITING, &req->flags); | ||
487 | __set_bit(FR_FORCE, &req->flags); | ||
488 | } else { | ||
489 | WARN_ON(args->nocreds); | ||
490 | req = fuse_get_req(fc, false); | ||
491 | if (IS_ERR(req)) | ||
492 | return PTR_ERR(req); | ||
493 | } | ||
632 | 494 | ||
633 | /* Needs to be done after fuse_get_req() so that fc->minor is valid */ | 495 | /* Needs to be done after fuse_get_req() so that fc->minor is valid */ |
634 | fuse_adjust_compat(fc, args); | 496 | fuse_adjust_compat(fc, args); |
497 | fuse_args_to_req(req, args); | ||
635 | 498 | ||
636 | req->in.h.opcode = args->in.h.opcode; | 499 | if (!args->noreply) |
637 | req->in.h.nodeid = args->in.h.nodeid; | 500 | __set_bit(FR_ISREPLY, &req->flags); |
638 | req->in.numargs = args->in.numargs; | 501 | __fuse_request_send(fc, req); |
639 | memcpy(req->in.args, args->in.args, | ||
640 | args->in.numargs * sizeof(struct fuse_in_arg)); | ||
641 | req->out.argvar = args->out.argvar; | ||
642 | req->out.numargs = args->out.numargs; | ||
643 | memcpy(req->out.args, args->out.args, | ||
644 | args->out.numargs * sizeof(struct fuse_arg)); | ||
645 | fuse_request_send(fc, req); | ||
646 | ret = req->out.h.error; | 502 | ret = req->out.h.error; |
647 | if (!ret && args->out.argvar) { | 503 | if (!ret && args->out_argvar) { |
648 | BUG_ON(args->out.numargs != 1); | 504 | BUG_ON(args->out_numargs == 0); |
649 | ret = req->out.args[0].size; | 505 | ret = args->out_args[args->out_numargs - 1].size; |
650 | } | 506 | } |
651 | fuse_put_request(fc, req); | 507 | fuse_put_request(fc, req); |
652 | 508 | ||
653 | return ret; | 509 | return ret; |
654 | } | 510 | } |
655 | 511 | ||
656 | bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req) | 512 | static bool fuse_request_queue_background(struct fuse_conn *fc, |
513 | struct fuse_req *req) | ||
657 | { | 514 | { |
658 | bool queued = false; | 515 | bool queued = false; |
659 | 516 | ||
@@ -681,56 +538,63 @@ bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req) | |||
681 | return queued; | 538 | return queued; |
682 | } | 539 | } |
683 | 540 | ||
684 | void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) | 541 | int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, |
542 | gfp_t gfp_flags) | ||
685 | { | 543 | { |
686 | WARN_ON(!req->end); | 544 | struct fuse_req *req; |
545 | |||
546 | if (args->force) { | ||
547 | WARN_ON(!args->nocreds); | ||
548 | req = fuse_request_alloc(gfp_flags); | ||
549 | if (!req) | ||
550 | return -ENOMEM; | ||
551 | __set_bit(FR_BACKGROUND, &req->flags); | ||
552 | } else { | ||
553 | WARN_ON(args->nocreds); | ||
554 | req = fuse_get_req(fc, true); | ||
555 | if (IS_ERR(req)) | ||
556 | return PTR_ERR(req); | ||
557 | } | ||
558 | |||
559 | fuse_args_to_req(req, args); | ||
560 | |||
687 | if (!fuse_request_queue_background(fc, req)) { | 561 | if (!fuse_request_queue_background(fc, req)) { |
688 | req->out.h.error = -ENOTCONN; | ||
689 | req->end(fc, req); | ||
690 | fuse_put_request(fc, req); | 562 | fuse_put_request(fc, req); |
563 | return -ENOTCONN; | ||
691 | } | 564 | } |
565 | |||
566 | return 0; | ||
692 | } | 567 | } |
693 | EXPORT_SYMBOL_GPL(fuse_request_send_background); | 568 | EXPORT_SYMBOL_GPL(fuse_simple_background); |
694 | 569 | ||
695 | static int fuse_request_send_notify_reply(struct fuse_conn *fc, | 570 | static int fuse_simple_notify_reply(struct fuse_conn *fc, |
696 | struct fuse_req *req, u64 unique) | 571 | struct fuse_args *args, u64 unique) |
697 | { | 572 | { |
698 | int err = -ENODEV; | 573 | struct fuse_req *req; |
699 | struct fuse_iqueue *fiq = &fc->iq; | 574 | struct fuse_iqueue *fiq = &fc->iq; |
575 | int err = 0; | ||
576 | |||
577 | req = fuse_get_req(fc, false); | ||
578 | if (IS_ERR(req)) | ||
579 | return PTR_ERR(req); | ||
700 | 580 | ||
701 | __clear_bit(FR_ISREPLY, &req->flags); | 581 | __clear_bit(FR_ISREPLY, &req->flags); |
702 | req->in.h.unique = unique; | 582 | req->in.h.unique = unique; |
703 | spin_lock(&fiq->waitq.lock); | 583 | |
584 | fuse_args_to_req(req, args); | ||
585 | |||
586 | spin_lock(&fiq->lock); | ||
704 | if (fiq->connected) { | 587 | if (fiq->connected) { |
705 | queue_request(fiq, req); | 588 | queue_request_and_unlock(fiq, req); |
706 | err = 0; | 589 | } else { |
590 | err = -ENODEV; | ||
591 | spin_unlock(&fiq->lock); | ||
592 | fuse_put_request(fc, req); | ||
707 | } | 593 | } |
708 | spin_unlock(&fiq->waitq.lock); | ||
709 | 594 | ||
710 | return err; | 595 | return err; |
711 | } | 596 | } |
712 | 597 | ||
713 | void fuse_force_forget(struct file *file, u64 nodeid) | ||
714 | { | ||
715 | struct inode *inode = file_inode(file); | ||
716 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
717 | struct fuse_req *req; | ||
718 | struct fuse_forget_in inarg; | ||
719 | |||
720 | memset(&inarg, 0, sizeof(inarg)); | ||
721 | inarg.nlookup = 1; | ||
722 | req = fuse_get_req_nofail_nopages(fc, file); | ||
723 | req->in.h.opcode = FUSE_FORGET; | ||
724 | req->in.h.nodeid = nodeid; | ||
725 | req->in.numargs = 1; | ||
726 | req->in.args[0].size = sizeof(inarg); | ||
727 | req->in.args[0].value = &inarg; | ||
728 | __clear_bit(FR_ISREPLY, &req->flags); | ||
729 | __fuse_request_send(fc, req); | ||
730 | /* ignore errors */ | ||
731 | fuse_put_request(fc, req); | ||
732 | } | ||
733 | |||
734 | /* | 598 | /* |
735 | * Lock the request. Up to the next unlock_request() there mustn't be | 599 | * Lock the request. Up to the next unlock_request() there mustn't be |
736 | * anything that could cause a page-fault. If the request was already | 600 | * anything that could cause a page-fault. If the request was already |
@@ -1084,14 +948,15 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, | |||
1084 | { | 948 | { |
1085 | unsigned i; | 949 | unsigned i; |
1086 | struct fuse_req *req = cs->req; | 950 | struct fuse_req *req = cs->req; |
951 | struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); | ||
952 | |||
1087 | 953 | ||
1088 | for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { | 954 | for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) { |
1089 | int err; | 955 | int err; |
1090 | unsigned offset = req->page_descs[i].offset; | 956 | unsigned int offset = ap->descs[i].offset; |
1091 | unsigned count = min(nbytes, req->page_descs[i].length); | 957 | unsigned int count = min(nbytes, ap->descs[i].length); |
1092 | 958 | ||
1093 | err = fuse_copy_page(cs, &req->pages[i], offset, count, | 959 | err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing); |
1094 | zeroing); | ||
1095 | if (err) | 960 | if (err) |
1096 | return err; | 961 | return err; |
1097 | 962 | ||
@@ -1149,12 +1014,12 @@ static int request_pending(struct fuse_iqueue *fiq) | |||
1149 | * Unlike other requests this is assembled on demand, without a need | 1014 | * Unlike other requests this is assembled on demand, without a need |
1150 | * to allocate a separate fuse_req structure. | 1015 | * to allocate a separate fuse_req structure. |
1151 | * | 1016 | * |
1152 | * Called with fiq->waitq.lock held, releases it | 1017 | * Called with fiq->lock held, releases it |
1153 | */ | 1018 | */ |
1154 | static int fuse_read_interrupt(struct fuse_iqueue *fiq, | 1019 | static int fuse_read_interrupt(struct fuse_iqueue *fiq, |
1155 | struct fuse_copy_state *cs, | 1020 | struct fuse_copy_state *cs, |
1156 | size_t nbytes, struct fuse_req *req) | 1021 | size_t nbytes, struct fuse_req *req) |
1157 | __releases(fiq->waitq.lock) | 1022 | __releases(fiq->lock) |
1158 | { | 1023 | { |
1159 | struct fuse_in_header ih; | 1024 | struct fuse_in_header ih; |
1160 | struct fuse_interrupt_in arg; | 1025 | struct fuse_interrupt_in arg; |
@@ -1169,7 +1034,7 @@ __releases(fiq->waitq.lock) | |||
1169 | ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT); | 1034 | ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT); |
1170 | arg.unique = req->in.h.unique; | 1035 | arg.unique = req->in.h.unique; |
1171 | 1036 | ||
1172 | spin_unlock(&fiq->waitq.lock); | 1037 | spin_unlock(&fiq->lock); |
1173 | if (nbytes < reqsize) | 1038 | if (nbytes < reqsize) |
1174 | return -EINVAL; | 1039 | return -EINVAL; |
1175 | 1040 | ||
@@ -1181,9 +1046,9 @@ __releases(fiq->waitq.lock) | |||
1181 | return err ? err : reqsize; | 1046 | return err ? err : reqsize; |
1182 | } | 1047 | } |
1183 | 1048 | ||
1184 | static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, | 1049 | struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, |
1185 | unsigned max, | 1050 | unsigned int max, |
1186 | unsigned *countp) | 1051 | unsigned int *countp) |
1187 | { | 1052 | { |
1188 | struct fuse_forget_link *head = fiq->forget_list_head.next; | 1053 | struct fuse_forget_link *head = fiq->forget_list_head.next; |
1189 | struct fuse_forget_link **newhead = &head; | 1054 | struct fuse_forget_link **newhead = &head; |
@@ -1202,14 +1067,15 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, | |||
1202 | 1067 | ||
1203 | return head; | 1068 | return head; |
1204 | } | 1069 | } |
1070 | EXPORT_SYMBOL(fuse_dequeue_forget); | ||
1205 | 1071 | ||
1206 | static int fuse_read_single_forget(struct fuse_iqueue *fiq, | 1072 | static int fuse_read_single_forget(struct fuse_iqueue *fiq, |
1207 | struct fuse_copy_state *cs, | 1073 | struct fuse_copy_state *cs, |
1208 | size_t nbytes) | 1074 | size_t nbytes) |
1209 | __releases(fiq->waitq.lock) | 1075 | __releases(fiq->lock) |
1210 | { | 1076 | { |
1211 | int err; | 1077 | int err; |
1212 | struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL); | 1078 | struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL); |
1213 | struct fuse_forget_in arg = { | 1079 | struct fuse_forget_in arg = { |
1214 | .nlookup = forget->forget_one.nlookup, | 1080 | .nlookup = forget->forget_one.nlookup, |
1215 | }; | 1081 | }; |
@@ -1220,7 +1086,7 @@ __releases(fiq->waitq.lock) | |||
1220 | .len = sizeof(ih) + sizeof(arg), | 1086 | .len = sizeof(ih) + sizeof(arg), |
1221 | }; | 1087 | }; |
1222 | 1088 | ||
1223 | spin_unlock(&fiq->waitq.lock); | 1089 | spin_unlock(&fiq->lock); |
1224 | kfree(forget); | 1090 | kfree(forget); |
1225 | if (nbytes < ih.len) | 1091 | if (nbytes < ih.len) |
1226 | return -EINVAL; | 1092 | return -EINVAL; |
@@ -1238,7 +1104,7 @@ __releases(fiq->waitq.lock) | |||
1238 | 1104 | ||
1239 | static int fuse_read_batch_forget(struct fuse_iqueue *fiq, | 1105 | static int fuse_read_batch_forget(struct fuse_iqueue *fiq, |
1240 | struct fuse_copy_state *cs, size_t nbytes) | 1106 | struct fuse_copy_state *cs, size_t nbytes) |
1241 | __releases(fiq->waitq.lock) | 1107 | __releases(fiq->lock) |
1242 | { | 1108 | { |
1243 | int err; | 1109 | int err; |
1244 | unsigned max_forgets; | 1110 | unsigned max_forgets; |
@@ -1252,13 +1118,13 @@ __releases(fiq->waitq.lock) | |||
1252 | }; | 1118 | }; |
1253 | 1119 | ||
1254 | if (nbytes < ih.len) { | 1120 | if (nbytes < ih.len) { |
1255 | spin_unlock(&fiq->waitq.lock); | 1121 | spin_unlock(&fiq->lock); |
1256 | return -EINVAL; | 1122 | return -EINVAL; |
1257 | } | 1123 | } |
1258 | 1124 | ||
1259 | max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); | 1125 | max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); |
1260 | head = dequeue_forget(fiq, max_forgets, &count); | 1126 | head = fuse_dequeue_forget(fiq, max_forgets, &count); |
1261 | spin_unlock(&fiq->waitq.lock); | 1127 | spin_unlock(&fiq->lock); |
1262 | 1128 | ||
1263 | arg.count = count; | 1129 | arg.count = count; |
1264 | ih.len += count * sizeof(struct fuse_forget_one); | 1130 | ih.len += count * sizeof(struct fuse_forget_one); |
@@ -1288,7 +1154,7 @@ __releases(fiq->waitq.lock) | |||
1288 | static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq, | 1154 | static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq, |
1289 | struct fuse_copy_state *cs, | 1155 | struct fuse_copy_state *cs, |
1290 | size_t nbytes) | 1156 | size_t nbytes) |
1291 | __releases(fiq->waitq.lock) | 1157 | __releases(fiq->lock) |
1292 | { | 1158 | { |
1293 | if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) | 1159 | if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) |
1294 | return fuse_read_single_forget(fiq, cs, nbytes); | 1160 | return fuse_read_single_forget(fiq, cs, nbytes); |
@@ -1302,7 +1168,7 @@ __releases(fiq->waitq.lock) | |||
1302 | * the pending list and copies request data to userspace buffer. If | 1168 | * the pending list and copies request data to userspace buffer. If |
1303 | * no reply is needed (FORGET) or request has been aborted or there | 1169 | * no reply is needed (FORGET) or request has been aborted or there |
1304 | * was an error during the copying then it's finished by calling | 1170 | * was an error during the copying then it's finished by calling |
1305 | * request_end(). Otherwise add it to the processing list, and set | 1171 | * fuse_request_end(). Otherwise add it to the processing list, and set |
1306 | * the 'sent' flag. | 1172 | * the 'sent' flag. |
1307 | */ | 1173 | */ |
1308 | static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, | 1174 | static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, |
@@ -1313,21 +1179,42 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, | |||
1313 | struct fuse_iqueue *fiq = &fc->iq; | 1179 | struct fuse_iqueue *fiq = &fc->iq; |
1314 | struct fuse_pqueue *fpq = &fud->pq; | 1180 | struct fuse_pqueue *fpq = &fud->pq; |
1315 | struct fuse_req *req; | 1181 | struct fuse_req *req; |
1316 | struct fuse_in *in; | 1182 | struct fuse_args *args; |
1317 | unsigned reqsize; | 1183 | unsigned reqsize; |
1318 | unsigned int hash; | 1184 | unsigned int hash; |
1319 | 1185 | ||
1186 | /* | ||
1187 | * Require sane minimum read buffer - that has capacity for fixed part | ||
1188 | * of any request header + negotiated max_write room for data. | ||
1189 | * | ||
1190 | * Historically libfuse reserves 4K for fixed header room, but e.g. | ||
1191 | * GlusterFS reserves only 80 bytes | ||
1192 | * | ||
1193 | * = `sizeof(fuse_in_header) + sizeof(fuse_write_in)` | ||
1194 | * | ||
1195 | * which is the absolute minimum any sane filesystem should be using | ||
1196 | * for header room. | ||
1197 | */ | ||
1198 | if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, | ||
1199 | sizeof(struct fuse_in_header) + | ||
1200 | sizeof(struct fuse_write_in) + | ||
1201 | fc->max_write)) | ||
1202 | return -EINVAL; | ||
1203 | |||
1320 | restart: | 1204 | restart: |
1321 | spin_lock(&fiq->waitq.lock); | 1205 | for (;;) { |
1322 | err = -EAGAIN; | 1206 | spin_lock(&fiq->lock); |
1323 | if ((file->f_flags & O_NONBLOCK) && fiq->connected && | 1207 | if (!fiq->connected || request_pending(fiq)) |
1324 | !request_pending(fiq)) | 1208 | break; |
1325 | goto err_unlock; | 1209 | spin_unlock(&fiq->lock); |
1326 | 1210 | ||
1327 | err = wait_event_interruptible_exclusive_locked(fiq->waitq, | 1211 | if (file->f_flags & O_NONBLOCK) |
1212 | return -EAGAIN; | ||
1213 | err = wait_event_interruptible_exclusive(fiq->waitq, | ||
1328 | !fiq->connected || request_pending(fiq)); | 1214 | !fiq->connected || request_pending(fiq)); |
1329 | if (err) | 1215 | if (err) |
1330 | goto err_unlock; | 1216 | return err; |
1217 | } | ||
1331 | 1218 | ||
1332 | if (!fiq->connected) { | 1219 | if (!fiq->connected) { |
1333 | err = fc->aborted ? -ECONNABORTED : -ENODEV; | 1220 | err = fc->aborted ? -ECONNABORTED : -ENODEV; |
@@ -1351,28 +1238,28 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, | |||
1351 | req = list_entry(fiq->pending.next, struct fuse_req, list); | 1238 | req = list_entry(fiq->pending.next, struct fuse_req, list); |
1352 | clear_bit(FR_PENDING, &req->flags); | 1239 | clear_bit(FR_PENDING, &req->flags); |
1353 | list_del_init(&req->list); | 1240 | list_del_init(&req->list); |
1354 | spin_unlock(&fiq->waitq.lock); | 1241 | spin_unlock(&fiq->lock); |
1355 | 1242 | ||
1356 | in = &req->in; | 1243 | args = req->args; |
1357 | reqsize = in->h.len; | 1244 | reqsize = req->in.h.len; |
1358 | 1245 | ||
1359 | /* If request is too large, reply with an error and restart the read */ | 1246 | /* If request is too large, reply with an error and restart the read */ |
1360 | if (nbytes < reqsize) { | 1247 | if (nbytes < reqsize) { |
1361 | req->out.h.error = -EIO; | 1248 | req->out.h.error = -EIO; |
1362 | /* SETXATTR is special, since it may contain too large data */ | 1249 | /* SETXATTR is special, since it may contain too large data */ |
1363 | if (in->h.opcode == FUSE_SETXATTR) | 1250 | if (args->opcode == FUSE_SETXATTR) |
1364 | req->out.h.error = -E2BIG; | 1251 | req->out.h.error = -E2BIG; |
1365 | request_end(fc, req); | 1252 | fuse_request_end(fc, req); |
1366 | goto restart; | 1253 | goto restart; |
1367 | } | 1254 | } |
1368 | spin_lock(&fpq->lock); | 1255 | spin_lock(&fpq->lock); |
1369 | list_add(&req->list, &fpq->io); | 1256 | list_add(&req->list, &fpq->io); |
1370 | spin_unlock(&fpq->lock); | 1257 | spin_unlock(&fpq->lock); |
1371 | cs->req = req; | 1258 | cs->req = req; |
1372 | err = fuse_copy_one(cs, &in->h, sizeof(in->h)); | 1259 | err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h)); |
1373 | if (!err) | 1260 | if (!err) |
1374 | err = fuse_copy_args(cs, in->numargs, in->argpages, | 1261 | err = fuse_copy_args(cs, args->in_numargs, args->in_pages, |
1375 | (struct fuse_arg *) in->args, 0); | 1262 | (struct fuse_arg *) args->in_args, 0); |
1376 | fuse_copy_finish(cs); | 1263 | fuse_copy_finish(cs); |
1377 | spin_lock(&fpq->lock); | 1264 | spin_lock(&fpq->lock); |
1378 | clear_bit(FR_LOCKED, &req->flags); | 1265 | clear_bit(FR_LOCKED, &req->flags); |
@@ -1405,11 +1292,11 @@ out_end: | |||
1405 | if (!test_bit(FR_PRIVATE, &req->flags)) | 1292 | if (!test_bit(FR_PRIVATE, &req->flags)) |
1406 | list_del_init(&req->list); | 1293 | list_del_init(&req->list); |
1407 | spin_unlock(&fpq->lock); | 1294 | spin_unlock(&fpq->lock); |
1408 | request_end(fc, req); | 1295 | fuse_request_end(fc, req); |
1409 | return err; | 1296 | return err; |
1410 | 1297 | ||
1411 | err_unlock: | 1298 | err_unlock: |
1412 | spin_unlock(&fiq->waitq.lock); | 1299 | spin_unlock(&fiq->lock); |
1413 | return err; | 1300 | return err; |
1414 | } | 1301 | } |
1415 | 1302 | ||
@@ -1728,9 +1615,19 @@ out_finish: | |||
1728 | return err; | 1615 | return err; |
1729 | } | 1616 | } |
1730 | 1617 | ||
1731 | static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) | 1618 | struct fuse_retrieve_args { |
1619 | struct fuse_args_pages ap; | ||
1620 | struct fuse_notify_retrieve_in inarg; | ||
1621 | }; | ||
1622 | |||
1623 | static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args, | ||
1624 | int error) | ||
1732 | { | 1625 | { |
1733 | release_pages(req->pages, req->num_pages); | 1626 | struct fuse_retrieve_args *ra = |
1627 | container_of(args, typeof(*ra), ap.args); | ||
1628 | |||
1629 | release_pages(ra->ap.pages, ra->ap.num_pages); | ||
1630 | kfree(ra); | ||
1734 | } | 1631 | } |
1735 | 1632 | ||
1736 | static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | 1633 | static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, |
@@ -1738,13 +1635,16 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | |||
1738 | { | 1635 | { |
1739 | int err; | 1636 | int err; |
1740 | struct address_space *mapping = inode->i_mapping; | 1637 | struct address_space *mapping = inode->i_mapping; |
1741 | struct fuse_req *req; | ||
1742 | pgoff_t index; | 1638 | pgoff_t index; |
1743 | loff_t file_size; | 1639 | loff_t file_size; |
1744 | unsigned int num; | 1640 | unsigned int num; |
1745 | unsigned int offset; | 1641 | unsigned int offset; |
1746 | size_t total_len = 0; | 1642 | size_t total_len = 0; |
1747 | unsigned int num_pages; | 1643 | unsigned int num_pages; |
1644 | struct fuse_retrieve_args *ra; | ||
1645 | size_t args_size = sizeof(*ra); | ||
1646 | struct fuse_args_pages *ap; | ||
1647 | struct fuse_args *args; | ||
1748 | 1648 | ||
1749 | offset = outarg->offset & ~PAGE_MASK; | 1649 | offset = outarg->offset & ~PAGE_MASK; |
1750 | file_size = i_size_read(inode); | 1650 | file_size = i_size_read(inode); |
@@ -1758,19 +1658,26 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | |||
1758 | num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1658 | num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1759 | num_pages = min(num_pages, fc->max_pages); | 1659 | num_pages = min(num_pages, fc->max_pages); |
1760 | 1660 | ||
1761 | req = fuse_get_req(fc, num_pages); | 1661 | args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0])); |
1762 | if (IS_ERR(req)) | ||
1763 | return PTR_ERR(req); | ||
1764 | 1662 | ||
1765 | req->in.h.opcode = FUSE_NOTIFY_REPLY; | 1663 | ra = kzalloc(args_size, GFP_KERNEL); |
1766 | req->in.h.nodeid = outarg->nodeid; | 1664 | if (!ra) |
1767 | req->in.numargs = 2; | 1665 | return -ENOMEM; |
1768 | req->in.argpages = 1; | 1666 | |
1769 | req->end = fuse_retrieve_end; | 1667 | ap = &ra->ap; |
1668 | ap->pages = (void *) (ra + 1); | ||
1669 | ap->descs = (void *) (ap->pages + num_pages); | ||
1670 | |||
1671 | args = &ap->args; | ||
1672 | args->nodeid = outarg->nodeid; | ||
1673 | args->opcode = FUSE_NOTIFY_REPLY; | ||
1674 | args->in_numargs = 2; | ||
1675 | args->in_pages = true; | ||
1676 | args->end = fuse_retrieve_end; | ||
1770 | 1677 | ||
1771 | index = outarg->offset >> PAGE_SHIFT; | 1678 | index = outarg->offset >> PAGE_SHIFT; |
1772 | 1679 | ||
1773 | while (num && req->num_pages < num_pages) { | 1680 | while (num && ap->num_pages < num_pages) { |
1774 | struct page *page; | 1681 | struct page *page; |
1775 | unsigned int this_num; | 1682 | unsigned int this_num; |
1776 | 1683 | ||
@@ -1779,27 +1686,25 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | |||
1779 | break; | 1686 | break; |
1780 | 1687 | ||
1781 | this_num = min_t(unsigned, num, PAGE_SIZE - offset); | 1688 | this_num = min_t(unsigned, num, PAGE_SIZE - offset); |
1782 | req->pages[req->num_pages] = page; | 1689 | ap->pages[ap->num_pages] = page; |
1783 | req->page_descs[req->num_pages].offset = offset; | 1690 | ap->descs[ap->num_pages].offset = offset; |
1784 | req->page_descs[req->num_pages].length = this_num; | 1691 | ap->descs[ap->num_pages].length = this_num; |
1785 | req->num_pages++; | 1692 | ap->num_pages++; |
1786 | 1693 | ||
1787 | offset = 0; | 1694 | offset = 0; |
1788 | num -= this_num; | 1695 | num -= this_num; |
1789 | total_len += this_num; | 1696 | total_len += this_num; |
1790 | index++; | 1697 | index++; |
1791 | } | 1698 | } |
1792 | req->misc.retrieve_in.offset = outarg->offset; | 1699 | ra->inarg.offset = outarg->offset; |
1793 | req->misc.retrieve_in.size = total_len; | 1700 | ra->inarg.size = total_len; |
1794 | req->in.args[0].size = sizeof(req->misc.retrieve_in); | 1701 | args->in_args[0].size = sizeof(ra->inarg); |
1795 | req->in.args[0].value = &req->misc.retrieve_in; | 1702 | args->in_args[0].value = &ra->inarg; |
1796 | req->in.args[1].size = total_len; | 1703 | args->in_args[1].size = total_len; |
1797 | 1704 | ||
1798 | err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); | 1705 | err = fuse_simple_notify_reply(fc, args, outarg->notify_unique); |
1799 | if (err) { | 1706 | if (err) |
1800 | fuse_retrieve_end(fc, req); | 1707 | fuse_retrieve_end(fc, args, err); |
1801 | fuse_put_request(fc, req); | ||
1802 | } | ||
1803 | 1708 | ||
1804 | return err; | 1709 | return err; |
1805 | } | 1710 | } |
@@ -1885,27 +1790,25 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique) | |||
1885 | return NULL; | 1790 | return NULL; |
1886 | } | 1791 | } |
1887 | 1792 | ||
1888 | static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, | 1793 | static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args, |
1889 | unsigned nbytes) | 1794 | unsigned nbytes) |
1890 | { | 1795 | { |
1891 | unsigned reqsize = sizeof(struct fuse_out_header); | 1796 | unsigned reqsize = sizeof(struct fuse_out_header); |
1892 | 1797 | ||
1893 | if (out->h.error) | 1798 | reqsize += fuse_len_args(args->out_numargs, args->out_args); |
1894 | return nbytes != reqsize ? -EINVAL : 0; | ||
1895 | |||
1896 | reqsize += len_args(out->numargs, out->args); | ||
1897 | 1799 | ||
1898 | if (reqsize < nbytes || (reqsize > nbytes && !out->argvar)) | 1800 | if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar)) |
1899 | return -EINVAL; | 1801 | return -EINVAL; |
1900 | else if (reqsize > nbytes) { | 1802 | else if (reqsize > nbytes) { |
1901 | struct fuse_arg *lastarg = &out->args[out->numargs-1]; | 1803 | struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1]; |
1902 | unsigned diffsize = reqsize - nbytes; | 1804 | unsigned diffsize = reqsize - nbytes; |
1805 | |||
1903 | if (diffsize > lastarg->size) | 1806 | if (diffsize > lastarg->size) |
1904 | return -EINVAL; | 1807 | return -EINVAL; |
1905 | lastarg->size -= diffsize; | 1808 | lastarg->size -= diffsize; |
1906 | } | 1809 | } |
1907 | return fuse_copy_args(cs, out->numargs, out->argpages, out->args, | 1810 | return fuse_copy_args(cs, args->out_numargs, args->out_pages, |
1908 | out->page_zeroing); | 1811 | args->out_args, args->page_zeroing); |
1909 | } | 1812 | } |
1910 | 1813 | ||
1911 | /* | 1814 | /* |
@@ -1913,7 +1816,7 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, | |||
1913 | * the write buffer. The request is then searched on the processing | 1816 | * the write buffer. The request is then searched on the processing |
1914 | * list by the unique ID found in the header. If found, then remove | 1817 | * list by the unique ID found in the header. If found, then remove |
1915 | * it from the list and copy the rest of the buffer to the request. | 1818 | * it from the list and copy the rest of the buffer to the request. |
1916 | * The request is finished by calling request_end() | 1819 | * The request is finished by calling fuse_request_end(). |
1917 | */ | 1820 | */ |
1918 | static ssize_t fuse_dev_do_write(struct fuse_dev *fud, | 1821 | static ssize_t fuse_dev_do_write(struct fuse_dev *fud, |
1919 | struct fuse_copy_state *cs, size_t nbytes) | 1822 | struct fuse_copy_state *cs, size_t nbytes) |
@@ -1984,10 +1887,13 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, | |||
1984 | set_bit(FR_LOCKED, &req->flags); | 1887 | set_bit(FR_LOCKED, &req->flags); |
1985 | spin_unlock(&fpq->lock); | 1888 | spin_unlock(&fpq->lock); |
1986 | cs->req = req; | 1889 | cs->req = req; |
1987 | if (!req->out.page_replace) | 1890 | if (!req->args->page_replace) |
1988 | cs->move_pages = 0; | 1891 | cs->move_pages = 0; |
1989 | 1892 | ||
1990 | err = copy_out_args(cs, &req->out, nbytes); | 1893 | if (oh.error) |
1894 | err = nbytes != sizeof(oh) ? -EINVAL : 0; | ||
1895 | else | ||
1896 | err = copy_out_args(cs, req->args, nbytes); | ||
1991 | fuse_copy_finish(cs); | 1897 | fuse_copy_finish(cs); |
1992 | 1898 | ||
1993 | spin_lock(&fpq->lock); | 1899 | spin_lock(&fpq->lock); |
@@ -2000,7 +1906,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, | |||
2000 | list_del_init(&req->list); | 1906 | list_del_init(&req->list); |
2001 | spin_unlock(&fpq->lock); | 1907 | spin_unlock(&fpq->lock); |
2002 | 1908 | ||
2003 | request_end(fc, req); | 1909 | fuse_request_end(fc, req); |
2004 | out: | 1910 | out: |
2005 | return err ? err : nbytes; | 1911 | return err ? err : nbytes; |
2006 | 1912 | ||
@@ -2121,12 +2027,12 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) | |||
2121 | fiq = &fud->fc->iq; | 2027 | fiq = &fud->fc->iq; |
2122 | poll_wait(file, &fiq->waitq, wait); | 2028 | poll_wait(file, &fiq->waitq, wait); |
2123 | 2029 | ||
2124 | spin_lock(&fiq->waitq.lock); | 2030 | spin_lock(&fiq->lock); |
2125 | if (!fiq->connected) | 2031 | if (!fiq->connected) |
2126 | mask = EPOLLERR; | 2032 | mask = EPOLLERR; |
2127 | else if (request_pending(fiq)) | 2033 | else if (request_pending(fiq)) |
2128 | mask |= EPOLLIN | EPOLLRDNORM; | 2034 | mask |= EPOLLIN | EPOLLRDNORM; |
2129 | spin_unlock(&fiq->waitq.lock); | 2035 | spin_unlock(&fiq->lock); |
2130 | 2036 | ||
2131 | return mask; | 2037 | return mask; |
2132 | } | 2038 | } |
@@ -2140,7 +2046,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) | |||
2140 | req->out.h.error = -ECONNABORTED; | 2046 | req->out.h.error = -ECONNABORTED; |
2141 | clear_bit(FR_SENT, &req->flags); | 2047 | clear_bit(FR_SENT, &req->flags); |
2142 | list_del_init(&req->list); | 2048 | list_del_init(&req->list); |
2143 | request_end(fc, req); | 2049 | fuse_request_end(fc, req); |
2144 | } | 2050 | } |
2145 | } | 2051 | } |
2146 | 2052 | ||
@@ -2221,15 +2127,15 @@ void fuse_abort_conn(struct fuse_conn *fc) | |||
2221 | flush_bg_queue(fc); | 2127 | flush_bg_queue(fc); |
2222 | spin_unlock(&fc->bg_lock); | 2128 | spin_unlock(&fc->bg_lock); |
2223 | 2129 | ||
2224 | spin_lock(&fiq->waitq.lock); | 2130 | spin_lock(&fiq->lock); |
2225 | fiq->connected = 0; | 2131 | fiq->connected = 0; |
2226 | list_for_each_entry(req, &fiq->pending, list) | 2132 | list_for_each_entry(req, &fiq->pending, list) |
2227 | clear_bit(FR_PENDING, &req->flags); | 2133 | clear_bit(FR_PENDING, &req->flags); |
2228 | list_splice_tail_init(&fiq->pending, &to_end); | 2134 | list_splice_tail_init(&fiq->pending, &to_end); |
2229 | while (forget_pending(fiq)) | 2135 | while (forget_pending(fiq)) |
2230 | kfree(dequeue_forget(fiq, 1, NULL)); | 2136 | kfree(fuse_dequeue_forget(fiq, 1, NULL)); |
2231 | wake_up_all_locked(&fiq->waitq); | 2137 | wake_up_all(&fiq->waitq); |
2232 | spin_unlock(&fiq->waitq.lock); | 2138 | spin_unlock(&fiq->lock); |
2233 | kill_fasync(&fiq->fasync, SIGIO, POLL_IN); | 2139 | kill_fasync(&fiq->fasync, SIGIO, POLL_IN); |
2234 | end_polls(fc); | 2140 | end_polls(fc); |
2235 | wake_up_all(&fc->blocked_waitq); | 2141 | wake_up_all(&fc->blocked_waitq); |
@@ -2296,7 +2202,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) | |||
2296 | if (new->private_data) | 2202 | if (new->private_data) |
2297 | return -EINVAL; | 2203 | return -EINVAL; |
2298 | 2204 | ||
2299 | fud = fuse_dev_alloc(fc); | 2205 | fud = fuse_dev_alloc_install(fc); |
2300 | if (!fud) | 2206 | if (!fud) |
2301 | return -ENOMEM; | 2207 | return -ENOMEM; |
2302 | 2208 | ||
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dd0f64f7bc06..d572c900bb0f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -24,20 +24,54 @@ static void fuse_advise_use_readdirplus(struct inode *dir) | |||
24 | set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); | 24 | set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); |
25 | } | 25 | } |
26 | 26 | ||
27 | #if BITS_PER_LONG >= 64 | ||
28 | static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) | ||
29 | { | ||
30 | entry->d_fsdata = (void *) time; | ||
31 | } | ||
32 | |||
33 | static inline u64 fuse_dentry_time(const struct dentry *entry) | ||
34 | { | ||
35 | return (u64)entry->d_fsdata; | ||
36 | } | ||
37 | |||
38 | #else | ||
27 | union fuse_dentry { | 39 | union fuse_dentry { |
28 | u64 time; | 40 | u64 time; |
29 | struct rcu_head rcu; | 41 | struct rcu_head rcu; |
30 | }; | 42 | }; |
31 | 43 | ||
32 | static inline void fuse_dentry_settime(struct dentry *entry, u64 time) | 44 | static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) |
33 | { | 45 | { |
34 | ((union fuse_dentry *) entry->d_fsdata)->time = time; | 46 | ((union fuse_dentry *) dentry->d_fsdata)->time = time; |
35 | } | 47 | } |
36 | 48 | ||
37 | static inline u64 fuse_dentry_time(struct dentry *entry) | 49 | static inline u64 fuse_dentry_time(const struct dentry *entry) |
38 | { | 50 | { |
39 | return ((union fuse_dentry *) entry->d_fsdata)->time; | 51 | return ((union fuse_dentry *) entry->d_fsdata)->time; |
40 | } | 52 | } |
53 | #endif | ||
54 | |||
55 | static void fuse_dentry_settime(struct dentry *dentry, u64 time) | ||
56 | { | ||
57 | struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); | ||
58 | bool delete = !time && fc->delete_stale; | ||
59 | /* | ||
60 | * Mess with DCACHE_OP_DELETE because dput() will be faster without it. | ||
61 | * Don't care about races, either way it's just an optimization | ||
62 | */ | ||
63 | if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) || | ||
64 | (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) { | ||
65 | spin_lock(&dentry->d_lock); | ||
66 | if (!delete) | ||
67 | dentry->d_flags &= ~DCACHE_OP_DELETE; | ||
68 | else | ||
69 | dentry->d_flags |= DCACHE_OP_DELETE; | ||
70 | spin_unlock(&dentry->d_lock); | ||
71 | } | ||
72 | |||
73 | __fuse_dentry_settime(dentry, time); | ||
74 | } | ||
41 | 75 | ||
42 | /* | 76 | /* |
43 | * FUSE caches dentries and attributes with separate timeout. The | 77 | * FUSE caches dentries and attributes with separate timeout. The |
@@ -139,14 +173,14 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, | |||
139 | struct fuse_entry_out *outarg) | 173 | struct fuse_entry_out *outarg) |
140 | { | 174 | { |
141 | memset(outarg, 0, sizeof(struct fuse_entry_out)); | 175 | memset(outarg, 0, sizeof(struct fuse_entry_out)); |
142 | args->in.h.opcode = FUSE_LOOKUP; | 176 | args->opcode = FUSE_LOOKUP; |
143 | args->in.h.nodeid = nodeid; | 177 | args->nodeid = nodeid; |
144 | args->in.numargs = 1; | 178 | args->in_numargs = 1; |
145 | args->in.args[0].size = name->len + 1; | 179 | args->in_args[0].size = name->len + 1; |
146 | args->in.args[0].value = name->name; | 180 | args->in_args[0].value = name->name; |
147 | args->out.numargs = 1; | 181 | args->out_numargs = 1; |
148 | args->out.args[0].size = sizeof(struct fuse_entry_out); | 182 | args->out_args[0].size = sizeof(struct fuse_entry_out); |
149 | args->out.args[0].value = outarg; | 183 | args->out_args[0].value = outarg; |
150 | } | 184 | } |
151 | 185 | ||
152 | /* | 186 | /* |
@@ -242,9 +276,11 @@ invalid: | |||
242 | goto out; | 276 | goto out; |
243 | } | 277 | } |
244 | 278 | ||
279 | #if BITS_PER_LONG < 64 | ||
245 | static int fuse_dentry_init(struct dentry *dentry) | 280 | static int fuse_dentry_init(struct dentry *dentry) |
246 | { | 281 | { |
247 | dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL); | 282 | dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), |
283 | GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); | ||
248 | 284 | ||
249 | return dentry->d_fsdata ? 0 : -ENOMEM; | 285 | return dentry->d_fsdata ? 0 : -ENOMEM; |
250 | } | 286 | } |
@@ -254,16 +290,27 @@ static void fuse_dentry_release(struct dentry *dentry) | |||
254 | 290 | ||
255 | kfree_rcu(fd, rcu); | 291 | kfree_rcu(fd, rcu); |
256 | } | 292 | } |
293 | #endif | ||
294 | |||
295 | static int fuse_dentry_delete(const struct dentry *dentry) | ||
296 | { | ||
297 | return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); | ||
298 | } | ||
257 | 299 | ||
258 | const struct dentry_operations fuse_dentry_operations = { | 300 | const struct dentry_operations fuse_dentry_operations = { |
259 | .d_revalidate = fuse_dentry_revalidate, | 301 | .d_revalidate = fuse_dentry_revalidate, |
302 | .d_delete = fuse_dentry_delete, | ||
303 | #if BITS_PER_LONG < 64 | ||
260 | .d_init = fuse_dentry_init, | 304 | .d_init = fuse_dentry_init, |
261 | .d_release = fuse_dentry_release, | 305 | .d_release = fuse_dentry_release, |
306 | #endif | ||
262 | }; | 307 | }; |
263 | 308 | ||
264 | const struct dentry_operations fuse_root_dentry_operations = { | 309 | const struct dentry_operations fuse_root_dentry_operations = { |
310 | #if BITS_PER_LONG < 64 | ||
265 | .d_init = fuse_dentry_init, | 311 | .d_init = fuse_dentry_init, |
266 | .d_release = fuse_dentry_release, | 312 | .d_release = fuse_dentry_release, |
313 | #endif | ||
267 | }; | 314 | }; |
268 | 315 | ||
269 | int fuse_valid_type(int m) | 316 | int fuse_valid_type(int m) |
@@ -410,18 +457,18 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, | |||
410 | inarg.flags = flags; | 457 | inarg.flags = flags; |
411 | inarg.mode = mode; | 458 | inarg.mode = mode; |
412 | inarg.umask = current_umask(); | 459 | inarg.umask = current_umask(); |
413 | args.in.h.opcode = FUSE_CREATE; | 460 | args.opcode = FUSE_CREATE; |
414 | args.in.h.nodeid = get_node_id(dir); | 461 | args.nodeid = get_node_id(dir); |
415 | args.in.numargs = 2; | 462 | args.in_numargs = 2; |
416 | args.in.args[0].size = sizeof(inarg); | 463 | args.in_args[0].size = sizeof(inarg); |
417 | args.in.args[0].value = &inarg; | 464 | args.in_args[0].value = &inarg; |
418 | args.in.args[1].size = entry->d_name.len + 1; | 465 | args.in_args[1].size = entry->d_name.len + 1; |
419 | args.in.args[1].value = entry->d_name.name; | 466 | args.in_args[1].value = entry->d_name.name; |
420 | args.out.numargs = 2; | 467 | args.out_numargs = 2; |
421 | args.out.args[0].size = sizeof(outentry); | 468 | args.out_args[0].size = sizeof(outentry); |
422 | args.out.args[0].value = &outentry; | 469 | args.out_args[0].value = &outentry; |
423 | args.out.args[1].size = sizeof(outopen); | 470 | args.out_args[1].size = sizeof(outopen); |
424 | args.out.args[1].value = &outopen; | 471 | args.out_args[1].value = &outopen; |
425 | err = fuse_simple_request(fc, &args); | 472 | err = fuse_simple_request(fc, &args); |
426 | if (err) | 473 | if (err) |
427 | goto out_free_ff; | 474 | goto out_free_ff; |
@@ -526,10 +573,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, | |||
526 | return -ENOMEM; | 573 | return -ENOMEM; |
527 | 574 | ||
528 | memset(&outarg, 0, sizeof(outarg)); | 575 | memset(&outarg, 0, sizeof(outarg)); |
529 | args->in.h.nodeid = get_node_id(dir); | 576 | args->nodeid = get_node_id(dir); |
530 | args->out.numargs = 1; | 577 | args->out_numargs = 1; |
531 | args->out.args[0].size = sizeof(outarg); | 578 | args->out_args[0].size = sizeof(outarg); |
532 | args->out.args[0].value = &outarg; | 579 | args->out_args[0].value = &outarg; |
533 | err = fuse_simple_request(fc, args); | 580 | err = fuse_simple_request(fc, args); |
534 | if (err) | 581 | if (err) |
535 | goto out_put_forget_req; | 582 | goto out_put_forget_req; |
@@ -582,12 +629,12 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, | |||
582 | inarg.mode = mode; | 629 | inarg.mode = mode; |
583 | inarg.rdev = new_encode_dev(rdev); | 630 | inarg.rdev = new_encode_dev(rdev); |
584 | inarg.umask = current_umask(); | 631 | inarg.umask = current_umask(); |
585 | args.in.h.opcode = FUSE_MKNOD; | 632 | args.opcode = FUSE_MKNOD; |
586 | args.in.numargs = 2; | 633 | args.in_numargs = 2; |
587 | args.in.args[0].size = sizeof(inarg); | 634 | args.in_args[0].size = sizeof(inarg); |
588 | args.in.args[0].value = &inarg; | 635 | args.in_args[0].value = &inarg; |
589 | args.in.args[1].size = entry->d_name.len + 1; | 636 | args.in_args[1].size = entry->d_name.len + 1; |
590 | args.in.args[1].value = entry->d_name.name; | 637 | args.in_args[1].value = entry->d_name.name; |
591 | return create_new_entry(fc, &args, dir, entry, mode); | 638 | return create_new_entry(fc, &args, dir, entry, mode); |
592 | } | 639 | } |
593 | 640 | ||
@@ -609,12 +656,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) | |||
609 | memset(&inarg, 0, sizeof(inarg)); | 656 | memset(&inarg, 0, sizeof(inarg)); |
610 | inarg.mode = mode; | 657 | inarg.mode = mode; |
611 | inarg.umask = current_umask(); | 658 | inarg.umask = current_umask(); |
612 | args.in.h.opcode = FUSE_MKDIR; | 659 | args.opcode = FUSE_MKDIR; |
613 | args.in.numargs = 2; | 660 | args.in_numargs = 2; |
614 | args.in.args[0].size = sizeof(inarg); | 661 | args.in_args[0].size = sizeof(inarg); |
615 | args.in.args[0].value = &inarg; | 662 | args.in_args[0].value = &inarg; |
616 | args.in.args[1].size = entry->d_name.len + 1; | 663 | args.in_args[1].size = entry->d_name.len + 1; |
617 | args.in.args[1].value = entry->d_name.name; | 664 | args.in_args[1].value = entry->d_name.name; |
618 | return create_new_entry(fc, &args, dir, entry, S_IFDIR); | 665 | return create_new_entry(fc, &args, dir, entry, S_IFDIR); |
619 | } | 666 | } |
620 | 667 | ||
@@ -625,12 +672,12 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, | |||
625 | unsigned len = strlen(link) + 1; | 672 | unsigned len = strlen(link) + 1; |
626 | FUSE_ARGS(args); | 673 | FUSE_ARGS(args); |
627 | 674 | ||
628 | args.in.h.opcode = FUSE_SYMLINK; | 675 | args.opcode = FUSE_SYMLINK; |
629 | args.in.numargs = 2; | 676 | args.in_numargs = 2; |
630 | args.in.args[0].size = entry->d_name.len + 1; | 677 | args.in_args[0].size = entry->d_name.len + 1; |
631 | args.in.args[0].value = entry->d_name.name; | 678 | args.in_args[0].value = entry->d_name.name; |
632 | args.in.args[1].size = len; | 679 | args.in_args[1].size = len; |
633 | args.in.args[1].value = link; | 680 | args.in_args[1].value = link; |
634 | return create_new_entry(fc, &args, dir, entry, S_IFLNK); | 681 | return create_new_entry(fc, &args, dir, entry, S_IFLNK); |
635 | } | 682 | } |
636 | 683 | ||
@@ -648,11 +695,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) | |||
648 | struct fuse_conn *fc = get_fuse_conn(dir); | 695 | struct fuse_conn *fc = get_fuse_conn(dir); |
649 | FUSE_ARGS(args); | 696 | FUSE_ARGS(args); |
650 | 697 | ||
651 | args.in.h.opcode = FUSE_UNLINK; | 698 | args.opcode = FUSE_UNLINK; |
652 | args.in.h.nodeid = get_node_id(dir); | 699 | args.nodeid = get_node_id(dir); |
653 | args.in.numargs = 1; | 700 | args.in_numargs = 1; |
654 | args.in.args[0].size = entry->d_name.len + 1; | 701 | args.in_args[0].size = entry->d_name.len + 1; |
655 | args.in.args[0].value = entry->d_name.name; | 702 | args.in_args[0].value = entry->d_name.name; |
656 | err = fuse_simple_request(fc, &args); | 703 | err = fuse_simple_request(fc, &args); |
657 | if (!err) { | 704 | if (!err) { |
658 | struct inode *inode = d_inode(entry); | 705 | struct inode *inode = d_inode(entry); |
@@ -684,11 +731,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) | |||
684 | struct fuse_conn *fc = get_fuse_conn(dir); | 731 | struct fuse_conn *fc = get_fuse_conn(dir); |
685 | FUSE_ARGS(args); | 732 | FUSE_ARGS(args); |
686 | 733 | ||
687 | args.in.h.opcode = FUSE_RMDIR; | 734 | args.opcode = FUSE_RMDIR; |
688 | args.in.h.nodeid = get_node_id(dir); | 735 | args.nodeid = get_node_id(dir); |
689 | args.in.numargs = 1; | 736 | args.in_numargs = 1; |
690 | args.in.args[0].size = entry->d_name.len + 1; | 737 | args.in_args[0].size = entry->d_name.len + 1; |
691 | args.in.args[0].value = entry->d_name.name; | 738 | args.in_args[0].value = entry->d_name.name; |
692 | err = fuse_simple_request(fc, &args); | 739 | err = fuse_simple_request(fc, &args); |
693 | if (!err) { | 740 | if (!err) { |
694 | clear_nlink(d_inode(entry)); | 741 | clear_nlink(d_inode(entry)); |
@@ -711,15 +758,15 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, | |||
711 | memset(&inarg, 0, argsize); | 758 | memset(&inarg, 0, argsize); |
712 | inarg.newdir = get_node_id(newdir); | 759 | inarg.newdir = get_node_id(newdir); |
713 | inarg.flags = flags; | 760 | inarg.flags = flags; |
714 | args.in.h.opcode = opcode; | 761 | args.opcode = opcode; |
715 | args.in.h.nodeid = get_node_id(olddir); | 762 | args.nodeid = get_node_id(olddir); |
716 | args.in.numargs = 3; | 763 | args.in_numargs = 3; |
717 | args.in.args[0].size = argsize; | 764 | args.in_args[0].size = argsize; |
718 | args.in.args[0].value = &inarg; | 765 | args.in_args[0].value = &inarg; |
719 | args.in.args[1].size = oldent->d_name.len + 1; | 766 | args.in_args[1].size = oldent->d_name.len + 1; |
720 | args.in.args[1].value = oldent->d_name.name; | 767 | args.in_args[1].value = oldent->d_name.name; |
721 | args.in.args[2].size = newent->d_name.len + 1; | 768 | args.in_args[2].size = newent->d_name.len + 1; |
722 | args.in.args[2].value = newent->d_name.name; | 769 | args.in_args[2].value = newent->d_name.name; |
723 | err = fuse_simple_request(fc, &args); | 770 | err = fuse_simple_request(fc, &args); |
724 | if (!err) { | 771 | if (!err) { |
725 | /* ctime changes */ | 772 | /* ctime changes */ |
@@ -796,12 +843,12 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, | |||
796 | 843 | ||
797 | memset(&inarg, 0, sizeof(inarg)); | 844 | memset(&inarg, 0, sizeof(inarg)); |
798 | inarg.oldnodeid = get_node_id(inode); | 845 | inarg.oldnodeid = get_node_id(inode); |
799 | args.in.h.opcode = FUSE_LINK; | 846 | args.opcode = FUSE_LINK; |
800 | args.in.numargs = 2; | 847 | args.in_numargs = 2; |
801 | args.in.args[0].size = sizeof(inarg); | 848 | args.in_args[0].size = sizeof(inarg); |
802 | args.in.args[0].value = &inarg; | 849 | args.in_args[0].value = &inarg; |
803 | args.in.args[1].size = newent->d_name.len + 1; | 850 | args.in_args[1].size = newent->d_name.len + 1; |
804 | args.in.args[1].value = newent->d_name.name; | 851 | args.in_args[1].value = newent->d_name.name; |
805 | err = create_new_entry(fc, &args, newdir, newent, inode->i_mode); | 852 | err = create_new_entry(fc, &args, newdir, newent, inode->i_mode); |
806 | /* Contrary to "normal" filesystems it can happen that link | 853 | /* Contrary to "normal" filesystems it can happen that link |
807 | makes two "logical" inodes point to the same "physical" | 854 | makes two "logical" inodes point to the same "physical" |
@@ -884,14 +931,14 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, | |||
884 | inarg.getattr_flags |= FUSE_GETATTR_FH; | 931 | inarg.getattr_flags |= FUSE_GETATTR_FH; |
885 | inarg.fh = ff->fh; | 932 | inarg.fh = ff->fh; |
886 | } | 933 | } |
887 | args.in.h.opcode = FUSE_GETATTR; | 934 | args.opcode = FUSE_GETATTR; |
888 | args.in.h.nodeid = get_node_id(inode); | 935 | args.nodeid = get_node_id(inode); |
889 | args.in.numargs = 1; | 936 | args.in_numargs = 1; |
890 | args.in.args[0].size = sizeof(inarg); | 937 | args.in_args[0].size = sizeof(inarg); |
891 | args.in.args[0].value = &inarg; | 938 | args.in_args[0].value = &inarg; |
892 | args.out.numargs = 1; | 939 | args.out_numargs = 1; |
893 | args.out.args[0].size = sizeof(outarg); | 940 | args.out_args[0].size = sizeof(outarg); |
894 | args.out.args[0].value = &outarg; | 941 | args.out_args[0].value = &outarg; |
895 | err = fuse_simple_request(fc, &args); | 942 | err = fuse_simple_request(fc, &args); |
896 | if (!err) { | 943 | if (!err) { |
897 | if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { | 944 | if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { |
@@ -1056,11 +1103,11 @@ static int fuse_access(struct inode *inode, int mask) | |||
1056 | 1103 | ||
1057 | memset(&inarg, 0, sizeof(inarg)); | 1104 | memset(&inarg, 0, sizeof(inarg)); |
1058 | inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); | 1105 | inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); |
1059 | args.in.h.opcode = FUSE_ACCESS; | 1106 | args.opcode = FUSE_ACCESS; |
1060 | args.in.h.nodeid = get_node_id(inode); | 1107 | args.nodeid = get_node_id(inode); |
1061 | args.in.numargs = 1; | 1108 | args.in_numargs = 1; |
1062 | args.in.args[0].size = sizeof(inarg); | 1109 | args.in_args[0].size = sizeof(inarg); |
1063 | args.in.args[0].value = &inarg; | 1110 | args.in_args[0].value = &inarg; |
1064 | err = fuse_simple_request(fc, &args); | 1111 | err = fuse_simple_request(fc, &args); |
1065 | if (err == -ENOSYS) { | 1112 | if (err == -ENOSYS) { |
1066 | fc->no_access = 1; | 1113 | fc->no_access = 1; |
@@ -1152,38 +1199,36 @@ static int fuse_permission(struct inode *inode, int mask) | |||
1152 | static int fuse_readlink_page(struct inode *inode, struct page *page) | 1199 | static int fuse_readlink_page(struct inode *inode, struct page *page) |
1153 | { | 1200 | { |
1154 | struct fuse_conn *fc = get_fuse_conn(inode); | 1201 | struct fuse_conn *fc = get_fuse_conn(inode); |
1155 | struct fuse_req *req; | 1202 | struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; |
1156 | int err; | 1203 | struct fuse_args_pages ap = { |
1204 | .num_pages = 1, | ||
1205 | .pages = &page, | ||
1206 | .descs = &desc, | ||
1207 | }; | ||
1208 | char *link; | ||
1209 | ssize_t res; | ||
1210 | |||
1211 | ap.args.opcode = FUSE_READLINK; | ||
1212 | ap.args.nodeid = get_node_id(inode); | ||
1213 | ap.args.out_pages = true; | ||
1214 | ap.args.out_argvar = true; | ||
1215 | ap.args.page_zeroing = true; | ||
1216 | ap.args.out_numargs = 1; | ||
1217 | ap.args.out_args[0].size = desc.length; | ||
1218 | res = fuse_simple_request(fc, &ap.args); | ||
1157 | 1219 | ||
1158 | req = fuse_get_req(fc, 1); | 1220 | fuse_invalidate_atime(inode); |
1159 | if (IS_ERR(req)) | ||
1160 | return PTR_ERR(req); | ||
1161 | |||
1162 | req->out.page_zeroing = 1; | ||
1163 | req->out.argpages = 1; | ||
1164 | req->num_pages = 1; | ||
1165 | req->pages[0] = page; | ||
1166 | req->page_descs[0].length = PAGE_SIZE - 1; | ||
1167 | req->in.h.opcode = FUSE_READLINK; | ||
1168 | req->in.h.nodeid = get_node_id(inode); | ||
1169 | req->out.argvar = 1; | ||
1170 | req->out.numargs = 1; | ||
1171 | req->out.args[0].size = PAGE_SIZE - 1; | ||
1172 | fuse_request_send(fc, req); | ||
1173 | err = req->out.h.error; | ||
1174 | 1221 | ||
1175 | if (!err) { | 1222 | if (res < 0) |
1176 | char *link = page_address(page); | 1223 | return res; |
1177 | size_t len = req->out.args[0].size; | ||
1178 | 1224 | ||
1179 | BUG_ON(len >= PAGE_SIZE); | 1225 | if (WARN_ON(res >= PAGE_SIZE)) |
1180 | link[len] = '\0'; | 1226 | return -EIO; |
1181 | } | ||
1182 | 1227 | ||
1183 | fuse_put_request(fc, req); | 1228 | link = page_address(page); |
1184 | fuse_invalidate_atime(inode); | 1229 | link[res] = '\0'; |
1185 | 1230 | ||
1186 | return err; | 1231 | return 0; |
1187 | } | 1232 | } |
1188 | 1233 | ||
1189 | static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, | 1234 | static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, |
@@ -1383,14 +1428,14 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, | |||
1383 | struct fuse_setattr_in *inarg_p, | 1428 | struct fuse_setattr_in *inarg_p, |
1384 | struct fuse_attr_out *outarg_p) | 1429 | struct fuse_attr_out *outarg_p) |
1385 | { | 1430 | { |
1386 | args->in.h.opcode = FUSE_SETATTR; | 1431 | args->opcode = FUSE_SETATTR; |
1387 | args->in.h.nodeid = get_node_id(inode); | 1432 | args->nodeid = get_node_id(inode); |
1388 | args->in.numargs = 1; | 1433 | args->in_numargs = 1; |
1389 | args->in.args[0].size = sizeof(*inarg_p); | 1434 | args->in_args[0].size = sizeof(*inarg_p); |
1390 | args->in.args[0].value = inarg_p; | 1435 | args->in_args[0].value = inarg_p; |
1391 | args->out.numargs = 1; | 1436 | args->out_numargs = 1; |
1392 | args->out.args[0].size = sizeof(*outarg_p); | 1437 | args->out_args[0].size = sizeof(*outarg_p); |
1393 | args->out.args[0].value = outarg_p; | 1438 | args->out_args[0].value = outarg_p; |
1394 | } | 1439 | } |
1395 | 1440 | ||
1396 | /* | 1441 | /* |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5ae2828beb00..0f0225686aee 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -19,6 +19,18 @@ | |||
19 | #include <linux/falloc.h> | 19 | #include <linux/falloc.h> |
20 | #include <linux/uio.h> | 20 | #include <linux/uio.h> |
21 | 21 | ||
22 | static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags, | ||
23 | struct fuse_page_desc **desc) | ||
24 | { | ||
25 | struct page **pages; | ||
26 | |||
27 | pages = kzalloc(npages * (sizeof(struct page *) + | ||
28 | sizeof(struct fuse_page_desc)), flags); | ||
29 | *desc = (void *) (pages + npages); | ||
30 | |||
31 | return pages; | ||
32 | } | ||
33 | |||
22 | static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | 34 | static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
23 | int opcode, struct fuse_open_out *outargp) | 35 | int opcode, struct fuse_open_out *outargp) |
24 | { | 36 | { |
@@ -29,29 +41,36 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | |||
29 | inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); | 41 | inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); |
30 | if (!fc->atomic_o_trunc) | 42 | if (!fc->atomic_o_trunc) |
31 | inarg.flags &= ~O_TRUNC; | 43 | inarg.flags &= ~O_TRUNC; |
32 | args.in.h.opcode = opcode; | 44 | args.opcode = opcode; |
33 | args.in.h.nodeid = nodeid; | 45 | args.nodeid = nodeid; |
34 | args.in.numargs = 1; | 46 | args.in_numargs = 1; |
35 | args.in.args[0].size = sizeof(inarg); | 47 | args.in_args[0].size = sizeof(inarg); |
36 | args.in.args[0].value = &inarg; | 48 | args.in_args[0].value = &inarg; |
37 | args.out.numargs = 1; | 49 | args.out_numargs = 1; |
38 | args.out.args[0].size = sizeof(*outargp); | 50 | args.out_args[0].size = sizeof(*outargp); |
39 | args.out.args[0].value = outargp; | 51 | args.out_args[0].value = outargp; |
40 | 52 | ||
41 | return fuse_simple_request(fc, &args); | 53 | return fuse_simple_request(fc, &args); |
42 | } | 54 | } |
43 | 55 | ||
56 | struct fuse_release_args { | ||
57 | struct fuse_args args; | ||
58 | struct fuse_release_in inarg; | ||
59 | struct inode *inode; | ||
60 | }; | ||
61 | |||
44 | struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) | 62 | struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) |
45 | { | 63 | { |
46 | struct fuse_file *ff; | 64 | struct fuse_file *ff; |
47 | 65 | ||
48 | ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL); | 66 | ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT); |
49 | if (unlikely(!ff)) | 67 | if (unlikely(!ff)) |
50 | return NULL; | 68 | return NULL; |
51 | 69 | ||
52 | ff->fc = fc; | 70 | ff->fc = fc; |
53 | ff->reserved_req = fuse_request_alloc(0); | 71 | ff->release_args = kzalloc(sizeof(*ff->release_args), |
54 | if (unlikely(!ff->reserved_req)) { | 72 | GFP_KERNEL_ACCOUNT); |
73 | if (!ff->release_args) { | ||
55 | kfree(ff); | 74 | kfree(ff); |
56 | return NULL; | 75 | return NULL; |
57 | } | 76 | } |
@@ -69,7 +88,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) | |||
69 | 88 | ||
70 | void fuse_file_free(struct fuse_file *ff) | 89 | void fuse_file_free(struct fuse_file *ff) |
71 | { | 90 | { |
72 | fuse_request_free(ff->reserved_req); | 91 | kfree(ff->release_args); |
73 | mutex_destroy(&ff->readdir.lock); | 92 | mutex_destroy(&ff->readdir.lock); |
74 | kfree(ff); | 93 | kfree(ff); |
75 | } | 94 | } |
@@ -80,34 +99,31 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) | |||
80 | return ff; | 99 | return ff; |
81 | } | 100 | } |
82 | 101 | ||
83 | static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) | 102 | static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args, |
103 | int error) | ||
84 | { | 104 | { |
85 | iput(req->misc.release.inode); | 105 | struct fuse_release_args *ra = container_of(args, typeof(*ra), args); |
106 | |||
107 | iput(ra->inode); | ||
108 | kfree(ra); | ||
86 | } | 109 | } |
87 | 110 | ||
88 | static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) | 111 | static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) |
89 | { | 112 | { |
90 | if (refcount_dec_and_test(&ff->count)) { | 113 | if (refcount_dec_and_test(&ff->count)) { |
91 | struct fuse_req *req = ff->reserved_req; | 114 | struct fuse_args *args = &ff->release_args->args; |
92 | 115 | ||
93 | if (isdir ? ff->fc->no_opendir : ff->fc->no_open) { | 116 | if (isdir ? ff->fc->no_opendir : ff->fc->no_open) { |
94 | /* | 117 | /* Do nothing when client does not implement 'open' */ |
95 | * Drop the release request when client does not | 118 | fuse_release_end(ff->fc, args, 0); |
96 | * implement 'open' | ||
97 | */ | ||
98 | __clear_bit(FR_BACKGROUND, &req->flags); | ||
99 | iput(req->misc.release.inode); | ||
100 | fuse_put_request(ff->fc, req); | ||
101 | } else if (sync) { | 119 | } else if (sync) { |
102 | __set_bit(FR_FORCE, &req->flags); | 120 | fuse_simple_request(ff->fc, args); |
103 | __clear_bit(FR_BACKGROUND, &req->flags); | 121 | fuse_release_end(ff->fc, args, 0); |
104 | fuse_request_send(ff->fc, req); | ||
105 | iput(req->misc.release.inode); | ||
106 | fuse_put_request(ff->fc, req); | ||
107 | } else { | 122 | } else { |
108 | req->end = fuse_release_end; | 123 | args->end = fuse_release_end; |
109 | __set_bit(FR_BACKGROUND, &req->flags); | 124 | if (fuse_simple_background(ff->fc, args, |
110 | fuse_request_send_background(ff->fc, req); | 125 | GFP_KERNEL | __GFP_NOFAIL)) |
126 | fuse_release_end(ff->fc, args, -ENOTCONN); | ||
111 | } | 127 | } |
112 | kfree(ff); | 128 | kfree(ff); |
113 | } | 129 | } |
@@ -227,8 +243,7 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, | |||
227 | int flags, int opcode) | 243 | int flags, int opcode) |
228 | { | 244 | { |
229 | struct fuse_conn *fc = ff->fc; | 245 | struct fuse_conn *fc = ff->fc; |
230 | struct fuse_req *req = ff->reserved_req; | 246 | struct fuse_release_args *ra = ff->release_args; |
231 | struct fuse_release_in *inarg = &req->misc.release.in; | ||
232 | 247 | ||
233 | /* Inode is NULL on error path of fuse_create_open() */ | 248 | /* Inode is NULL on error path of fuse_create_open() */ |
234 | if (likely(fi)) { | 249 | if (likely(fi)) { |
@@ -243,32 +258,33 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, | |||
243 | 258 | ||
244 | wake_up_interruptible_all(&ff->poll_wait); | 259 | wake_up_interruptible_all(&ff->poll_wait); |
245 | 260 | ||
246 | inarg->fh = ff->fh; | 261 | ra->inarg.fh = ff->fh; |
247 | inarg->flags = flags; | 262 | ra->inarg.flags = flags; |
248 | req->in.h.opcode = opcode; | 263 | ra->args.in_numargs = 1; |
249 | req->in.h.nodeid = ff->nodeid; | 264 | ra->args.in_args[0].size = sizeof(struct fuse_release_in); |
250 | req->in.numargs = 1; | 265 | ra->args.in_args[0].value = &ra->inarg; |
251 | req->in.args[0].size = sizeof(struct fuse_release_in); | 266 | ra->args.opcode = opcode; |
252 | req->in.args[0].value = inarg; | 267 | ra->args.nodeid = ff->nodeid; |
268 | ra->args.force = true; | ||
269 | ra->args.nocreds = true; | ||
253 | } | 270 | } |
254 | 271 | ||
255 | void fuse_release_common(struct file *file, bool isdir) | 272 | void fuse_release_common(struct file *file, bool isdir) |
256 | { | 273 | { |
257 | struct fuse_inode *fi = get_fuse_inode(file_inode(file)); | 274 | struct fuse_inode *fi = get_fuse_inode(file_inode(file)); |
258 | struct fuse_file *ff = file->private_data; | 275 | struct fuse_file *ff = file->private_data; |
259 | struct fuse_req *req = ff->reserved_req; | 276 | struct fuse_release_args *ra = ff->release_args; |
260 | int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; | 277 | int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; |
261 | 278 | ||
262 | fuse_prepare_release(fi, ff, file->f_flags, opcode); | 279 | fuse_prepare_release(fi, ff, file->f_flags, opcode); |
263 | 280 | ||
264 | if (ff->flock) { | 281 | if (ff->flock) { |
265 | struct fuse_release_in *inarg = &req->misc.release.in; | 282 | ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; |
266 | inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; | 283 | ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc, |
267 | inarg->lock_owner = fuse_lock_owner_id(ff->fc, | 284 | (fl_owner_t) file); |
268 | (fl_owner_t) file); | ||
269 | } | 285 | } |
270 | /* Hold inode until release is finished */ | 286 | /* Hold inode until release is finished */ |
271 | req->misc.release.inode = igrab(file_inode(file)); | 287 | ra->inode = igrab(file_inode(file)); |
272 | 288 | ||
273 | /* | 289 | /* |
274 | * Normally this will send the RELEASE request, however if | 290 | * Normally this will send the RELEASE request, however if |
@@ -279,7 +295,7 @@ void fuse_release_common(struct file *file, bool isdir) | |||
279 | * synchronous RELEASE is allowed (and desirable) in this case | 295 | * synchronous RELEASE is allowed (and desirable) in this case |
280 | * because the server can be trusted not to screw up. | 296 | * because the server can be trusted not to screw up. |
281 | */ | 297 | */ |
282 | fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir); | 298 | fuse_file_put(ff, ff->fc->destroy, isdir); |
283 | } | 299 | } |
284 | 300 | ||
285 | static int fuse_open(struct inode *inode, struct file *file) | 301 | static int fuse_open(struct inode *inode, struct file *file) |
@@ -335,19 +351,27 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) | |||
335 | return (u64) v0 + ((u64) v1 << 32); | 351 | return (u64) v0 + ((u64) v1 << 32); |
336 | } | 352 | } |
337 | 353 | ||
338 | static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi, | 354 | struct fuse_writepage_args { |
355 | struct fuse_io_args ia; | ||
356 | struct list_head writepages_entry; | ||
357 | struct list_head queue_entry; | ||
358 | struct fuse_writepage_args *next; | ||
359 | struct inode *inode; | ||
360 | }; | ||
361 | |||
362 | static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi, | ||
339 | pgoff_t idx_from, pgoff_t idx_to) | 363 | pgoff_t idx_from, pgoff_t idx_to) |
340 | { | 364 | { |
341 | struct fuse_req *req; | 365 | struct fuse_writepage_args *wpa; |
342 | 366 | ||
343 | list_for_each_entry(req, &fi->writepages, writepages_entry) { | 367 | list_for_each_entry(wpa, &fi->writepages, writepages_entry) { |
344 | pgoff_t curr_index; | 368 | pgoff_t curr_index; |
345 | 369 | ||
346 | WARN_ON(get_fuse_inode(req->inode) != fi); | 370 | WARN_ON(get_fuse_inode(wpa->inode) != fi); |
347 | curr_index = req->misc.write.in.offset >> PAGE_SHIFT; | 371 | curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT; |
348 | if (idx_from < curr_index + req->num_pages && | 372 | if (idx_from < curr_index + wpa->ia.ap.num_pages && |
349 | curr_index <= idx_to) { | 373 | curr_index <= idx_to) { |
350 | return req; | 374 | return wpa; |
351 | } | 375 | } |
352 | } | 376 | } |
353 | return NULL; | 377 | return NULL; |
@@ -383,12 +407,11 @@ static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | |||
383 | * Since fuse doesn't rely on the VM writeback tracking, this has to | 407 | * Since fuse doesn't rely on the VM writeback tracking, this has to |
384 | * use some other means. | 408 | * use some other means. |
385 | */ | 409 | */ |
386 | static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) | 410 | static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) |
387 | { | 411 | { |
388 | struct fuse_inode *fi = get_fuse_inode(inode); | 412 | struct fuse_inode *fi = get_fuse_inode(inode); |
389 | 413 | ||
390 | wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index)); | 414 | wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index)); |
391 | return 0; | ||
392 | } | 415 | } |
393 | 416 | ||
394 | /* | 417 | /* |
@@ -411,8 +434,8 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |||
411 | struct inode *inode = file_inode(file); | 434 | struct inode *inode = file_inode(file); |
412 | struct fuse_conn *fc = get_fuse_conn(inode); | 435 | struct fuse_conn *fc = get_fuse_conn(inode); |
413 | struct fuse_file *ff = file->private_data; | 436 | struct fuse_file *ff = file->private_data; |
414 | struct fuse_req *req; | ||
415 | struct fuse_flush_in inarg; | 437 | struct fuse_flush_in inarg; |
438 | FUSE_ARGS(args); | ||
416 | int err; | 439 | int err; |
417 | 440 | ||
418 | if (is_bad_inode(inode)) | 441 | if (is_bad_inode(inode)) |
@@ -433,19 +456,17 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |||
433 | if (err) | 456 | if (err) |
434 | return err; | 457 | return err; |
435 | 458 | ||
436 | req = fuse_get_req_nofail_nopages(fc, file); | ||
437 | memset(&inarg, 0, sizeof(inarg)); | 459 | memset(&inarg, 0, sizeof(inarg)); |
438 | inarg.fh = ff->fh; | 460 | inarg.fh = ff->fh; |
439 | inarg.lock_owner = fuse_lock_owner_id(fc, id); | 461 | inarg.lock_owner = fuse_lock_owner_id(fc, id); |
440 | req->in.h.opcode = FUSE_FLUSH; | 462 | args.opcode = FUSE_FLUSH; |
441 | req->in.h.nodeid = get_node_id(inode); | 463 | args.nodeid = get_node_id(inode); |
442 | req->in.numargs = 1; | 464 | args.in_numargs = 1; |
443 | req->in.args[0].size = sizeof(inarg); | 465 | args.in_args[0].size = sizeof(inarg); |
444 | req->in.args[0].value = &inarg; | 466 | args.in_args[0].value = &inarg; |
445 | __set_bit(FR_FORCE, &req->flags); | 467 | args.force = true; |
446 | fuse_request_send(fc, req); | 468 | |
447 | err = req->out.h.error; | 469 | err = fuse_simple_request(fc, &args); |
448 | fuse_put_request(fc, req); | ||
449 | if (err == -ENOSYS) { | 470 | if (err == -ENOSYS) { |
450 | fc->no_flush = 1; | 471 | fc->no_flush = 1; |
451 | err = 0; | 472 | err = 0; |
@@ -465,11 +486,11 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, | |||
465 | memset(&inarg, 0, sizeof(inarg)); | 486 | memset(&inarg, 0, sizeof(inarg)); |
466 | inarg.fh = ff->fh; | 487 | inarg.fh = ff->fh; |
467 | inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0; | 488 | inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0; |
468 | args.in.h.opcode = opcode; | 489 | args.opcode = opcode; |
469 | args.in.h.nodeid = get_node_id(inode); | 490 | args.nodeid = get_node_id(inode); |
470 | args.in.numargs = 1; | 491 | args.in_numargs = 1; |
471 | args.in.args[0].size = sizeof(inarg); | 492 | args.in_args[0].size = sizeof(inarg); |
472 | args.in.args[0].value = &inarg; | 493 | args.in_args[0].value = &inarg; |
473 | return fuse_simple_request(fc, &args); | 494 | return fuse_simple_request(fc, &args); |
474 | } | 495 | } |
475 | 496 | ||
@@ -523,35 +544,35 @@ out: | |||
523 | return err; | 544 | return err; |
524 | } | 545 | } |
525 | 546 | ||
526 | void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, | 547 | void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos, |
527 | size_t count, int opcode) | 548 | size_t count, int opcode) |
528 | { | 549 | { |
529 | struct fuse_read_in *inarg = &req->misc.read.in; | ||
530 | struct fuse_file *ff = file->private_data; | 550 | struct fuse_file *ff = file->private_data; |
551 | struct fuse_args *args = &ia->ap.args; | ||
531 | 552 | ||
532 | inarg->fh = ff->fh; | 553 | ia->read.in.fh = ff->fh; |
533 | inarg->offset = pos; | 554 | ia->read.in.offset = pos; |
534 | inarg->size = count; | 555 | ia->read.in.size = count; |
535 | inarg->flags = file->f_flags; | 556 | ia->read.in.flags = file->f_flags; |
536 | req->in.h.opcode = opcode; | 557 | args->opcode = opcode; |
537 | req->in.h.nodeid = ff->nodeid; | 558 | args->nodeid = ff->nodeid; |
538 | req->in.numargs = 1; | 559 | args->in_numargs = 1; |
539 | req->in.args[0].size = sizeof(struct fuse_read_in); | 560 | args->in_args[0].size = sizeof(ia->read.in); |
540 | req->in.args[0].value = inarg; | 561 | args->in_args[0].value = &ia->read.in; |
541 | req->out.argvar = 1; | 562 | args->out_argvar = true; |
542 | req->out.numargs = 1; | 563 | args->out_numargs = 1; |
543 | req->out.args[0].size = count; | 564 | args->out_args[0].size = count; |
544 | } | 565 | } |
545 | 566 | ||
546 | static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty) | 567 | static void fuse_release_user_pages(struct fuse_args_pages *ap, |
568 | bool should_dirty) | ||
547 | { | 569 | { |
548 | unsigned i; | 570 | unsigned int i; |
549 | 571 | ||
550 | for (i = 0; i < req->num_pages; i++) { | 572 | for (i = 0; i < ap->num_pages; i++) { |
551 | struct page *page = req->pages[i]; | ||
552 | if (should_dirty) | 573 | if (should_dirty) |
553 | set_page_dirty_lock(page); | 574 | set_page_dirty_lock(ap->pages[i]); |
554 | put_page(page); | 575 | put_page(ap->pages[i]); |
555 | } | 576 | } |
556 | } | 577 | } |
557 | 578 | ||
@@ -621,64 +642,94 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) | |||
621 | kref_put(&io->refcnt, fuse_io_release); | 642 | kref_put(&io->refcnt, fuse_io_release); |
622 | } | 643 | } |
623 | 644 | ||
624 | static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) | 645 | static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io, |
646 | unsigned int npages) | ||
647 | { | ||
648 | struct fuse_io_args *ia; | ||
649 | |||
650 | ia = kzalloc(sizeof(*ia), GFP_KERNEL); | ||
651 | if (ia) { | ||
652 | ia->io = io; | ||
653 | ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL, | ||
654 | &ia->ap.descs); | ||
655 | if (!ia->ap.pages) { | ||
656 | kfree(ia); | ||
657 | ia = NULL; | ||
658 | } | ||
659 | } | ||
660 | return ia; | ||
661 | } | ||
662 | |||
663 | static void fuse_io_free(struct fuse_io_args *ia) | ||
664 | { | ||
665 | kfree(ia->ap.pages); | ||
666 | kfree(ia); | ||
667 | } | ||
668 | |||
669 | static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args, | ||
670 | int err) | ||
625 | { | 671 | { |
626 | struct fuse_io_priv *io = req->io; | 672 | struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); |
673 | struct fuse_io_priv *io = ia->io; | ||
627 | ssize_t pos = -1; | 674 | ssize_t pos = -1; |
628 | 675 | ||
629 | fuse_release_user_pages(req, io->should_dirty); | 676 | fuse_release_user_pages(&ia->ap, io->should_dirty); |
630 | 677 | ||
631 | if (io->write) { | 678 | if (err) { |
632 | if (req->misc.write.in.size != req->misc.write.out.size) | 679 | /* Nothing */ |
633 | pos = req->misc.write.in.offset - io->offset + | 680 | } else if (io->write) { |
634 | req->misc.write.out.size; | 681 | if (ia->write.out.size > ia->write.in.size) { |
682 | err = -EIO; | ||
683 | } else if (ia->write.in.size != ia->write.out.size) { | ||
684 | pos = ia->write.in.offset - io->offset + | ||
685 | ia->write.out.size; | ||
686 | } | ||
635 | } else { | 687 | } else { |
636 | if (req->misc.read.in.size != req->out.args[0].size) | 688 | u32 outsize = args->out_args[0].size; |
637 | pos = req->misc.read.in.offset - io->offset + | 689 | |
638 | req->out.args[0].size; | 690 | if (ia->read.in.size != outsize) |
691 | pos = ia->read.in.offset - io->offset + outsize; | ||
639 | } | 692 | } |
640 | 693 | ||
641 | fuse_aio_complete(io, req->out.h.error, pos); | 694 | fuse_aio_complete(io, err, pos); |
695 | fuse_io_free(ia); | ||
642 | } | 696 | } |
643 | 697 | ||
644 | static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, | 698 | static ssize_t fuse_async_req_send(struct fuse_conn *fc, |
645 | size_t num_bytes, struct fuse_io_priv *io) | 699 | struct fuse_io_args *ia, size_t num_bytes) |
646 | { | 700 | { |
701 | ssize_t err; | ||
702 | struct fuse_io_priv *io = ia->io; | ||
703 | |||
647 | spin_lock(&io->lock); | 704 | spin_lock(&io->lock); |
648 | kref_get(&io->refcnt); | 705 | kref_get(&io->refcnt); |
649 | io->size += num_bytes; | 706 | io->size += num_bytes; |
650 | io->reqs++; | 707 | io->reqs++; |
651 | spin_unlock(&io->lock); | 708 | spin_unlock(&io->lock); |
652 | 709 | ||
653 | req->io = io; | 710 | ia->ap.args.end = fuse_aio_complete_req; |
654 | req->end = fuse_aio_complete_req; | 711 | err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL); |
655 | 712 | ||
656 | __fuse_get_request(req); | 713 | return err ?: num_bytes; |
657 | fuse_request_send_background(fc, req); | ||
658 | |||
659 | return num_bytes; | ||
660 | } | 714 | } |
661 | 715 | ||
662 | static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io, | 716 | static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count, |
663 | loff_t pos, size_t count, fl_owner_t owner) | 717 | fl_owner_t owner) |
664 | { | 718 | { |
665 | struct file *file = io->iocb->ki_filp; | 719 | struct file *file = ia->io->iocb->ki_filp; |
666 | struct fuse_file *ff = file->private_data; | 720 | struct fuse_file *ff = file->private_data; |
667 | struct fuse_conn *fc = ff->fc; | 721 | struct fuse_conn *fc = ff->fc; |
668 | 722 | ||
669 | fuse_read_fill(req, file, pos, count, FUSE_READ); | 723 | fuse_read_args_fill(ia, file, pos, count, FUSE_READ); |
670 | if (owner != NULL) { | 724 | if (owner != NULL) { |
671 | struct fuse_read_in *inarg = &req->misc.read.in; | 725 | ia->read.in.read_flags |= FUSE_READ_LOCKOWNER; |
672 | 726 | ia->read.in.lock_owner = fuse_lock_owner_id(fc, owner); | |
673 | inarg->read_flags |= FUSE_READ_LOCKOWNER; | ||
674 | inarg->lock_owner = fuse_lock_owner_id(fc, owner); | ||
675 | } | 727 | } |
676 | 728 | ||
677 | if (io->async) | 729 | if (ia->io->async) |
678 | return fuse_async_req_send(fc, req, count, io); | 730 | return fuse_async_req_send(fc, ia, count); |
679 | 731 | ||
680 | fuse_request_send(fc, req); | 732 | return fuse_simple_request(fc, &ia->ap.args); |
681 | return req->out.args[0].size; | ||
682 | } | 733 | } |
683 | 734 | ||
684 | static void fuse_read_update_size(struct inode *inode, loff_t size, | 735 | static void fuse_read_update_size(struct inode *inode, loff_t size, |
@@ -696,10 +747,9 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, | |||
696 | spin_unlock(&fi->lock); | 747 | spin_unlock(&fi->lock); |
697 | } | 748 | } |
698 | 749 | ||
699 | static void fuse_short_read(struct fuse_req *req, struct inode *inode, | 750 | static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read, |
700 | u64 attr_ver) | 751 | struct fuse_args_pages *ap) |
701 | { | 752 | { |
702 | size_t num_read = req->out.args[0].size; | ||
703 | struct fuse_conn *fc = get_fuse_conn(inode); | 753 | struct fuse_conn *fc = get_fuse_conn(inode); |
704 | 754 | ||
705 | if (fc->writeback_cache) { | 755 | if (fc->writeback_cache) { |
@@ -712,28 +762,31 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode, | |||
712 | int start_idx = num_read >> PAGE_SHIFT; | 762 | int start_idx = num_read >> PAGE_SHIFT; |
713 | size_t off = num_read & (PAGE_SIZE - 1); | 763 | size_t off = num_read & (PAGE_SIZE - 1); |
714 | 764 | ||
715 | for (i = start_idx; i < req->num_pages; i++) { | 765 | for (i = start_idx; i < ap->num_pages; i++) { |
716 | zero_user_segment(req->pages[i], off, PAGE_SIZE); | 766 | zero_user_segment(ap->pages[i], off, PAGE_SIZE); |
717 | off = 0; | 767 | off = 0; |
718 | } | 768 | } |
719 | } else { | 769 | } else { |
720 | loff_t pos = page_offset(req->pages[0]) + num_read; | 770 | loff_t pos = page_offset(ap->pages[0]) + num_read; |
721 | fuse_read_update_size(inode, pos, attr_ver); | 771 | fuse_read_update_size(inode, pos, attr_ver); |
722 | } | 772 | } |
723 | } | 773 | } |
724 | 774 | ||
725 | static int fuse_do_readpage(struct file *file, struct page *page) | 775 | static int fuse_do_readpage(struct file *file, struct page *page) |
726 | { | 776 | { |
727 | struct kiocb iocb; | ||
728 | struct fuse_io_priv io; | ||
729 | struct inode *inode = page->mapping->host; | 777 | struct inode *inode = page->mapping->host; |
730 | struct fuse_conn *fc = get_fuse_conn(inode); | 778 | struct fuse_conn *fc = get_fuse_conn(inode); |
731 | struct fuse_req *req; | ||
732 | size_t num_read; | ||
733 | loff_t pos = page_offset(page); | 779 | loff_t pos = page_offset(page); |
734 | size_t count = PAGE_SIZE; | 780 | struct fuse_page_desc desc = { .length = PAGE_SIZE }; |
781 | struct fuse_io_args ia = { | ||
782 | .ap.args.page_zeroing = true, | ||
783 | .ap.args.out_pages = true, | ||
784 | .ap.num_pages = 1, | ||
785 | .ap.pages = &page, | ||
786 | .ap.descs = &desc, | ||
787 | }; | ||
788 | ssize_t res; | ||
735 | u64 attr_ver; | 789 | u64 attr_ver; |
736 | int err; | ||
737 | 790 | ||
738 | /* | 791 | /* |
739 | * Page writeback can extend beyond the lifetime of the | 792 | * Page writeback can extend beyond the lifetime of the |
@@ -742,35 +795,21 @@ static int fuse_do_readpage(struct file *file, struct page *page) | |||
742 | */ | 795 | */ |
743 | fuse_wait_on_page_writeback(inode, page->index); | 796 | fuse_wait_on_page_writeback(inode, page->index); |
744 | 797 | ||
745 | req = fuse_get_req(fc, 1); | ||
746 | if (IS_ERR(req)) | ||
747 | return PTR_ERR(req); | ||
748 | |||
749 | attr_ver = fuse_get_attr_version(fc); | 798 | attr_ver = fuse_get_attr_version(fc); |
750 | 799 | ||
751 | req->out.page_zeroing = 1; | 800 | fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); |
752 | req->out.argpages = 1; | 801 | res = fuse_simple_request(fc, &ia.ap.args); |
753 | req->num_pages = 1; | 802 | if (res < 0) |
754 | req->pages[0] = page; | 803 | return res; |
755 | req->page_descs[0].length = count; | 804 | /* |
756 | init_sync_kiocb(&iocb, file); | 805 | * Short read means EOF. If file size is larger, truncate it |
757 | io = (struct fuse_io_priv) FUSE_IO_PRIV_SYNC(&iocb); | 806 | */ |
758 | num_read = fuse_send_read(req, &io, pos, count, NULL); | 807 | if (res < desc.length) |
759 | err = req->out.h.error; | 808 | fuse_short_read(inode, attr_ver, res, &ia.ap); |
760 | |||
761 | if (!err) { | ||
762 | /* | ||
763 | * Short read means EOF. If file size is larger, truncate it | ||
764 | */ | ||
765 | if (num_read < count) | ||
766 | fuse_short_read(req, inode, attr_ver); | ||
767 | |||
768 | SetPageUptodate(page); | ||
769 | } | ||
770 | 809 | ||
771 | fuse_put_request(fc, req); | 810 | SetPageUptodate(page); |
772 | 811 | ||
773 | return err; | 812 | return 0; |
774 | } | 813 | } |
775 | 814 | ||
776 | static int fuse_readpage(struct file *file, struct page *page) | 815 | static int fuse_readpage(struct file *file, struct page *page) |
@@ -789,15 +828,18 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
789 | return err; | 828 | return err; |
790 | } | 829 | } |
791 | 830 | ||
792 | static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) | 831 | static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args, |
832 | int err) | ||
793 | { | 833 | { |
794 | int i; | 834 | int i; |
795 | size_t count = req->misc.read.in.size; | 835 | struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); |
796 | size_t num_read = req->out.args[0].size; | 836 | struct fuse_args_pages *ap = &ia->ap; |
837 | size_t count = ia->read.in.size; | ||
838 | size_t num_read = args->out_args[0].size; | ||
797 | struct address_space *mapping = NULL; | 839 | struct address_space *mapping = NULL; |
798 | 840 | ||
799 | for (i = 0; mapping == NULL && i < req->num_pages; i++) | 841 | for (i = 0; mapping == NULL && i < ap->num_pages; i++) |
800 | mapping = req->pages[i]->mapping; | 842 | mapping = ap->pages[i]->mapping; |
801 | 843 | ||
802 | if (mapping) { | 844 | if (mapping) { |
803 | struct inode *inode = mapping->host; | 845 | struct inode *inode = mapping->host; |
@@ -805,93 +847,97 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) | |||
805 | /* | 847 | /* |
806 | * Short read means EOF. If file size is larger, truncate it | 848 | * Short read means EOF. If file size is larger, truncate it |
807 | */ | 849 | */ |
808 | if (!req->out.h.error && num_read < count) | 850 | if (!err && num_read < count) |
809 | fuse_short_read(req, inode, req->misc.read.attr_ver); | 851 | fuse_short_read(inode, ia->read.attr_ver, num_read, ap); |
810 | 852 | ||
811 | fuse_invalidate_atime(inode); | 853 | fuse_invalidate_atime(inode); |
812 | } | 854 | } |
813 | 855 | ||
814 | for (i = 0; i < req->num_pages; i++) { | 856 | for (i = 0; i < ap->num_pages; i++) { |
815 | struct page *page = req->pages[i]; | 857 | struct page *page = ap->pages[i]; |
816 | if (!req->out.h.error) | 858 | |
859 | if (!err) | ||
817 | SetPageUptodate(page); | 860 | SetPageUptodate(page); |
818 | else | 861 | else |
819 | SetPageError(page); | 862 | SetPageError(page); |
820 | unlock_page(page); | 863 | unlock_page(page); |
821 | put_page(page); | 864 | put_page(page); |
822 | } | 865 | } |
823 | if (req->ff) | 866 | if (ia->ff) |
824 | fuse_file_put(req->ff, false, false); | 867 | fuse_file_put(ia->ff, false, false); |
868 | |||
869 | fuse_io_free(ia); | ||
825 | } | 870 | } |
826 | 871 | ||
827 | static void fuse_send_readpages(struct fuse_req *req, struct file *file) | 872 | static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) |
828 | { | 873 | { |
829 | struct fuse_file *ff = file->private_data; | 874 | struct fuse_file *ff = file->private_data; |
830 | struct fuse_conn *fc = ff->fc; | 875 | struct fuse_conn *fc = ff->fc; |
831 | loff_t pos = page_offset(req->pages[0]); | 876 | struct fuse_args_pages *ap = &ia->ap; |
832 | size_t count = req->num_pages << PAGE_SHIFT; | 877 | loff_t pos = page_offset(ap->pages[0]); |
833 | 878 | size_t count = ap->num_pages << PAGE_SHIFT; | |
834 | req->out.argpages = 1; | 879 | int err; |
835 | req->out.page_zeroing = 1; | 880 | |
836 | req->out.page_replace = 1; | 881 | ap->args.out_pages = true; |
837 | fuse_read_fill(req, file, pos, count, FUSE_READ); | 882 | ap->args.page_zeroing = true; |
838 | req->misc.read.attr_ver = fuse_get_attr_version(fc); | 883 | ap->args.page_replace = true; |
884 | fuse_read_args_fill(ia, file, pos, count, FUSE_READ); | ||
885 | ia->read.attr_ver = fuse_get_attr_version(fc); | ||
839 | if (fc->async_read) { | 886 | if (fc->async_read) { |
840 | req->ff = fuse_file_get(ff); | 887 | ia->ff = fuse_file_get(ff); |
841 | req->end = fuse_readpages_end; | 888 | ap->args.end = fuse_readpages_end; |
842 | fuse_request_send_background(fc, req); | 889 | err = fuse_simple_background(fc, &ap->args, GFP_KERNEL); |
890 | if (!err) | ||
891 | return; | ||
843 | } else { | 892 | } else { |
844 | fuse_request_send(fc, req); | 893 | err = fuse_simple_request(fc, &ap->args); |
845 | fuse_readpages_end(fc, req); | ||
846 | fuse_put_request(fc, req); | ||
847 | } | 894 | } |
895 | fuse_readpages_end(fc, &ap->args, err); | ||
848 | } | 896 | } |
849 | 897 | ||
850 | struct fuse_fill_data { | 898 | struct fuse_fill_data { |
851 | struct fuse_req *req; | 899 | struct fuse_io_args *ia; |
852 | struct file *file; | 900 | struct file *file; |
853 | struct inode *inode; | 901 | struct inode *inode; |
854 | unsigned nr_pages; | 902 | unsigned int nr_pages; |
903 | unsigned int max_pages; | ||
855 | }; | 904 | }; |
856 | 905 | ||
857 | static int fuse_readpages_fill(void *_data, struct page *page) | 906 | static int fuse_readpages_fill(void *_data, struct page *page) |
858 | { | 907 | { |
859 | struct fuse_fill_data *data = _data; | 908 | struct fuse_fill_data *data = _data; |
860 | struct fuse_req *req = data->req; | 909 | struct fuse_io_args *ia = data->ia; |
910 | struct fuse_args_pages *ap = &ia->ap; | ||
861 | struct inode *inode = data->inode; | 911 | struct inode *inode = data->inode; |
862 | struct fuse_conn *fc = get_fuse_conn(inode); | 912 | struct fuse_conn *fc = get_fuse_conn(inode); |
863 | 913 | ||
864 | fuse_wait_on_page_writeback(inode, page->index); | 914 | fuse_wait_on_page_writeback(inode, page->index); |
865 | 915 | ||
866 | if (req->num_pages && | 916 | if (ap->num_pages && |
867 | (req->num_pages == fc->max_pages || | 917 | (ap->num_pages == fc->max_pages || |
868 | (req->num_pages + 1) * PAGE_SIZE > fc->max_read || | 918 | (ap->num_pages + 1) * PAGE_SIZE > fc->max_read || |
869 | req->pages[req->num_pages - 1]->index + 1 != page->index)) { | 919 | ap->pages[ap->num_pages - 1]->index + 1 != page->index)) { |
870 | unsigned int nr_alloc = min_t(unsigned int, data->nr_pages, | 920 | data->max_pages = min_t(unsigned int, data->nr_pages, |
871 | fc->max_pages); | 921 | fc->max_pages); |
872 | fuse_send_readpages(req, data->file); | 922 | fuse_send_readpages(ia, data->file); |
873 | if (fc->async_read) | 923 | data->ia = ia = fuse_io_alloc(NULL, data->max_pages); |
874 | req = fuse_get_req_for_background(fc, nr_alloc); | 924 | if (!ia) { |
875 | else | ||
876 | req = fuse_get_req(fc, nr_alloc); | ||
877 | |||
878 | data->req = req; | ||
879 | if (IS_ERR(req)) { | ||
880 | unlock_page(page); | 925 | unlock_page(page); |
881 | return PTR_ERR(req); | 926 | return -ENOMEM; |
882 | } | 927 | } |
928 | ap = &ia->ap; | ||
883 | } | 929 | } |
884 | 930 | ||
885 | if (WARN_ON(req->num_pages >= req->max_pages)) { | 931 | if (WARN_ON(ap->num_pages >= data->max_pages)) { |
886 | unlock_page(page); | 932 | unlock_page(page); |
887 | fuse_put_request(fc, req); | 933 | fuse_io_free(ia); |
888 | return -EIO; | 934 | return -EIO; |
889 | } | 935 | } |
890 | 936 | ||
891 | get_page(page); | 937 | get_page(page); |
892 | req->pages[req->num_pages] = page; | 938 | ap->pages[ap->num_pages] = page; |
893 | req->page_descs[req->num_pages].length = PAGE_SIZE; | 939 | ap->descs[ap->num_pages].length = PAGE_SIZE; |
894 | req->num_pages++; | 940 | ap->num_pages++; |
895 | data->nr_pages--; | 941 | data->nr_pages--; |
896 | return 0; | 942 | return 0; |
897 | } | 943 | } |
@@ -903,7 +949,6 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, | |||
903 | struct fuse_conn *fc = get_fuse_conn(inode); | 949 | struct fuse_conn *fc = get_fuse_conn(inode); |
904 | struct fuse_fill_data data; | 950 | struct fuse_fill_data data; |
905 | int err; | 951 | int err; |
906 | unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages); | ||
907 | 952 | ||
908 | err = -EIO; | 953 | err = -EIO; |
909 | if (is_bad_inode(inode)) | 954 | if (is_bad_inode(inode)) |
@@ -911,21 +956,20 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, | |||
911 | 956 | ||
912 | data.file = file; | 957 | data.file = file; |
913 | data.inode = inode; | 958 | data.inode = inode; |
914 | if (fc->async_read) | ||
915 | data.req = fuse_get_req_for_background(fc, nr_alloc); | ||
916 | else | ||
917 | data.req = fuse_get_req(fc, nr_alloc); | ||
918 | data.nr_pages = nr_pages; | 959 | data.nr_pages = nr_pages; |
919 | err = PTR_ERR(data.req); | 960 | data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages); |
920 | if (IS_ERR(data.req)) | 961 | ; |
962 | data.ia = fuse_io_alloc(NULL, data.max_pages); | ||
963 | err = -ENOMEM; | ||
964 | if (!data.ia) | ||
921 | goto out; | 965 | goto out; |
922 | 966 | ||
923 | err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); | 967 | err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); |
924 | if (!err) { | 968 | if (!err) { |
925 | if (data.req->num_pages) | 969 | if (data.ia->ap.num_pages) |
926 | fuse_send_readpages(data.req, file); | 970 | fuse_send_readpages(data.ia, file); |
927 | else | 971 | else |
928 | fuse_put_request(fc, data.req); | 972 | fuse_io_free(data.ia); |
929 | } | 973 | } |
930 | out: | 974 | out: |
931 | return err; | 975 | return err; |
@@ -952,54 +996,65 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) | |||
952 | return generic_file_read_iter(iocb, to); | 996 | return generic_file_read_iter(iocb, to); |
953 | } | 997 | } |
954 | 998 | ||
955 | static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, | 999 | static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff, |
956 | loff_t pos, size_t count) | 1000 | loff_t pos, size_t count) |
957 | { | 1001 | { |
958 | struct fuse_write_in *inarg = &req->misc.write.in; | 1002 | struct fuse_args *args = &ia->ap.args; |
959 | struct fuse_write_out *outarg = &req->misc.write.out; | ||
960 | 1003 | ||
961 | inarg->fh = ff->fh; | 1004 | ia->write.in.fh = ff->fh; |
962 | inarg->offset = pos; | 1005 | ia->write.in.offset = pos; |
963 | inarg->size = count; | 1006 | ia->write.in.size = count; |
964 | req->in.h.opcode = FUSE_WRITE; | 1007 | args->opcode = FUSE_WRITE; |
965 | req->in.h.nodeid = ff->nodeid; | 1008 | args->nodeid = ff->nodeid; |
966 | req->in.numargs = 2; | 1009 | args->in_numargs = 2; |
967 | if (ff->fc->minor < 9) | 1010 | if (ff->fc->minor < 9) |
968 | req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; | 1011 | args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; |
969 | else | 1012 | else |
970 | req->in.args[0].size = sizeof(struct fuse_write_in); | 1013 | args->in_args[0].size = sizeof(ia->write.in); |
971 | req->in.args[0].value = inarg; | 1014 | args->in_args[0].value = &ia->write.in; |
972 | req->in.args[1].size = count; | 1015 | args->in_args[1].size = count; |
973 | req->out.numargs = 1; | 1016 | args->out_numargs = 1; |
974 | req->out.args[0].size = sizeof(struct fuse_write_out); | 1017 | args->out_args[0].size = sizeof(ia->write.out); |
975 | req->out.args[0].value = outarg; | 1018 | args->out_args[0].value = &ia->write.out; |
976 | } | 1019 | } |
977 | 1020 | ||
978 | static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, | 1021 | static unsigned int fuse_write_flags(struct kiocb *iocb) |
979 | loff_t pos, size_t count, fl_owner_t owner) | ||
980 | { | 1022 | { |
981 | struct kiocb *iocb = io->iocb; | 1023 | unsigned int flags = iocb->ki_filp->f_flags; |
1024 | |||
1025 | if (iocb->ki_flags & IOCB_DSYNC) | ||
1026 | flags |= O_DSYNC; | ||
1027 | if (iocb->ki_flags & IOCB_SYNC) | ||
1028 | flags |= O_SYNC; | ||
1029 | |||
1030 | return flags; | ||
1031 | } | ||
1032 | |||
1033 | static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, | ||
1034 | size_t count, fl_owner_t owner) | ||
1035 | { | ||
1036 | struct kiocb *iocb = ia->io->iocb; | ||
982 | struct file *file = iocb->ki_filp; | 1037 | struct file *file = iocb->ki_filp; |
983 | struct fuse_file *ff = file->private_data; | 1038 | struct fuse_file *ff = file->private_data; |
984 | struct fuse_conn *fc = ff->fc; | 1039 | struct fuse_conn *fc = ff->fc; |
985 | struct fuse_write_in *inarg = &req->misc.write.in; | 1040 | struct fuse_write_in *inarg = &ia->write.in; |
1041 | ssize_t err; | ||
986 | 1042 | ||
987 | fuse_write_fill(req, ff, pos, count); | 1043 | fuse_write_args_fill(ia, ff, pos, count); |
988 | inarg->flags = file->f_flags; | 1044 | inarg->flags = fuse_write_flags(iocb); |
989 | if (iocb->ki_flags & IOCB_DSYNC) | ||
990 | inarg->flags |= O_DSYNC; | ||
991 | if (iocb->ki_flags & IOCB_SYNC) | ||
992 | inarg->flags |= O_SYNC; | ||
993 | if (owner != NULL) { | 1045 | if (owner != NULL) { |
994 | inarg->write_flags |= FUSE_WRITE_LOCKOWNER; | 1046 | inarg->write_flags |= FUSE_WRITE_LOCKOWNER; |
995 | inarg->lock_owner = fuse_lock_owner_id(fc, owner); | 1047 | inarg->lock_owner = fuse_lock_owner_id(fc, owner); |
996 | } | 1048 | } |
997 | 1049 | ||
998 | if (io->async) | 1050 | if (ia->io->async) |
999 | return fuse_async_req_send(fc, req, count, io); | 1051 | return fuse_async_req_send(fc, ia, count); |
1052 | |||
1053 | err = fuse_simple_request(fc, &ia->ap.args); | ||
1054 | if (!err && ia->write.out.size > count) | ||
1055 | err = -EIO; | ||
1000 | 1056 | ||
1001 | fuse_request_send(fc, req); | 1057 | return err ?: ia->write.out.size; |
1002 | return req->misc.write.out.size; | ||
1003 | } | 1058 | } |
1004 | 1059 | ||
1005 | bool fuse_write_update_size(struct inode *inode, loff_t pos) | 1060 | bool fuse_write_update_size(struct inode *inode, loff_t pos) |
@@ -1019,26 +1074,31 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos) | |||
1019 | return ret; | 1074 | return ret; |
1020 | } | 1075 | } |
1021 | 1076 | ||
1022 | static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb, | 1077 | static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, |
1023 | struct inode *inode, loff_t pos, | 1078 | struct kiocb *iocb, struct inode *inode, |
1024 | size_t count) | 1079 | loff_t pos, size_t count) |
1025 | { | 1080 | { |
1026 | size_t res; | 1081 | struct fuse_args_pages *ap = &ia->ap; |
1027 | unsigned offset; | 1082 | struct file *file = iocb->ki_filp; |
1028 | unsigned i; | 1083 | struct fuse_file *ff = file->private_data; |
1029 | struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); | 1084 | struct fuse_conn *fc = ff->fc; |
1085 | unsigned int offset, i; | ||
1086 | int err; | ||
1030 | 1087 | ||
1031 | for (i = 0; i < req->num_pages; i++) | 1088 | for (i = 0; i < ap->num_pages; i++) |
1032 | fuse_wait_on_page_writeback(inode, req->pages[i]->index); | 1089 | fuse_wait_on_page_writeback(inode, ap->pages[i]->index); |
1033 | 1090 | ||
1034 | res = fuse_send_write(req, &io, pos, count, NULL); | 1091 | fuse_write_args_fill(ia, ff, pos, count); |
1092 | ia->write.in.flags = fuse_write_flags(iocb); | ||
1035 | 1093 | ||
1036 | offset = req->page_descs[0].offset; | 1094 | err = fuse_simple_request(fc, &ap->args); |
1037 | count = res; | ||
1038 | for (i = 0; i < req->num_pages; i++) { | ||
1039 | struct page *page = req->pages[i]; | ||
1040 | 1095 | ||
1041 | if (!req->out.h.error && !offset && count >= PAGE_SIZE) | 1096 | offset = ap->descs[0].offset; |
1097 | count = ia->write.out.size; | ||
1098 | for (i = 0; i < ap->num_pages; i++) { | ||
1099 | struct page *page = ap->pages[i]; | ||
1100 | |||
1101 | if (!err && !offset && count >= PAGE_SIZE) | ||
1042 | SetPageUptodate(page); | 1102 | SetPageUptodate(page); |
1043 | 1103 | ||
1044 | if (count > PAGE_SIZE - offset) | 1104 | if (count > PAGE_SIZE - offset) |
@@ -1051,20 +1111,21 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb, | |||
1051 | put_page(page); | 1111 | put_page(page); |
1052 | } | 1112 | } |
1053 | 1113 | ||
1054 | return res; | 1114 | return err; |
1055 | } | 1115 | } |
1056 | 1116 | ||
1057 | static ssize_t fuse_fill_write_pages(struct fuse_req *req, | 1117 | static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap, |
1058 | struct address_space *mapping, | 1118 | struct address_space *mapping, |
1059 | struct iov_iter *ii, loff_t pos) | 1119 | struct iov_iter *ii, loff_t pos, |
1120 | unsigned int max_pages) | ||
1060 | { | 1121 | { |
1061 | struct fuse_conn *fc = get_fuse_conn(mapping->host); | 1122 | struct fuse_conn *fc = get_fuse_conn(mapping->host); |
1062 | unsigned offset = pos & (PAGE_SIZE - 1); | 1123 | unsigned offset = pos & (PAGE_SIZE - 1); |
1063 | size_t count = 0; | 1124 | size_t count = 0; |
1064 | int err; | 1125 | int err; |
1065 | 1126 | ||
1066 | req->in.argpages = 1; | 1127 | ap->args.in_pages = true; |
1067 | req->page_descs[0].offset = offset; | 1128 | ap->descs[0].offset = offset; |
1068 | 1129 | ||
1069 | do { | 1130 | do { |
1070 | size_t tmp; | 1131 | size_t tmp; |
@@ -1100,9 +1161,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, | |||
1100 | } | 1161 | } |
1101 | 1162 | ||
1102 | err = 0; | 1163 | err = 0; |
1103 | req->pages[req->num_pages] = page; | 1164 | ap->pages[ap->num_pages] = page; |
1104 | req->page_descs[req->num_pages].length = tmp; | 1165 | ap->descs[ap->num_pages].length = tmp; |
1105 | req->num_pages++; | 1166 | ap->num_pages++; |
1106 | 1167 | ||
1107 | count += tmp; | 1168 | count += tmp; |
1108 | pos += tmp; | 1169 | pos += tmp; |
@@ -1113,7 +1174,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, | |||
1113 | if (!fc->big_writes) | 1174 | if (!fc->big_writes) |
1114 | break; | 1175 | break; |
1115 | } while (iov_iter_count(ii) && count < fc->max_write && | 1176 | } while (iov_iter_count(ii) && count < fc->max_write && |
1116 | req->num_pages < req->max_pages && offset == 0); | 1177 | ap->num_pages < max_pages && offset == 0); |
1117 | 1178 | ||
1118 | return count > 0 ? count : err; | 1179 | return count > 0 ? count : err; |
1119 | } | 1180 | } |
@@ -1141,27 +1202,27 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, | |||
1141 | set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); | 1202 | set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); |
1142 | 1203 | ||
1143 | do { | 1204 | do { |
1144 | struct fuse_req *req; | ||
1145 | ssize_t count; | 1205 | ssize_t count; |
1206 | struct fuse_io_args ia = {}; | ||
1207 | struct fuse_args_pages *ap = &ia.ap; | ||
1146 | unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii), | 1208 | unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii), |
1147 | fc->max_pages); | 1209 | fc->max_pages); |
1148 | 1210 | ||
1149 | req = fuse_get_req(fc, nr_pages); | 1211 | ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs); |
1150 | if (IS_ERR(req)) { | 1212 | if (!ap->pages) { |
1151 | err = PTR_ERR(req); | 1213 | err = -ENOMEM; |
1152 | break; | 1214 | break; |
1153 | } | 1215 | } |
1154 | 1216 | ||
1155 | count = fuse_fill_write_pages(req, mapping, ii, pos); | 1217 | count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages); |
1156 | if (count <= 0) { | 1218 | if (count <= 0) { |
1157 | err = count; | 1219 | err = count; |
1158 | } else { | 1220 | } else { |
1159 | size_t num_written; | 1221 | err = fuse_send_write_pages(&ia, iocb, inode, |
1160 | 1222 | pos, count); | |
1161 | num_written = fuse_send_write_pages(req, iocb, inode, | ||
1162 | pos, count); | ||
1163 | err = req->out.h.error; | ||
1164 | if (!err) { | 1223 | if (!err) { |
1224 | size_t num_written = ia.write.out.size; | ||
1225 | |||
1165 | res += num_written; | 1226 | res += num_written; |
1166 | pos += num_written; | 1227 | pos += num_written; |
1167 | 1228 | ||
@@ -1170,7 +1231,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, | |||
1170 | err = -EIO; | 1231 | err = -EIO; |
1171 | } | 1232 | } |
1172 | } | 1233 | } |
1173 | fuse_put_request(fc, req); | 1234 | kfree(ap->pages); |
1174 | } while (!err && iov_iter_count(ii)); | 1235 | } while (!err && iov_iter_count(ii)); |
1175 | 1236 | ||
1176 | if (res > 0) | 1237 | if (res > 0) |
@@ -1258,14 +1319,14 @@ out: | |||
1258 | return written ? written : err; | 1319 | return written ? written : err; |
1259 | } | 1320 | } |
1260 | 1321 | ||
1261 | static inline void fuse_page_descs_length_init(struct fuse_req *req, | 1322 | static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs, |
1262 | unsigned index, unsigned nr_pages) | 1323 | unsigned int index, |
1324 | unsigned int nr_pages) | ||
1263 | { | 1325 | { |
1264 | int i; | 1326 | int i; |
1265 | 1327 | ||
1266 | for (i = index; i < index + nr_pages; i++) | 1328 | for (i = index; i < index + nr_pages; i++) |
1267 | req->page_descs[i].length = PAGE_SIZE - | 1329 | descs[i].length = PAGE_SIZE - descs[i].offset; |
1268 | req->page_descs[i].offset; | ||
1269 | } | 1330 | } |
1270 | 1331 | ||
1271 | static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii) | 1332 | static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii) |
@@ -1279,8 +1340,9 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii, | |||
1279 | return min(iov_iter_single_seg_count(ii), max_size); | 1340 | return min(iov_iter_single_seg_count(ii), max_size); |
1280 | } | 1341 | } |
1281 | 1342 | ||
1282 | static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, | 1343 | static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, |
1283 | size_t *nbytesp, int write) | 1344 | size_t *nbytesp, int write, |
1345 | unsigned int max_pages) | ||
1284 | { | 1346 | { |
1285 | size_t nbytes = 0; /* # bytes already packed in req */ | 1347 | size_t nbytes = 0; /* # bytes already packed in req */ |
1286 | ssize_t ret = 0; | 1348 | ssize_t ret = 0; |
@@ -1291,21 +1353,21 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, | |||
1291 | size_t frag_size = fuse_get_frag_size(ii, *nbytesp); | 1353 | size_t frag_size = fuse_get_frag_size(ii, *nbytesp); |
1292 | 1354 | ||
1293 | if (write) | 1355 | if (write) |
1294 | req->in.args[1].value = (void *) user_addr; | 1356 | ap->args.in_args[1].value = (void *) user_addr; |
1295 | else | 1357 | else |
1296 | req->out.args[0].value = (void *) user_addr; | 1358 | ap->args.out_args[0].value = (void *) user_addr; |
1297 | 1359 | ||
1298 | iov_iter_advance(ii, frag_size); | 1360 | iov_iter_advance(ii, frag_size); |
1299 | *nbytesp = frag_size; | 1361 | *nbytesp = frag_size; |
1300 | return 0; | 1362 | return 0; |
1301 | } | 1363 | } |
1302 | 1364 | ||
1303 | while (nbytes < *nbytesp && req->num_pages < req->max_pages) { | 1365 | while (nbytes < *nbytesp && ap->num_pages < max_pages) { |
1304 | unsigned npages; | 1366 | unsigned npages; |
1305 | size_t start; | 1367 | size_t start; |
1306 | ret = iov_iter_get_pages(ii, &req->pages[req->num_pages], | 1368 | ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages], |
1307 | *nbytesp - nbytes, | 1369 | *nbytesp - nbytes, |
1308 | req->max_pages - req->num_pages, | 1370 | max_pages - ap->num_pages, |
1309 | &start); | 1371 | &start); |
1310 | if (ret < 0) | 1372 | if (ret < 0) |
1311 | break; | 1373 | break; |
@@ -1316,18 +1378,18 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, | |||
1316 | ret += start; | 1378 | ret += start; |
1317 | npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE; | 1379 | npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE; |
1318 | 1380 | ||
1319 | req->page_descs[req->num_pages].offset = start; | 1381 | ap->descs[ap->num_pages].offset = start; |
1320 | fuse_page_descs_length_init(req, req->num_pages, npages); | 1382 | fuse_page_descs_length_init(ap->descs, ap->num_pages, npages); |
1321 | 1383 | ||
1322 | req->num_pages += npages; | 1384 | ap->num_pages += npages; |
1323 | req->page_descs[req->num_pages - 1].length -= | 1385 | ap->descs[ap->num_pages - 1].length -= |
1324 | (PAGE_SIZE - ret) & (PAGE_SIZE - 1); | 1386 | (PAGE_SIZE - ret) & (PAGE_SIZE - 1); |
1325 | } | 1387 | } |
1326 | 1388 | ||
1327 | if (write) | 1389 | if (write) |
1328 | req->in.argpages = 1; | 1390 | ap->args.in_pages = 1; |
1329 | else | 1391 | else |
1330 | req->out.argpages = 1; | 1392 | ap->args.out_pages = 1; |
1331 | 1393 | ||
1332 | *nbytesp = nbytes; | 1394 | *nbytesp = nbytes; |
1333 | 1395 | ||
@@ -1349,17 +1411,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, | |||
1349 | pgoff_t idx_from = pos >> PAGE_SHIFT; | 1411 | pgoff_t idx_from = pos >> PAGE_SHIFT; |
1350 | pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT; | 1412 | pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT; |
1351 | ssize_t res = 0; | 1413 | ssize_t res = 0; |
1352 | struct fuse_req *req; | ||
1353 | int err = 0; | 1414 | int err = 0; |
1415 | struct fuse_io_args *ia; | ||
1416 | unsigned int max_pages; | ||
1354 | 1417 | ||
1355 | if (io->async) | 1418 | max_pages = iov_iter_npages(iter, fc->max_pages); |
1356 | req = fuse_get_req_for_background(fc, iov_iter_npages(iter, | 1419 | ia = fuse_io_alloc(io, max_pages); |
1357 | fc->max_pages)); | 1420 | if (!ia) |
1358 | else | 1421 | return -ENOMEM; |
1359 | req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages)); | ||
1360 | if (IS_ERR(req)) | ||
1361 | return PTR_ERR(req); | ||
1362 | 1422 | ||
1423 | ia->io = io; | ||
1363 | if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { | 1424 | if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { |
1364 | if (!write) | 1425 | if (!write) |
1365 | inode_lock(inode); | 1426 | inode_lock(inode); |
@@ -1370,54 +1431,49 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, | |||
1370 | 1431 | ||
1371 | io->should_dirty = !write && iter_is_iovec(iter); | 1432 | io->should_dirty = !write && iter_is_iovec(iter); |
1372 | while (count) { | 1433 | while (count) { |
1373 | size_t nres; | 1434 | ssize_t nres; |
1374 | fl_owner_t owner = current->files; | 1435 | fl_owner_t owner = current->files; |
1375 | size_t nbytes = min(count, nmax); | 1436 | size_t nbytes = min(count, nmax); |
1376 | err = fuse_get_user_pages(req, iter, &nbytes, write); | 1437 | |
1438 | err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write, | ||
1439 | max_pages); | ||
1377 | if (err && !nbytes) | 1440 | if (err && !nbytes) |
1378 | break; | 1441 | break; |
1379 | 1442 | ||
1380 | if (write) { | 1443 | if (write) { |
1381 | if (!capable(CAP_FSETID)) { | 1444 | if (!capable(CAP_FSETID)) |
1382 | struct fuse_write_in *inarg; | 1445 | ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV; |
1383 | 1446 | ||
1384 | inarg = &req->misc.write.in; | 1447 | nres = fuse_send_write(ia, pos, nbytes, owner); |
1385 | inarg->write_flags |= FUSE_WRITE_KILL_PRIV; | ||
1386 | } | ||
1387 | nres = fuse_send_write(req, io, pos, nbytes, owner); | ||
1388 | } else { | 1448 | } else { |
1389 | nres = fuse_send_read(req, io, pos, nbytes, owner); | 1449 | nres = fuse_send_read(ia, pos, nbytes, owner); |
1390 | } | 1450 | } |
1391 | 1451 | ||
1392 | if (!io->async) | 1452 | if (!io->async || nres < 0) { |
1393 | fuse_release_user_pages(req, io->should_dirty); | 1453 | fuse_release_user_pages(&ia->ap, io->should_dirty); |
1394 | if (req->out.h.error) { | 1454 | fuse_io_free(ia); |
1395 | err = req->out.h.error; | 1455 | } |
1396 | break; | 1456 | ia = NULL; |
1397 | } else if (nres > nbytes) { | 1457 | if (nres < 0) { |
1398 | res = 0; | 1458 | err = nres; |
1399 | err = -EIO; | ||
1400 | break; | 1459 | break; |
1401 | } | 1460 | } |
1461 | WARN_ON(nres > nbytes); | ||
1462 | |||
1402 | count -= nres; | 1463 | count -= nres; |
1403 | res += nres; | 1464 | res += nres; |
1404 | pos += nres; | 1465 | pos += nres; |
1405 | if (nres != nbytes) | 1466 | if (nres != nbytes) |
1406 | break; | 1467 | break; |
1407 | if (count) { | 1468 | if (count) { |
1408 | fuse_put_request(fc, req); | 1469 | max_pages = iov_iter_npages(iter, fc->max_pages); |
1409 | if (io->async) | 1470 | ia = fuse_io_alloc(io, max_pages); |
1410 | req = fuse_get_req_for_background(fc, | 1471 | if (!ia) |
1411 | iov_iter_npages(iter, fc->max_pages)); | ||
1412 | else | ||
1413 | req = fuse_get_req(fc, iov_iter_npages(iter, | ||
1414 | fc->max_pages)); | ||
1415 | if (IS_ERR(req)) | ||
1416 | break; | 1472 | break; |
1417 | } | 1473 | } |
1418 | } | 1474 | } |
1419 | if (!IS_ERR(req)) | 1475 | if (ia) |
1420 | fuse_put_request(fc, req); | 1476 | fuse_io_free(ia); |
1421 | if (res > 0) | 1477 | if (res > 0) |
1422 | *ppos = pos; | 1478 | *ppos = pos; |
1423 | 1479 | ||
@@ -1509,45 +1565,53 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
1509 | return fuse_direct_write_iter(iocb, from); | 1565 | return fuse_direct_write_iter(iocb, from); |
1510 | } | 1566 | } |
1511 | 1567 | ||
1512 | static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) | 1568 | static void fuse_writepage_free(struct fuse_writepage_args *wpa) |
1513 | { | 1569 | { |
1570 | struct fuse_args_pages *ap = &wpa->ia.ap; | ||
1514 | int i; | 1571 | int i; |
1515 | 1572 | ||
1516 | for (i = 0; i < req->num_pages; i++) | 1573 | for (i = 0; i < ap->num_pages; i++) |
1517 | __free_page(req->pages[i]); | 1574 | __free_page(ap->pages[i]); |
1575 | |||
1576 | if (wpa->ia.ff) | ||
1577 | fuse_file_put(wpa->ia.ff, false, false); | ||
1518 | 1578 | ||
1519 | if (req->ff) | 1579 | kfree(ap->pages); |
1520 | fuse_file_put(req->ff, false, false); | 1580 | kfree(wpa); |
1521 | } | 1581 | } |
1522 | 1582 | ||
1523 | static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) | 1583 | static void fuse_writepage_finish(struct fuse_conn *fc, |
1584 | struct fuse_writepage_args *wpa) | ||
1524 | { | 1585 | { |
1525 | struct inode *inode = req->inode; | 1586 | struct fuse_args_pages *ap = &wpa->ia.ap; |
1587 | struct inode *inode = wpa->inode; | ||
1526 | struct fuse_inode *fi = get_fuse_inode(inode); | 1588 | struct fuse_inode *fi = get_fuse_inode(inode); |
1527 | struct backing_dev_info *bdi = inode_to_bdi(inode); | 1589 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
1528 | int i; | 1590 | int i; |
1529 | 1591 | ||
1530 | list_del(&req->writepages_entry); | 1592 | list_del(&wpa->writepages_entry); |
1531 | for (i = 0; i < req->num_pages; i++) { | 1593 | for (i = 0; i < ap->num_pages; i++) { |
1532 | dec_wb_stat(&bdi->wb, WB_WRITEBACK); | 1594 | dec_wb_stat(&bdi->wb, WB_WRITEBACK); |
1533 | dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP); | 1595 | dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP); |
1534 | wb_writeout_inc(&bdi->wb); | 1596 | wb_writeout_inc(&bdi->wb); |
1535 | } | 1597 | } |
1536 | wake_up(&fi->page_waitq); | 1598 | wake_up(&fi->page_waitq); |
1537 | } | 1599 | } |
1538 | 1600 | ||
1539 | /* Called under fi->lock, may release and reacquire it */ | 1601 | /* Called under fi->lock, may release and reacquire it */ |
1540 | static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req, | 1602 | static void fuse_send_writepage(struct fuse_conn *fc, |
1541 | loff_t size) | 1603 | struct fuse_writepage_args *wpa, loff_t size) |
1542 | __releases(fi->lock) | 1604 | __releases(fi->lock) |
1543 | __acquires(fi->lock) | 1605 | __acquires(fi->lock) |
1544 | { | 1606 | { |
1545 | struct fuse_req *aux, *next; | 1607 | struct fuse_writepage_args *aux, *next; |
1546 | struct fuse_inode *fi = get_fuse_inode(req->inode); | 1608 | struct fuse_inode *fi = get_fuse_inode(wpa->inode); |
1547 | struct fuse_write_in *inarg = &req->misc.write.in; | 1609 | struct fuse_write_in *inarg = &wpa->ia.write.in; |
1548 | __u64 data_size = req->num_pages * PAGE_SIZE; | 1610 | struct fuse_args *args = &wpa->ia.ap.args; |
1549 | bool queued; | 1611 | __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE; |
1612 | int err; | ||
1550 | 1613 | ||
1614 | fi->writectr++; | ||
1551 | if (inarg->offset + data_size <= size) { | 1615 | if (inarg->offset + data_size <= size) { |
1552 | inarg->size = data_size; | 1616 | inarg->size = data_size; |
1553 | } else if (inarg->offset < size) { | 1617 | } else if (inarg->offset < size) { |
@@ -1557,29 +1621,36 @@ __acquires(fi->lock) | |||
1557 | goto out_free; | 1621 | goto out_free; |
1558 | } | 1622 | } |
1559 | 1623 | ||
1560 | req->in.args[1].size = inarg->size; | 1624 | args->in_args[1].size = inarg->size; |
1561 | queued = fuse_request_queue_background(fc, req); | 1625 | args->force = true; |
1626 | args->nocreds = true; | ||
1627 | |||
1628 | err = fuse_simple_background(fc, args, GFP_ATOMIC); | ||
1629 | if (err == -ENOMEM) { | ||
1630 | spin_unlock(&fi->lock); | ||
1631 | err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL); | ||
1632 | spin_lock(&fi->lock); | ||
1633 | } | ||
1634 | |||
1562 | /* Fails on broken connection only */ | 1635 | /* Fails on broken connection only */ |
1563 | if (unlikely(!queued)) | 1636 | if (unlikely(err)) |
1564 | goto out_free; | 1637 | goto out_free; |
1565 | 1638 | ||
1566 | fi->writectr++; | ||
1567 | return; | 1639 | return; |
1568 | 1640 | ||
1569 | out_free: | 1641 | out_free: |
1570 | fuse_writepage_finish(fc, req); | 1642 | fi->writectr--; |
1643 | fuse_writepage_finish(fc, wpa); | ||
1571 | spin_unlock(&fi->lock); | 1644 | spin_unlock(&fi->lock); |
1572 | 1645 | ||
1573 | /* After fuse_writepage_finish() aux request list is private */ | 1646 | /* After fuse_writepage_finish() aux request list is private */ |
1574 | for (aux = req->misc.write.next; aux; aux = next) { | 1647 | for (aux = wpa->next; aux; aux = next) { |
1575 | next = aux->misc.write.next; | 1648 | next = aux->next; |
1576 | aux->misc.write.next = NULL; | 1649 | aux->next = NULL; |
1577 | fuse_writepage_free(fc, aux); | 1650 | fuse_writepage_free(aux); |
1578 | fuse_put_request(fc, aux); | ||
1579 | } | 1651 | } |
1580 | 1652 | ||
1581 | fuse_writepage_free(fc, req); | 1653 | fuse_writepage_free(wpa); |
1582 | fuse_put_request(fc, req); | ||
1583 | spin_lock(&fi->lock); | 1654 | spin_lock(&fi->lock); |
1584 | } | 1655 | } |
1585 | 1656 | ||
@@ -1596,29 +1667,34 @@ __acquires(fi->lock) | |||
1596 | struct fuse_conn *fc = get_fuse_conn(inode); | 1667 | struct fuse_conn *fc = get_fuse_conn(inode); |
1597 | struct fuse_inode *fi = get_fuse_inode(inode); | 1668 | struct fuse_inode *fi = get_fuse_inode(inode); |
1598 | loff_t crop = i_size_read(inode); | 1669 | loff_t crop = i_size_read(inode); |
1599 | struct fuse_req *req; | 1670 | struct fuse_writepage_args *wpa; |
1600 | 1671 | ||
1601 | while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { | 1672 | while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { |
1602 | req = list_entry(fi->queued_writes.next, struct fuse_req, list); | 1673 | wpa = list_entry(fi->queued_writes.next, |
1603 | list_del_init(&req->list); | 1674 | struct fuse_writepage_args, queue_entry); |
1604 | fuse_send_writepage(fc, req, crop); | 1675 | list_del_init(&wpa->queue_entry); |
1676 | fuse_send_writepage(fc, wpa, crop); | ||
1605 | } | 1677 | } |
1606 | } | 1678 | } |
1607 | 1679 | ||
1608 | static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) | 1680 | static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, |
1681 | int error) | ||
1609 | { | 1682 | { |
1610 | struct inode *inode = req->inode; | 1683 | struct fuse_writepage_args *wpa = |
1684 | container_of(args, typeof(*wpa), ia.ap.args); | ||
1685 | struct inode *inode = wpa->inode; | ||
1611 | struct fuse_inode *fi = get_fuse_inode(inode); | 1686 | struct fuse_inode *fi = get_fuse_inode(inode); |
1612 | 1687 | ||
1613 | mapping_set_error(inode->i_mapping, req->out.h.error); | 1688 | mapping_set_error(inode->i_mapping, error); |
1614 | spin_lock(&fi->lock); | 1689 | spin_lock(&fi->lock); |
1615 | while (req->misc.write.next) { | 1690 | while (wpa->next) { |
1616 | struct fuse_conn *fc = get_fuse_conn(inode); | 1691 | struct fuse_conn *fc = get_fuse_conn(inode); |
1617 | struct fuse_write_in *inarg = &req->misc.write.in; | 1692 | struct fuse_write_in *inarg = &wpa->ia.write.in; |
1618 | struct fuse_req *next = req->misc.write.next; | 1693 | struct fuse_writepage_args *next = wpa->next; |
1619 | req->misc.write.next = next->misc.write.next; | 1694 | |
1620 | next->misc.write.next = NULL; | 1695 | wpa->next = next->next; |
1621 | next->ff = fuse_file_get(req->ff); | 1696 | next->next = NULL; |
1697 | next->ia.ff = fuse_file_get(wpa->ia.ff); | ||
1622 | list_add(&next->writepages_entry, &fi->writepages); | 1698 | list_add(&next->writepages_entry, &fi->writepages); |
1623 | 1699 | ||
1624 | /* | 1700 | /* |
@@ -1647,9 +1723,9 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) | |||
1647 | fuse_send_writepage(fc, next, inarg->offset + inarg->size); | 1723 | fuse_send_writepage(fc, next, inarg->offset + inarg->size); |
1648 | } | 1724 | } |
1649 | fi->writectr--; | 1725 | fi->writectr--; |
1650 | fuse_writepage_finish(fc, req); | 1726 | fuse_writepage_finish(fc, wpa); |
1651 | spin_unlock(&fi->lock); | 1727 | spin_unlock(&fi->lock); |
1652 | fuse_writepage_free(fc, req); | 1728 | fuse_writepage_free(wpa); |
1653 | } | 1729 | } |
1654 | 1730 | ||
1655 | static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc, | 1731 | static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc, |
@@ -1691,52 +1767,71 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1691 | return err; | 1767 | return err; |
1692 | } | 1768 | } |
1693 | 1769 | ||
1770 | static struct fuse_writepage_args *fuse_writepage_args_alloc(void) | ||
1771 | { | ||
1772 | struct fuse_writepage_args *wpa; | ||
1773 | struct fuse_args_pages *ap; | ||
1774 | |||
1775 | wpa = kzalloc(sizeof(*wpa), GFP_NOFS); | ||
1776 | if (wpa) { | ||
1777 | ap = &wpa->ia.ap; | ||
1778 | ap->num_pages = 0; | ||
1779 | ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs); | ||
1780 | if (!ap->pages) { | ||
1781 | kfree(wpa); | ||
1782 | wpa = NULL; | ||
1783 | } | ||
1784 | } | ||
1785 | return wpa; | ||
1786 | |||
1787 | } | ||
1788 | |||
1694 | static int fuse_writepage_locked(struct page *page) | 1789 | static int fuse_writepage_locked(struct page *page) |
1695 | { | 1790 | { |
1696 | struct address_space *mapping = page->mapping; | 1791 | struct address_space *mapping = page->mapping; |
1697 | struct inode *inode = mapping->host; | 1792 | struct inode *inode = mapping->host; |
1698 | struct fuse_conn *fc = get_fuse_conn(inode); | 1793 | struct fuse_conn *fc = get_fuse_conn(inode); |
1699 | struct fuse_inode *fi = get_fuse_inode(inode); | 1794 | struct fuse_inode *fi = get_fuse_inode(inode); |
1700 | struct fuse_req *req; | 1795 | struct fuse_writepage_args *wpa; |
1796 | struct fuse_args_pages *ap; | ||
1701 | struct page *tmp_page; | 1797 | struct page *tmp_page; |
1702 | int error = -ENOMEM; | 1798 | int error = -ENOMEM; |
1703 | 1799 | ||
1704 | set_page_writeback(page); | 1800 | set_page_writeback(page); |
1705 | 1801 | ||
1706 | req = fuse_request_alloc_nofs(1); | 1802 | wpa = fuse_writepage_args_alloc(); |
1707 | if (!req) | 1803 | if (!wpa) |
1708 | goto err; | 1804 | goto err; |
1805 | ap = &wpa->ia.ap; | ||
1709 | 1806 | ||
1710 | /* writeback always goes to bg_queue */ | ||
1711 | __set_bit(FR_BACKGROUND, &req->flags); | ||
1712 | tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 1807 | tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
1713 | if (!tmp_page) | 1808 | if (!tmp_page) |
1714 | goto err_free; | 1809 | goto err_free; |
1715 | 1810 | ||
1716 | error = -EIO; | 1811 | error = -EIO; |
1717 | req->ff = fuse_write_file_get(fc, fi); | 1812 | wpa->ia.ff = fuse_write_file_get(fc, fi); |
1718 | if (!req->ff) | 1813 | if (!wpa->ia.ff) |
1719 | goto err_nofile; | 1814 | goto err_nofile; |
1720 | 1815 | ||
1721 | fuse_write_fill(req, req->ff, page_offset(page), 0); | 1816 | fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0); |
1722 | 1817 | ||
1723 | copy_highpage(tmp_page, page); | 1818 | copy_highpage(tmp_page, page); |
1724 | req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; | 1819 | wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; |
1725 | req->misc.write.next = NULL; | 1820 | wpa->next = NULL; |
1726 | req->in.argpages = 1; | 1821 | ap->args.in_pages = true; |
1727 | req->num_pages = 1; | 1822 | ap->num_pages = 1; |
1728 | req->pages[0] = tmp_page; | 1823 | ap->pages[0] = tmp_page; |
1729 | req->page_descs[0].offset = 0; | 1824 | ap->descs[0].offset = 0; |
1730 | req->page_descs[0].length = PAGE_SIZE; | 1825 | ap->descs[0].length = PAGE_SIZE; |
1731 | req->end = fuse_writepage_end; | 1826 | ap->args.end = fuse_writepage_end; |
1732 | req->inode = inode; | 1827 | wpa->inode = inode; |
1733 | 1828 | ||
1734 | inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); | 1829 | inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); |
1735 | inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); | 1830 | inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); |
1736 | 1831 | ||
1737 | spin_lock(&fi->lock); | 1832 | spin_lock(&fi->lock); |
1738 | list_add(&req->writepages_entry, &fi->writepages); | 1833 | list_add(&wpa->writepages_entry, &fi->writepages); |
1739 | list_add_tail(&req->list, &fi->queued_writes); | 1834 | list_add_tail(&wpa->queue_entry, &fi->queued_writes); |
1740 | fuse_flush_writepages(inode); | 1835 | fuse_flush_writepages(inode); |
1741 | spin_unlock(&fi->lock); | 1836 | spin_unlock(&fi->lock); |
1742 | 1837 | ||
@@ -1747,7 +1842,7 @@ static int fuse_writepage_locked(struct page *page) | |||
1747 | err_nofile: | 1842 | err_nofile: |
1748 | __free_page(tmp_page); | 1843 | __free_page(tmp_page); |
1749 | err_free: | 1844 | err_free: |
1750 | fuse_request_free(req); | 1845 | kfree(wpa); |
1751 | err: | 1846 | err: |
1752 | mapping_set_error(page->mapping, error); | 1847 | mapping_set_error(page->mapping, error); |
1753 | end_page_writeback(page); | 1848 | end_page_writeback(page); |
@@ -1767,6 +1862,7 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc) | |||
1767 | WARN_ON(wbc->sync_mode == WB_SYNC_ALL); | 1862 | WARN_ON(wbc->sync_mode == WB_SYNC_ALL); |
1768 | 1863 | ||
1769 | redirty_page_for_writepage(wbc, page); | 1864 | redirty_page_for_writepage(wbc, page); |
1865 | unlock_page(page); | ||
1770 | return 0; | 1866 | return 0; |
1771 | } | 1867 | } |
1772 | 1868 | ||
@@ -1777,23 +1873,50 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc) | |||
1777 | } | 1873 | } |
1778 | 1874 | ||
1779 | struct fuse_fill_wb_data { | 1875 | struct fuse_fill_wb_data { |
1780 | struct fuse_req *req; | 1876 | struct fuse_writepage_args *wpa; |
1781 | struct fuse_file *ff; | 1877 | struct fuse_file *ff; |
1782 | struct inode *inode; | 1878 | struct inode *inode; |
1783 | struct page **orig_pages; | 1879 | struct page **orig_pages; |
1880 | unsigned int max_pages; | ||
1784 | }; | 1881 | }; |
1785 | 1882 | ||
1883 | static bool fuse_pages_realloc(struct fuse_fill_wb_data *data) | ||
1884 | { | ||
1885 | struct fuse_args_pages *ap = &data->wpa->ia.ap; | ||
1886 | struct fuse_conn *fc = get_fuse_conn(data->inode); | ||
1887 | struct page **pages; | ||
1888 | struct fuse_page_desc *descs; | ||
1889 | unsigned int npages = min_t(unsigned int, | ||
1890 | max_t(unsigned int, data->max_pages * 2, | ||
1891 | FUSE_DEFAULT_MAX_PAGES_PER_REQ), | ||
1892 | fc->max_pages); | ||
1893 | WARN_ON(npages <= data->max_pages); | ||
1894 | |||
1895 | pages = fuse_pages_alloc(npages, GFP_NOFS, &descs); | ||
1896 | if (!pages) | ||
1897 | return false; | ||
1898 | |||
1899 | memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages); | ||
1900 | memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages); | ||
1901 | kfree(ap->pages); | ||
1902 | ap->pages = pages; | ||
1903 | ap->descs = descs; | ||
1904 | data->max_pages = npages; | ||
1905 | |||
1906 | return true; | ||
1907 | } | ||
1908 | |||
1786 | static void fuse_writepages_send(struct fuse_fill_wb_data *data) | 1909 | static void fuse_writepages_send(struct fuse_fill_wb_data *data) |
1787 | { | 1910 | { |
1788 | struct fuse_req *req = data->req; | 1911 | struct fuse_writepage_args *wpa = data->wpa; |
1789 | struct inode *inode = data->inode; | 1912 | struct inode *inode = data->inode; |
1790 | struct fuse_inode *fi = get_fuse_inode(inode); | 1913 | struct fuse_inode *fi = get_fuse_inode(inode); |
1791 | int num_pages = req->num_pages; | 1914 | int num_pages = wpa->ia.ap.num_pages; |
1792 | int i; | 1915 | int i; |
1793 | 1916 | ||
1794 | req->ff = fuse_file_get(data->ff); | 1917 | wpa->ia.ff = fuse_file_get(data->ff); |
1795 | spin_lock(&fi->lock); | 1918 | spin_lock(&fi->lock); |
1796 | list_add_tail(&req->list, &fi->queued_writes); | 1919 | list_add_tail(&wpa->queue_entry, &fi->queued_writes); |
1797 | fuse_flush_writepages(inode); | 1920 | fuse_flush_writepages(inode); |
1798 | spin_unlock(&fi->lock); | 1921 | spin_unlock(&fi->lock); |
1799 | 1922 | ||
@@ -1808,54 +1931,52 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) | |||
1808 | * this new request onto the auxiliary list, otherwise reuse the existing one by | 1931 | * this new request onto the auxiliary list, otherwise reuse the existing one by |
1809 | * copying the new page contents over to the old temporary page. | 1932 | * copying the new page contents over to the old temporary page. |
1810 | */ | 1933 | */ |
1811 | static bool fuse_writepage_in_flight(struct fuse_req *new_req, | 1934 | static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, |
1812 | struct page *page) | 1935 | struct page *page) |
1813 | { | 1936 | { |
1814 | struct fuse_conn *fc = get_fuse_conn(new_req->inode); | 1937 | struct fuse_inode *fi = get_fuse_inode(new_wpa->inode); |
1815 | struct fuse_inode *fi = get_fuse_inode(new_req->inode); | 1938 | struct fuse_writepage_args *tmp; |
1816 | struct fuse_req *tmp; | 1939 | struct fuse_writepage_args *old_wpa; |
1817 | struct fuse_req *old_req; | 1940 | struct fuse_args_pages *new_ap = &new_wpa->ia.ap; |
1818 | 1941 | ||
1819 | WARN_ON(new_req->num_pages != 0); | 1942 | WARN_ON(new_ap->num_pages != 0); |
1820 | 1943 | ||
1821 | spin_lock(&fi->lock); | 1944 | spin_lock(&fi->lock); |
1822 | list_del(&new_req->writepages_entry); | 1945 | list_del(&new_wpa->writepages_entry); |
1823 | old_req = fuse_find_writeback(fi, page->index, page->index); | 1946 | old_wpa = fuse_find_writeback(fi, page->index, page->index); |
1824 | if (!old_req) { | 1947 | if (!old_wpa) { |
1825 | list_add(&new_req->writepages_entry, &fi->writepages); | 1948 | list_add(&new_wpa->writepages_entry, &fi->writepages); |
1826 | spin_unlock(&fi->lock); | 1949 | spin_unlock(&fi->lock); |
1827 | return false; | 1950 | return false; |
1828 | } | 1951 | } |
1829 | 1952 | ||
1830 | new_req->num_pages = 1; | 1953 | new_ap->num_pages = 1; |
1831 | for (tmp = old_req->misc.write.next; tmp; tmp = tmp->misc.write.next) { | 1954 | for (tmp = old_wpa->next; tmp; tmp = tmp->next) { |
1832 | pgoff_t curr_index; | 1955 | pgoff_t curr_index; |
1833 | 1956 | ||
1834 | WARN_ON(tmp->inode != new_req->inode); | 1957 | WARN_ON(tmp->inode != new_wpa->inode); |
1835 | curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT; | 1958 | curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT; |
1836 | if (curr_index == page->index) { | 1959 | if (curr_index == page->index) { |
1837 | WARN_ON(tmp->num_pages != 1); | 1960 | WARN_ON(tmp->ia.ap.num_pages != 1); |
1838 | WARN_ON(!test_bit(FR_PENDING, &tmp->flags)); | 1961 | swap(tmp->ia.ap.pages[0], new_ap->pages[0]); |
1839 | swap(tmp->pages[0], new_req->pages[0]); | ||
1840 | break; | 1962 | break; |
1841 | } | 1963 | } |
1842 | } | 1964 | } |
1843 | 1965 | ||
1844 | if (!tmp) { | 1966 | if (!tmp) { |
1845 | new_req->misc.write.next = old_req->misc.write.next; | 1967 | new_wpa->next = old_wpa->next; |
1846 | old_req->misc.write.next = new_req; | 1968 | old_wpa->next = new_wpa; |
1847 | } | 1969 | } |
1848 | 1970 | ||
1849 | spin_unlock(&fi->lock); | 1971 | spin_unlock(&fi->lock); |
1850 | 1972 | ||
1851 | if (tmp) { | 1973 | if (tmp) { |
1852 | struct backing_dev_info *bdi = inode_to_bdi(new_req->inode); | 1974 | struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode); |
1853 | 1975 | ||
1854 | dec_wb_stat(&bdi->wb, WB_WRITEBACK); | 1976 | dec_wb_stat(&bdi->wb, WB_WRITEBACK); |
1855 | dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP); | 1977 | dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP); |
1856 | wb_writeout_inc(&bdi->wb); | 1978 | wb_writeout_inc(&bdi->wb); |
1857 | fuse_writepage_free(fc, new_req); | 1979 | fuse_writepage_free(new_wpa); |
1858 | fuse_request_free(new_req); | ||
1859 | } | 1980 | } |
1860 | 1981 | ||
1861 | return true; | 1982 | return true; |
@@ -1865,7 +1986,8 @@ static int fuse_writepages_fill(struct page *page, | |||
1865 | struct writeback_control *wbc, void *_data) | 1986 | struct writeback_control *wbc, void *_data) |
1866 | { | 1987 | { |
1867 | struct fuse_fill_wb_data *data = _data; | 1988 | struct fuse_fill_wb_data *data = _data; |
1868 | struct fuse_req *req = data->req; | 1989 | struct fuse_writepage_args *wpa = data->wpa; |
1990 | struct fuse_args_pages *ap = &wpa->ia.ap; | ||
1869 | struct inode *inode = data->inode; | 1991 | struct inode *inode = data->inode; |
1870 | struct fuse_inode *fi = get_fuse_inode(inode); | 1992 | struct fuse_inode *fi = get_fuse_inode(inode); |
1871 | struct fuse_conn *fc = get_fuse_conn(inode); | 1993 | struct fuse_conn *fc = get_fuse_conn(inode); |
@@ -1888,16 +2010,16 @@ static int fuse_writepages_fill(struct page *page, | |||
1888 | */ | 2010 | */ |
1889 | is_writeback = fuse_page_is_writeback(inode, page->index); | 2011 | is_writeback = fuse_page_is_writeback(inode, page->index); |
1890 | 2012 | ||
1891 | if (req && req->num_pages && | 2013 | if (wpa && ap->num_pages && |
1892 | (is_writeback || req->num_pages == fc->max_pages || | 2014 | (is_writeback || ap->num_pages == fc->max_pages || |
1893 | (req->num_pages + 1) * PAGE_SIZE > fc->max_write || | 2015 | (ap->num_pages + 1) * PAGE_SIZE > fc->max_write || |
1894 | data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) { | 2016 | data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)) { |
1895 | fuse_writepages_send(data); | 2017 | fuse_writepages_send(data); |
1896 | data->req = NULL; | 2018 | data->wpa = NULL; |
1897 | } else if (req && req->num_pages == req->max_pages) { | 2019 | } else if (wpa && ap->num_pages == data->max_pages) { |
1898 | if (!fuse_req_realloc_pages(fc, req, GFP_NOFS)) { | 2020 | if (!fuse_pages_realloc(data)) { |
1899 | fuse_writepages_send(data); | 2021 | fuse_writepages_send(data); |
1900 | req = data->req = NULL; | 2022 | data->wpa = NULL; |
1901 | } | 2023 | } |
1902 | } | 2024 | } |
1903 | 2025 | ||
@@ -1915,59 +2037,60 @@ static int fuse_writepages_fill(struct page *page, | |||
1915 | * This is ensured by holding the page lock in page_mkwrite() while | 2037 | * This is ensured by holding the page lock in page_mkwrite() while |
1916 | * checking fuse_page_is_writeback(). We already hold the page lock | 2038 | * checking fuse_page_is_writeback(). We already hold the page lock |
1917 | * since clear_page_dirty_for_io() and keep it held until we add the | 2039 | * since clear_page_dirty_for_io() and keep it held until we add the |
1918 | * request to the fi->writepages list and increment req->num_pages. | 2040 | * request to the fi->writepages list and increment ap->num_pages. |
1919 | * After this fuse_page_is_writeback() will indicate that the page is | 2041 | * After this fuse_page_is_writeback() will indicate that the page is |
1920 | * under writeback, so we can release the page lock. | 2042 | * under writeback, so we can release the page lock. |
1921 | */ | 2043 | */ |
1922 | if (data->req == NULL) { | 2044 | if (data->wpa == NULL) { |
1923 | struct fuse_inode *fi = get_fuse_inode(inode); | 2045 | struct fuse_inode *fi = get_fuse_inode(inode); |
1924 | 2046 | ||
1925 | err = -ENOMEM; | 2047 | err = -ENOMEM; |
1926 | req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES); | 2048 | wpa = fuse_writepage_args_alloc(); |
1927 | if (!req) { | 2049 | if (!wpa) { |
1928 | __free_page(tmp_page); | 2050 | __free_page(tmp_page); |
1929 | goto out_unlock; | 2051 | goto out_unlock; |
1930 | } | 2052 | } |
2053 | data->max_pages = 1; | ||
1931 | 2054 | ||
1932 | fuse_write_fill(req, data->ff, page_offset(page), 0); | 2055 | ap = &wpa->ia.ap; |
1933 | req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; | 2056 | fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0); |
1934 | req->misc.write.next = NULL; | 2057 | wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; |
1935 | req->in.argpages = 1; | 2058 | wpa->next = NULL; |
1936 | __set_bit(FR_BACKGROUND, &req->flags); | 2059 | ap->args.in_pages = true; |
1937 | req->num_pages = 0; | 2060 | ap->args.end = fuse_writepage_end; |
1938 | req->end = fuse_writepage_end; | 2061 | ap->num_pages = 0; |
1939 | req->inode = inode; | 2062 | wpa->inode = inode; |
1940 | 2063 | ||
1941 | spin_lock(&fi->lock); | 2064 | spin_lock(&fi->lock); |
1942 | list_add(&req->writepages_entry, &fi->writepages); | 2065 | list_add(&wpa->writepages_entry, &fi->writepages); |
1943 | spin_unlock(&fi->lock); | 2066 | spin_unlock(&fi->lock); |
1944 | 2067 | ||
1945 | data->req = req; | 2068 | data->wpa = wpa; |
1946 | } | 2069 | } |
1947 | set_page_writeback(page); | 2070 | set_page_writeback(page); |
1948 | 2071 | ||
1949 | copy_highpage(tmp_page, page); | 2072 | copy_highpage(tmp_page, page); |
1950 | req->pages[req->num_pages] = tmp_page; | 2073 | ap->pages[ap->num_pages] = tmp_page; |
1951 | req->page_descs[req->num_pages].offset = 0; | 2074 | ap->descs[ap->num_pages].offset = 0; |
1952 | req->page_descs[req->num_pages].length = PAGE_SIZE; | 2075 | ap->descs[ap->num_pages].length = PAGE_SIZE; |
1953 | 2076 | ||
1954 | inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); | 2077 | inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); |
1955 | inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); | 2078 | inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); |
1956 | 2079 | ||
1957 | err = 0; | 2080 | err = 0; |
1958 | if (is_writeback && fuse_writepage_in_flight(req, page)) { | 2081 | if (is_writeback && fuse_writepage_in_flight(wpa, page)) { |
1959 | end_page_writeback(page); | 2082 | end_page_writeback(page); |
1960 | data->req = NULL; | 2083 | data->wpa = NULL; |
1961 | goto out_unlock; | 2084 | goto out_unlock; |
1962 | } | 2085 | } |
1963 | data->orig_pages[req->num_pages] = page; | 2086 | data->orig_pages[ap->num_pages] = page; |
1964 | 2087 | ||
1965 | /* | 2088 | /* |
1966 | * Protected by fi->lock against concurrent access by | 2089 | * Protected by fi->lock against concurrent access by |
1967 | * fuse_page_is_writeback(). | 2090 | * fuse_page_is_writeback(). |
1968 | */ | 2091 | */ |
1969 | spin_lock(&fi->lock); | 2092 | spin_lock(&fi->lock); |
1970 | req->num_pages++; | 2093 | ap->num_pages++; |
1971 | spin_unlock(&fi->lock); | 2094 | spin_unlock(&fi->lock); |
1972 | 2095 | ||
1973 | out_unlock: | 2096 | out_unlock: |
@@ -1989,7 +2112,7 @@ static int fuse_writepages(struct address_space *mapping, | |||
1989 | goto out; | 2112 | goto out; |
1990 | 2113 | ||
1991 | data.inode = inode; | 2114 | data.inode = inode; |
1992 | data.req = NULL; | 2115 | data.wpa = NULL; |
1993 | data.ff = NULL; | 2116 | data.ff = NULL; |
1994 | 2117 | ||
1995 | err = -ENOMEM; | 2118 | err = -ENOMEM; |
@@ -2000,9 +2123,9 @@ static int fuse_writepages(struct address_space *mapping, | |||
2000 | goto out; | 2123 | goto out; |
2001 | 2124 | ||
2002 | err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); | 2125 | err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); |
2003 | if (data.req) { | 2126 | if (data.wpa) { |
2004 | /* Ignore errors if we can write at least one page */ | 2127 | /* Ignore errors if we can write at least one page */ |
2005 | BUG_ON(!data.req->num_pages); | 2128 | WARN_ON(!data.wpa->ia.ap.num_pages); |
2006 | fuse_writepages_send(&data); | 2129 | fuse_writepages_send(&data); |
2007 | err = 0; | 2130 | err = 0; |
2008 | } | 2131 | } |
@@ -2222,11 +2345,11 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file, | |||
2222 | inarg->lk.pid = pid; | 2345 | inarg->lk.pid = pid; |
2223 | if (flock) | 2346 | if (flock) |
2224 | inarg->lk_flags |= FUSE_LK_FLOCK; | 2347 | inarg->lk_flags |= FUSE_LK_FLOCK; |
2225 | args->in.h.opcode = opcode; | 2348 | args->opcode = opcode; |
2226 | args->in.h.nodeid = get_node_id(inode); | 2349 | args->nodeid = get_node_id(inode); |
2227 | args->in.numargs = 1; | 2350 | args->in_numargs = 1; |
2228 | args->in.args[0].size = sizeof(*inarg); | 2351 | args->in_args[0].size = sizeof(*inarg); |
2229 | args->in.args[0].value = inarg; | 2352 | args->in_args[0].value = inarg; |
2230 | } | 2353 | } |
2231 | 2354 | ||
2232 | static int fuse_getlk(struct file *file, struct file_lock *fl) | 2355 | static int fuse_getlk(struct file *file, struct file_lock *fl) |
@@ -2239,9 +2362,9 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) | |||
2239 | int err; | 2362 | int err; |
2240 | 2363 | ||
2241 | fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg); | 2364 | fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg); |
2242 | args.out.numargs = 1; | 2365 | args.out_numargs = 1; |
2243 | args.out.args[0].size = sizeof(outarg); | 2366 | args.out_args[0].size = sizeof(outarg); |
2244 | args.out.args[0].value = &outarg; | 2367 | args.out_args[0].value = &outarg; |
2245 | err = fuse_simple_request(fc, &args); | 2368 | err = fuse_simple_request(fc, &args); |
2246 | if (!err) | 2369 | if (!err) |
2247 | err = convert_fuse_file_lock(fc, &outarg.lk, fl); | 2370 | err = convert_fuse_file_lock(fc, &outarg.lk, fl); |
@@ -2336,14 +2459,14 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) | |||
2336 | memset(&inarg, 0, sizeof(inarg)); | 2459 | memset(&inarg, 0, sizeof(inarg)); |
2337 | inarg.block = block; | 2460 | inarg.block = block; |
2338 | inarg.blocksize = inode->i_sb->s_blocksize; | 2461 | inarg.blocksize = inode->i_sb->s_blocksize; |
2339 | args.in.h.opcode = FUSE_BMAP; | 2462 | args.opcode = FUSE_BMAP; |
2340 | args.in.h.nodeid = get_node_id(inode); | 2463 | args.nodeid = get_node_id(inode); |
2341 | args.in.numargs = 1; | 2464 | args.in_numargs = 1; |
2342 | args.in.args[0].size = sizeof(inarg); | 2465 | args.in_args[0].size = sizeof(inarg); |
2343 | args.in.args[0].value = &inarg; | 2466 | args.in_args[0].value = &inarg; |
2344 | args.out.numargs = 1; | 2467 | args.out_numargs = 1; |
2345 | args.out.args[0].size = sizeof(outarg); | 2468 | args.out_args[0].size = sizeof(outarg); |
2346 | args.out.args[0].value = &outarg; | 2469 | args.out_args[0].value = &outarg; |
2347 | err = fuse_simple_request(fc, &args); | 2470 | err = fuse_simple_request(fc, &args); |
2348 | if (err == -ENOSYS) | 2471 | if (err == -ENOSYS) |
2349 | fc->no_bmap = 1; | 2472 | fc->no_bmap = 1; |
@@ -2368,14 +2491,14 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) | |||
2368 | if (fc->no_lseek) | 2491 | if (fc->no_lseek) |
2369 | goto fallback; | 2492 | goto fallback; |
2370 | 2493 | ||
2371 | args.in.h.opcode = FUSE_LSEEK; | 2494 | args.opcode = FUSE_LSEEK; |
2372 | args.in.h.nodeid = ff->nodeid; | 2495 | args.nodeid = ff->nodeid; |
2373 | args.in.numargs = 1; | 2496 | args.in_numargs = 1; |
2374 | args.in.args[0].size = sizeof(inarg); | 2497 | args.in_args[0].size = sizeof(inarg); |
2375 | args.in.args[0].value = &inarg; | 2498 | args.in_args[0].value = &inarg; |
2376 | args.out.numargs = 1; | 2499 | args.out_numargs = 1; |
2377 | args.out.args[0].size = sizeof(outarg); | 2500 | args.out_args[0].size = sizeof(outarg); |
2378 | args.out.args[0].value = &outarg; | 2501 | args.out_args[0].value = &outarg; |
2379 | err = fuse_simple_request(fc, &args); | 2502 | err = fuse_simple_request(fc, &args); |
2380 | if (err) { | 2503 | if (err) { |
2381 | if (err == -ENOSYS) { | 2504 | if (err == -ENOSYS) { |
@@ -2573,14 +2696,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
2573 | .flags = flags | 2696 | .flags = flags |
2574 | }; | 2697 | }; |
2575 | struct fuse_ioctl_out outarg; | 2698 | struct fuse_ioctl_out outarg; |
2576 | struct fuse_req *req = NULL; | ||
2577 | struct page **pages = NULL; | ||
2578 | struct iovec *iov_page = NULL; | 2699 | struct iovec *iov_page = NULL; |
2579 | struct iovec *in_iov = NULL, *out_iov = NULL; | 2700 | struct iovec *in_iov = NULL, *out_iov = NULL; |
2580 | unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages; | 2701 | unsigned int in_iovs = 0, out_iovs = 0, max_pages; |
2581 | size_t in_size, out_size, transferred, c; | 2702 | size_t in_size, out_size, c; |
2703 | ssize_t transferred; | ||
2582 | int err, i; | 2704 | int err, i; |
2583 | struct iov_iter ii; | 2705 | struct iov_iter ii; |
2706 | struct fuse_args_pages ap = {}; | ||
2584 | 2707 | ||
2585 | #if BITS_PER_LONG == 32 | 2708 | #if BITS_PER_LONG == 32 |
2586 | inarg.flags |= FUSE_IOCTL_32BIT; | 2709 | inarg.flags |= FUSE_IOCTL_32BIT; |
@@ -2598,11 +2721,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
2598 | BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); | 2721 | BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); |
2599 | 2722 | ||
2600 | err = -ENOMEM; | 2723 | err = -ENOMEM; |
2601 | pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL); | 2724 | ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs); |
2602 | iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); | 2725 | iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); |
2603 | if (!pages || !iov_page) | 2726 | if (!ap.pages || !iov_page) |
2604 | goto out; | 2727 | goto out; |
2605 | 2728 | ||
2729 | fuse_page_descs_length_init(ap.descs, 0, fc->max_pages); | ||
2730 | |||
2606 | /* | 2731 | /* |
2607 | * If restricted, initialize IO parameters as encoded in @cmd. | 2732 | * If restricted, initialize IO parameters as encoded in @cmd. |
2608 | * RETRY from server is not allowed. | 2733 | * RETRY from server is not allowed. |
@@ -2639,56 +2764,44 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
2639 | err = -ENOMEM; | 2764 | err = -ENOMEM; |
2640 | if (max_pages > fc->max_pages) | 2765 | if (max_pages > fc->max_pages) |
2641 | goto out; | 2766 | goto out; |
2642 | while (num_pages < max_pages) { | 2767 | while (ap.num_pages < max_pages) { |
2643 | pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 2768 | ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); |
2644 | if (!pages[num_pages]) | 2769 | if (!ap.pages[ap.num_pages]) |
2645 | goto out; | 2770 | goto out; |
2646 | num_pages++; | 2771 | ap.num_pages++; |
2647 | } | 2772 | } |
2648 | 2773 | ||
2649 | req = fuse_get_req(fc, num_pages); | ||
2650 | if (IS_ERR(req)) { | ||
2651 | err = PTR_ERR(req); | ||
2652 | req = NULL; | ||
2653 | goto out; | ||
2654 | } | ||
2655 | memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); | ||
2656 | req->num_pages = num_pages; | ||
2657 | fuse_page_descs_length_init(req, 0, req->num_pages); | ||
2658 | 2774 | ||
2659 | /* okay, let's send it to the client */ | 2775 | /* okay, let's send it to the client */ |
2660 | req->in.h.opcode = FUSE_IOCTL; | 2776 | ap.args.opcode = FUSE_IOCTL; |
2661 | req->in.h.nodeid = ff->nodeid; | 2777 | ap.args.nodeid = ff->nodeid; |
2662 | req->in.numargs = 1; | 2778 | ap.args.in_numargs = 1; |
2663 | req->in.args[0].size = sizeof(inarg); | 2779 | ap.args.in_args[0].size = sizeof(inarg); |
2664 | req->in.args[0].value = &inarg; | 2780 | ap.args.in_args[0].value = &inarg; |
2665 | if (in_size) { | 2781 | if (in_size) { |
2666 | req->in.numargs++; | 2782 | ap.args.in_numargs++; |
2667 | req->in.args[1].size = in_size; | 2783 | ap.args.in_args[1].size = in_size; |
2668 | req->in.argpages = 1; | 2784 | ap.args.in_pages = true; |
2669 | 2785 | ||
2670 | err = -EFAULT; | 2786 | err = -EFAULT; |
2671 | iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size); | 2787 | iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size); |
2672 | for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) { | 2788 | for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { |
2673 | c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii); | 2789 | c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); |
2674 | if (c != PAGE_SIZE && iov_iter_count(&ii)) | 2790 | if (c != PAGE_SIZE && iov_iter_count(&ii)) |
2675 | goto out; | 2791 | goto out; |
2676 | } | 2792 | } |
2677 | } | 2793 | } |
2678 | 2794 | ||
2679 | req->out.numargs = 2; | 2795 | ap.args.out_numargs = 2; |
2680 | req->out.args[0].size = sizeof(outarg); | 2796 | ap.args.out_args[0].size = sizeof(outarg); |
2681 | req->out.args[0].value = &outarg; | 2797 | ap.args.out_args[0].value = &outarg; |
2682 | req->out.args[1].size = out_size; | 2798 | ap.args.out_args[1].size = out_size; |
2683 | req->out.argpages = 1; | 2799 | ap.args.out_pages = true; |
2684 | req->out.argvar = 1; | 2800 | ap.args.out_argvar = true; |
2685 | 2801 | ||
2686 | fuse_request_send(fc, req); | 2802 | transferred = fuse_simple_request(fc, &ap.args); |
2687 | err = req->out.h.error; | 2803 | err = transferred; |
2688 | transferred = req->out.args[1].size; | 2804 | if (transferred < 0) |
2689 | fuse_put_request(fc, req); | ||
2690 | req = NULL; | ||
2691 | if (err) | ||
2692 | goto out; | 2805 | goto out; |
2693 | 2806 | ||
2694 | /* did it ask for retry? */ | 2807 | /* did it ask for retry? */ |
@@ -2713,7 +2826,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
2713 | in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) | 2826 | in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) |
2714 | goto out; | 2827 | goto out; |
2715 | 2828 | ||
2716 | vaddr = kmap_atomic(pages[0]); | 2829 | vaddr = kmap_atomic(ap.pages[0]); |
2717 | err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, | 2830 | err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, |
2718 | transferred, in_iovs + out_iovs, | 2831 | transferred, in_iovs + out_iovs, |
2719 | (flags & FUSE_IOCTL_COMPAT) != 0); | 2832 | (flags & FUSE_IOCTL_COMPAT) != 0); |
@@ -2741,19 +2854,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
2741 | 2854 | ||
2742 | err = -EFAULT; | 2855 | err = -EFAULT; |
2743 | iov_iter_init(&ii, READ, out_iov, out_iovs, transferred); | 2856 | iov_iter_init(&ii, READ, out_iov, out_iovs, transferred); |
2744 | for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) { | 2857 | for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { |
2745 | c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii); | 2858 | c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); |
2746 | if (c != PAGE_SIZE && iov_iter_count(&ii)) | 2859 | if (c != PAGE_SIZE && iov_iter_count(&ii)) |
2747 | goto out; | 2860 | goto out; |
2748 | } | 2861 | } |
2749 | err = 0; | 2862 | err = 0; |
2750 | out: | 2863 | out: |
2751 | if (req) | ||
2752 | fuse_put_request(fc, req); | ||
2753 | free_page((unsigned long) iov_page); | 2864 | free_page((unsigned long) iov_page); |
2754 | while (num_pages) | 2865 | while (ap.num_pages) |
2755 | __free_page(pages[--num_pages]); | 2866 | __free_page(ap.pages[--ap.num_pages]); |
2756 | kfree(pages); | 2867 | kfree(ap.pages); |
2757 | 2868 | ||
2758 | return err ? err : outarg.result; | 2869 | return err ? err : outarg.result; |
2759 | } | 2870 | } |
@@ -2861,14 +2972,14 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait) | |||
2861 | fuse_register_polled_file(fc, ff); | 2972 | fuse_register_polled_file(fc, ff); |
2862 | } | 2973 | } |
2863 | 2974 | ||
2864 | args.in.h.opcode = FUSE_POLL; | 2975 | args.opcode = FUSE_POLL; |
2865 | args.in.h.nodeid = ff->nodeid; | 2976 | args.nodeid = ff->nodeid; |
2866 | args.in.numargs = 1; | 2977 | args.in_numargs = 1; |
2867 | args.in.args[0].size = sizeof(inarg); | 2978 | args.in_args[0].size = sizeof(inarg); |
2868 | args.in.args[0].value = &inarg; | 2979 | args.in_args[0].value = &inarg; |
2869 | args.out.numargs = 1; | 2980 | args.out_numargs = 1; |
2870 | args.out.args[0].size = sizeof(outarg); | 2981 | args.out_args[0].size = sizeof(outarg); |
2871 | args.out.args[0].value = &outarg; | 2982 | args.out_args[0].value = &outarg; |
2872 | err = fuse_simple_request(fc, &args); | 2983 | err = fuse_simple_request(fc, &args); |
2873 | 2984 | ||
2874 | if (!err) | 2985 | if (!err) |
@@ -3076,11 +3187,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, | |||
3076 | if (!(mode & FALLOC_FL_KEEP_SIZE)) | 3187 | if (!(mode & FALLOC_FL_KEEP_SIZE)) |
3077 | set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); | 3188 | set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); |
3078 | 3189 | ||
3079 | args.in.h.opcode = FUSE_FALLOCATE; | 3190 | args.opcode = FUSE_FALLOCATE; |
3080 | args.in.h.nodeid = ff->nodeid; | 3191 | args.nodeid = ff->nodeid; |
3081 | args.in.numargs = 1; | 3192 | args.in_numargs = 1; |
3082 | args.in.args[0].size = sizeof(inarg); | 3193 | args.in_args[0].size = sizeof(inarg); |
3083 | args.in.args[0].value = &inarg; | 3194 | args.in_args[0].value = &inarg; |
3084 | err = fuse_simple_request(fc, &args); | 3195 | err = fuse_simple_request(fc, &args); |
3085 | if (err == -ENOSYS) { | 3196 | if (err == -ENOSYS) { |
3086 | fc->no_fallocate = 1; | 3197 | fc->no_fallocate = 1; |
@@ -3168,14 +3279,14 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, | |||
3168 | if (is_unstable) | 3279 | if (is_unstable) |
3169 | set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); | 3280 | set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); |
3170 | 3281 | ||
3171 | args.in.h.opcode = FUSE_COPY_FILE_RANGE; | 3282 | args.opcode = FUSE_COPY_FILE_RANGE; |
3172 | args.in.h.nodeid = ff_in->nodeid; | 3283 | args.nodeid = ff_in->nodeid; |
3173 | args.in.numargs = 1; | 3284 | args.in_numargs = 1; |
3174 | args.in.args[0].size = sizeof(inarg); | 3285 | args.in_args[0].size = sizeof(inarg); |
3175 | args.in.args[0].value = &inarg; | 3286 | args.in_args[0].value = &inarg; |
3176 | args.out.numargs = 1; | 3287 | args.out_numargs = 1; |
3177 | args.out.args[0].size = sizeof(outarg); | 3288 | args.out_args[0].size = sizeof(outarg); |
3178 | args.out.args[0].value = &outarg; | 3289 | args.out_args[0].value = &outarg; |
3179 | err = fuse_simple_request(fc, &args); | 3290 | err = fuse_simple_request(fc, &args); |
3180 | if (err == -ENOSYS) { | 3291 | if (err == -ENOSYS) { |
3181 | fc->no_copy_file_range = 1; | 3292 | fc->no_copy_file_range = 1; |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 24dbca777775..fc89cb40e874 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -47,9 +47,6 @@ | |||
47 | /** Number of dentries for each connection in the control filesystem */ | 47 | /** Number of dentries for each connection in the control filesystem */ |
48 | #define FUSE_CTL_NUM_DENTRIES 5 | 48 | #define FUSE_CTL_NUM_DENTRIES 5 |
49 | 49 | ||
50 | /** Number of page pointers embedded in fuse_req */ | ||
51 | #define FUSE_REQ_INLINE_PAGES 1 | ||
52 | |||
53 | /** List of active connections */ | 50 | /** List of active connections */ |
54 | extern struct list_head fuse_conn_list; | 51 | extern struct list_head fuse_conn_list; |
55 | 52 | ||
@@ -164,17 +161,15 @@ enum { | |||
164 | }; | 161 | }; |
165 | 162 | ||
166 | struct fuse_conn; | 163 | struct fuse_conn; |
164 | struct fuse_release_args; | ||
167 | 165 | ||
168 | /** FUSE specific file data */ | 166 | /** FUSE specific file data */ |
169 | struct fuse_file { | 167 | struct fuse_file { |
170 | /** Fuse connection for this file */ | 168 | /** Fuse connection for this file */ |
171 | struct fuse_conn *fc; | 169 | struct fuse_conn *fc; |
172 | 170 | ||
173 | /* | 171 | /* Argument space reserved for release */ |
174 | * Request reserved for flush and release. | 172 | struct fuse_release_args *release_args; |
175 | * Modified under relative fuse_inode::lock. | ||
176 | */ | ||
177 | struct fuse_req *reserved_req; | ||
178 | 173 | ||
179 | /** Kernel file handle guaranteed to be unique */ | 174 | /** Kernel file handle guaranteed to be unique */ |
180 | u64 kh; | 175 | u64 kh; |
@@ -229,57 +224,12 @@ struct fuse_in_arg { | |||
229 | const void *value; | 224 | const void *value; |
230 | }; | 225 | }; |
231 | 226 | ||
232 | /** The request input */ | ||
233 | struct fuse_in { | ||
234 | /** The request header */ | ||
235 | struct fuse_in_header h; | ||
236 | |||
237 | /** True if the data for the last argument is in req->pages */ | ||
238 | unsigned argpages:1; | ||
239 | |||
240 | /** Number of arguments */ | ||
241 | unsigned numargs; | ||
242 | |||
243 | /** Array of arguments */ | ||
244 | struct fuse_in_arg args[3]; | ||
245 | }; | ||
246 | |||
247 | /** One output argument of a request */ | 227 | /** One output argument of a request */ |
248 | struct fuse_arg { | 228 | struct fuse_arg { |
249 | unsigned size; | 229 | unsigned size; |
250 | void *value; | 230 | void *value; |
251 | }; | 231 | }; |
252 | 232 | ||
253 | /** The request output */ | ||
254 | struct fuse_out { | ||
255 | /** Header returned from userspace */ | ||
256 | struct fuse_out_header h; | ||
257 | |||
258 | /* | ||
259 | * The following bitfields are not changed during the request | ||
260 | * processing | ||
261 | */ | ||
262 | |||
263 | /** Last argument is variable length (can be shorter than | ||
264 | arg->size) */ | ||
265 | unsigned argvar:1; | ||
266 | |||
267 | /** Last argument is a list of pages to copy data to */ | ||
268 | unsigned argpages:1; | ||
269 | |||
270 | /** Zero partially or not copied pages */ | ||
271 | unsigned page_zeroing:1; | ||
272 | |||
273 | /** Pages may be replaced with new ones */ | ||
274 | unsigned page_replace:1; | ||
275 | |||
276 | /** Number or arguments */ | ||
277 | unsigned numargs; | ||
278 | |||
279 | /** Array of arguments */ | ||
280 | struct fuse_arg args[2]; | ||
281 | }; | ||
282 | |||
283 | /** FUSE page descriptor */ | 233 | /** FUSE page descriptor */ |
284 | struct fuse_page_desc { | 234 | struct fuse_page_desc { |
285 | unsigned int length; | 235 | unsigned int length; |
@@ -287,20 +237,28 @@ struct fuse_page_desc { | |||
287 | }; | 237 | }; |
288 | 238 | ||
289 | struct fuse_args { | 239 | struct fuse_args { |
290 | struct { | 240 | uint64_t nodeid; |
291 | struct { | 241 | uint32_t opcode; |
292 | uint32_t opcode; | 242 | unsigned short in_numargs; |
293 | uint64_t nodeid; | 243 | unsigned short out_numargs; |
294 | } h; | 244 | bool force:1; |
295 | unsigned numargs; | 245 | bool noreply:1; |
296 | struct fuse_in_arg args[3]; | 246 | bool nocreds:1; |
247 | bool in_pages:1; | ||
248 | bool out_pages:1; | ||
249 | bool out_argvar:1; | ||
250 | bool page_zeroing:1; | ||
251 | bool page_replace:1; | ||
252 | struct fuse_in_arg in_args[3]; | ||
253 | struct fuse_arg out_args[2]; | ||
254 | void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error); | ||
255 | }; | ||
297 | 256 | ||
298 | } in; | 257 | struct fuse_args_pages { |
299 | struct { | 258 | struct fuse_args args; |
300 | unsigned argvar:1; | 259 | struct page **pages; |
301 | unsigned numargs; | 260 | struct fuse_page_desc *descs; |
302 | struct fuse_arg args[2]; | 261 | unsigned int num_pages; |
303 | } out; | ||
304 | }; | 262 | }; |
305 | 263 | ||
306 | #define FUSE_ARGS(args) struct fuse_args args = {} | 264 | #define FUSE_ARGS(args) struct fuse_args args = {} |
@@ -373,83 +331,70 @@ struct fuse_req { | |||
373 | /** Entry on the interrupts list */ | 331 | /** Entry on the interrupts list */ |
374 | struct list_head intr_entry; | 332 | struct list_head intr_entry; |
375 | 333 | ||
334 | /* Input/output arguments */ | ||
335 | struct fuse_args *args; | ||
336 | |||
376 | /** refcount */ | 337 | /** refcount */ |
377 | refcount_t count; | 338 | refcount_t count; |
378 | 339 | ||
379 | /* Request flags, updated with test/set/clear_bit() */ | 340 | /* Request flags, updated with test/set/clear_bit() */ |
380 | unsigned long flags; | 341 | unsigned long flags; |
381 | 342 | ||
382 | /** The request input */ | 343 | /* The request input header */ |
383 | struct fuse_in in; | 344 | struct { |
345 | struct fuse_in_header h; | ||
346 | } in; | ||
384 | 347 | ||
385 | /** The request output */ | 348 | /* The request output header */ |
386 | struct fuse_out out; | 349 | struct { |
350 | struct fuse_out_header h; | ||
351 | } out; | ||
387 | 352 | ||
388 | /** Used to wake up the task waiting for completion of request*/ | 353 | /** Used to wake up the task waiting for completion of request*/ |
389 | wait_queue_head_t waitq; | 354 | wait_queue_head_t waitq; |
390 | 355 | ||
391 | /** Data for asynchronous requests */ | 356 | }; |
392 | union { | ||
393 | struct { | ||
394 | struct fuse_release_in in; | ||
395 | struct inode *inode; | ||
396 | } release; | ||
397 | struct fuse_init_in init_in; | ||
398 | struct fuse_init_out init_out; | ||
399 | struct cuse_init_in cuse_init_in; | ||
400 | struct { | ||
401 | struct fuse_read_in in; | ||
402 | u64 attr_ver; | ||
403 | } read; | ||
404 | struct { | ||
405 | struct fuse_write_in in; | ||
406 | struct fuse_write_out out; | ||
407 | struct fuse_req *next; | ||
408 | } write; | ||
409 | struct fuse_notify_retrieve_in retrieve_in; | ||
410 | } misc; | ||
411 | |||
412 | /** page vector */ | ||
413 | struct page **pages; | ||
414 | |||
415 | /** page-descriptor vector */ | ||
416 | struct fuse_page_desc *page_descs; | ||
417 | |||
418 | /** size of the 'pages' array */ | ||
419 | unsigned max_pages; | ||
420 | |||
421 | /** inline page vector */ | ||
422 | struct page *inline_pages[FUSE_REQ_INLINE_PAGES]; | ||
423 | |||
424 | /** inline page-descriptor vector */ | ||
425 | struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES]; | ||
426 | |||
427 | /** number of pages in vector */ | ||
428 | unsigned num_pages; | ||
429 | |||
430 | /** File used in the request (or NULL) */ | ||
431 | struct fuse_file *ff; | ||
432 | |||
433 | /** Inode used in the request or NULL */ | ||
434 | struct inode *inode; | ||
435 | 357 | ||
436 | /** AIO control block */ | 358 | struct fuse_iqueue; |
437 | struct fuse_io_priv *io; | ||
438 | 359 | ||
439 | /** Link on fi->writepages */ | 360 | /** |
440 | struct list_head writepages_entry; | 361 | * Input queue callbacks |
362 | * | ||
363 | * Input queue signalling is device-specific. For example, the /dev/fuse file | ||
364 | * uses fiq->waitq and fasync to wake processes that are waiting on queue | ||
365 | * readiness. These callbacks allow other device types to respond to input | ||
366 | * queue activity. | ||
367 | */ | ||
368 | struct fuse_iqueue_ops { | ||
369 | /** | ||
370 | * Signal that a forget has been queued | ||
371 | */ | ||
372 | void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) | ||
373 | __releases(fiq->lock); | ||
441 | 374 | ||
442 | /** Request completion callback */ | 375 | /** |
443 | void (*end)(struct fuse_conn *, struct fuse_req *); | 376 | * Signal that an INTERRUPT request has been queued |
377 | */ | ||
378 | void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) | ||
379 | __releases(fiq->lock); | ||
444 | 380 | ||
445 | /** Request is stolen from fuse_file->reserved_req */ | 381 | /** |
446 | struct file *stolen_file; | 382 | * Signal that a request has been queued |
383 | */ | ||
384 | void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) | ||
385 | __releases(fiq->lock); | ||
447 | }; | 386 | }; |
448 | 387 | ||
388 | /** /dev/fuse input queue operations */ | ||
389 | extern const struct fuse_iqueue_ops fuse_dev_fiq_ops; | ||
390 | |||
449 | struct fuse_iqueue { | 391 | struct fuse_iqueue { |
450 | /** Connection established */ | 392 | /** Connection established */ |
451 | unsigned connected; | 393 | unsigned connected; |
452 | 394 | ||
395 | /** Lock protecting accesses to members of this structure */ | ||
396 | spinlock_t lock; | ||
397 | |||
453 | /** Readers of the connection are waiting on this */ | 398 | /** Readers of the connection are waiting on this */ |
454 | wait_queue_head_t waitq; | 399 | wait_queue_head_t waitq; |
455 | 400 | ||
@@ -471,6 +416,12 @@ struct fuse_iqueue { | |||
471 | 416 | ||
472 | /** O_ASYNC requests */ | 417 | /** O_ASYNC requests */ |
473 | struct fasync_struct *fasync; | 418 | struct fasync_struct *fasync; |
419 | |||
420 | /** Device-specific callbacks */ | ||
421 | const struct fuse_iqueue_ops *ops; | ||
422 | |||
423 | /** Device-specific state */ | ||
424 | void *priv; | ||
474 | }; | 425 | }; |
475 | 426 | ||
476 | #define FUSE_PQ_HASH_BITS 8 | 427 | #define FUSE_PQ_HASH_BITS 8 |
@@ -504,6 +455,29 @@ struct fuse_dev { | |||
504 | struct list_head entry; | 455 | struct list_head entry; |
505 | }; | 456 | }; |
506 | 457 | ||
458 | struct fuse_fs_context { | ||
459 | int fd; | ||
460 | unsigned int rootmode; | ||
461 | kuid_t user_id; | ||
462 | kgid_t group_id; | ||
463 | bool is_bdev:1; | ||
464 | bool fd_present:1; | ||
465 | bool rootmode_present:1; | ||
466 | bool user_id_present:1; | ||
467 | bool group_id_present:1; | ||
468 | bool default_permissions:1; | ||
469 | bool allow_other:1; | ||
470 | bool destroy:1; | ||
471 | bool no_control:1; | ||
472 | bool no_force_umount:1; | ||
473 | unsigned int max_read; | ||
474 | unsigned int blksize; | ||
475 | const char *subtype; | ||
476 | |||
477 | /* fuse_dev pointer to fill in, should contain NULL on entry */ | ||
478 | void **fudptr; | ||
479 | }; | ||
480 | |||
507 | /** | 481 | /** |
508 | * A Fuse connection. | 482 | * A Fuse connection. |
509 | * | 483 | * |
@@ -584,9 +558,6 @@ struct fuse_conn { | |||
584 | /** waitq for blocked connection */ | 558 | /** waitq for blocked connection */ |
585 | wait_queue_head_t blocked_waitq; | 559 | wait_queue_head_t blocked_waitq; |
586 | 560 | ||
587 | /** waitq for reserved requests */ | ||
588 | wait_queue_head_t reserved_req_waitq; | ||
589 | |||
590 | /** Connection established, cleared on umount, connection | 561 | /** Connection established, cleared on umount, connection |
591 | abort and device release */ | 562 | abort and device release */ |
592 | unsigned connected; | 563 | unsigned connected; |
@@ -721,6 +692,18 @@ struct fuse_conn { | |||
721 | /** Does the filesystem support copy_file_range? */ | 692 | /** Does the filesystem support copy_file_range? */ |
722 | unsigned no_copy_file_range:1; | 693 | unsigned no_copy_file_range:1; |
723 | 694 | ||
695 | /* Send DESTROY request */ | ||
696 | unsigned int destroy:1; | ||
697 | |||
698 | /* Delete dentries that have gone stale */ | ||
699 | unsigned int delete_stale:1; | ||
700 | |||
701 | /** Do not create entry in fusectl fs */ | ||
702 | unsigned int no_control:1; | ||
703 | |||
704 | /** Do not allow MNT_FORCE umount */ | ||
705 | unsigned int no_force_umount:1; | ||
706 | |||
724 | /** The number of requests waiting for completion */ | 707 | /** The number of requests waiting for completion */ |
725 | atomic_t num_waiting; | 708 | atomic_t num_waiting; |
726 | 709 | ||
@@ -742,9 +725,6 @@ struct fuse_conn { | |||
742 | /** Key for lock owner ID scrambling */ | 725 | /** Key for lock owner ID scrambling */ |
743 | u32 scramble_key[4]; | 726 | u32 scramble_key[4]; |
744 | 727 | ||
745 | /** Reserved request for the DESTROY message */ | ||
746 | struct fuse_req *destroy_req; | ||
747 | |||
748 | /** Version counter for attribute changes */ | 728 | /** Version counter for attribute changes */ |
749 | atomic64_t attr_version; | 729 | atomic64_t attr_version; |
750 | 730 | ||
@@ -820,14 +800,32 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, | |||
820 | 800 | ||
821 | struct fuse_forget_link *fuse_alloc_forget(void); | 801 | struct fuse_forget_link *fuse_alloc_forget(void); |
822 | 802 | ||
823 | /* Used by READDIRPLUS */ | 803 | struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, |
824 | void fuse_force_forget(struct file *file, u64 nodeid); | 804 | unsigned int max, |
805 | unsigned int *countp); | ||
825 | 806 | ||
826 | /** | 807 | /* |
827 | * Initialize READ or READDIR request | 808 | * Initialize READ or READDIR request |
828 | */ | 809 | */ |
829 | void fuse_read_fill(struct fuse_req *req, struct file *file, | 810 | struct fuse_io_args { |
830 | loff_t pos, size_t count, int opcode); | 811 | union { |
812 | struct { | ||
813 | struct fuse_read_in in; | ||
814 | u64 attr_ver; | ||
815 | } read; | ||
816 | struct { | ||
817 | struct fuse_write_in in; | ||
818 | struct fuse_write_out out; | ||
819 | } write; | ||
820 | }; | ||
821 | struct fuse_args_pages ap; | ||
822 | struct fuse_io_priv *io; | ||
823 | struct fuse_file *ff; | ||
824 | }; | ||
825 | |||
826 | void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos, | ||
827 | size_t count, int opcode); | ||
828 | |||
831 | 829 | ||
832 | /** | 830 | /** |
833 | * Send OPEN or OPENDIR request | 831 | * Send OPEN or OPENDIR request |
@@ -900,61 +898,16 @@ int fuse_ctl_init(void); | |||
900 | void __exit fuse_ctl_cleanup(void); | 898 | void __exit fuse_ctl_cleanup(void); |
901 | 899 | ||
902 | /** | 900 | /** |
903 | * Allocate a request | ||
904 | */ | ||
905 | struct fuse_req *fuse_request_alloc(unsigned npages); | ||
906 | |||
907 | struct fuse_req *fuse_request_alloc_nofs(unsigned npages); | ||
908 | |||
909 | bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req, | ||
910 | gfp_t flags); | ||
911 | |||
912 | |||
913 | /** | ||
914 | * Free a request | ||
915 | */ | ||
916 | void fuse_request_free(struct fuse_req *req); | ||
917 | |||
918 | /** | ||
919 | * Get a request, may fail with -ENOMEM, | ||
920 | * caller should specify # elements in req->pages[] explicitly | ||
921 | */ | ||
922 | struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages); | ||
923 | struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, | ||
924 | unsigned npages); | ||
925 | |||
926 | /* | ||
927 | * Increment reference count on request | ||
928 | */ | ||
929 | void __fuse_get_request(struct fuse_req *req); | ||
930 | |||
931 | /** | ||
932 | * Gets a requests for a file operation, always succeeds | ||
933 | */ | ||
934 | struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, | ||
935 | struct file *file); | ||
936 | |||
937 | /** | ||
938 | * Decrement reference count of a request. If count goes to zero free | ||
939 | * the request. | ||
940 | */ | ||
941 | void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); | ||
942 | |||
943 | /** | ||
944 | * Send a request (synchronous) | ||
945 | */ | ||
946 | void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); | ||
947 | |||
948 | /** | ||
949 | * Simple request sending that does request allocation and freeing | 901 | * Simple request sending that does request allocation and freeing |
950 | */ | 902 | */ |
951 | ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); | 903 | ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); |
904 | int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, | ||
905 | gfp_t gfp_flags); | ||
952 | 906 | ||
953 | /** | 907 | /** |
954 | * Send a request in the background | 908 | * End a finished request |
955 | */ | 909 | */ |
956 | void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); | 910 | void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req); |
957 | bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req); | ||
958 | 911 | ||
959 | /* Abort all requests */ | 912 | /* Abort all requests */ |
960 | void fuse_abort_conn(struct fuse_conn *fc); | 913 | void fuse_abort_conn(struct fuse_conn *fc); |
@@ -980,15 +933,33 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); | |||
980 | /** | 933 | /** |
981 | * Initialize fuse_conn | 934 | * Initialize fuse_conn |
982 | */ | 935 | */ |
983 | void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns); | 936 | void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, |
937 | const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); | ||
984 | 938 | ||
985 | /** | 939 | /** |
986 | * Release reference to fuse_conn | 940 | * Release reference to fuse_conn |
987 | */ | 941 | */ |
988 | void fuse_conn_put(struct fuse_conn *fc); | 942 | void fuse_conn_put(struct fuse_conn *fc); |
989 | 943 | ||
990 | struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc); | 944 | struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); |
945 | struct fuse_dev *fuse_dev_alloc(void); | ||
946 | void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); | ||
991 | void fuse_dev_free(struct fuse_dev *fud); | 947 | void fuse_dev_free(struct fuse_dev *fud); |
948 | void fuse_send_init(struct fuse_conn *fc); | ||
949 | |||
950 | /** | ||
951 | * Fill in superblock and initialize fuse connection | ||
952 | * @sb: partially-initialized superblock to fill in | ||
953 | * @ctx: mount context | ||
954 | */ | ||
955 | int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); | ||
956 | |||
957 | /** | ||
958 | * Disassociate fuse connection from superblock and kill the superblock | ||
959 | * | ||
960 | * Calls kill_anon_super(), do not use with bdev mounts. | ||
961 | */ | ||
962 | void fuse_kill_sb_anon(struct super_block *sb); | ||
992 | 963 | ||
993 | /** | 964 | /** |
994 | * Add connection to control filesystem | 965 | * Add connection to control filesystem |
@@ -1093,4 +1064,15 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type); | |||
1093 | /* readdir.c */ | 1064 | /* readdir.c */ |
1094 | int fuse_readdir(struct file *file, struct dir_context *ctx); | 1065 | int fuse_readdir(struct file *file, struct dir_context *ctx); |
1095 | 1066 | ||
1067 | /** | ||
1068 | * Return the number of bytes in an arguments list | ||
1069 | */ | ||
1070 | unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); | ||
1071 | |||
1072 | /** | ||
1073 | * Get the next unique ID for a request | ||
1074 | */ | ||
1075 | u64 fuse_get_unique(struct fuse_iqueue *fiq); | ||
1076 | void fuse_free_conn(struct fuse_conn *fc); | ||
1077 | |||
1096 | #endif /* _FS_FUSE_I_H */ | 1078 | #endif /* _FS_FUSE_I_H */ |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4bb885b0f032..51cb471f4dc3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -15,7 +15,8 @@ | |||
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/moduleparam.h> | 17 | #include <linux/moduleparam.h> |
18 | #include <linux/parser.h> | 18 | #include <linux/fs_context.h> |
19 | #include <linux/fs_parser.h> | ||
19 | #include <linux/statfs.h> | 20 | #include <linux/statfs.h> |
20 | #include <linux/random.h> | 21 | #include <linux/random.h> |
21 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
@@ -59,24 +60,13 @@ MODULE_PARM_DESC(max_user_congthresh, | |||
59 | /** Congestion starts at 75% of maximum */ | 60 | /** Congestion starts at 75% of maximum */ |
60 | #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) | 61 | #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) |
61 | 62 | ||
62 | struct fuse_mount_data { | 63 | #ifdef CONFIG_BLOCK |
63 | int fd; | 64 | static struct file_system_type fuseblk_fs_type; |
64 | unsigned rootmode; | 65 | #endif |
65 | kuid_t user_id; | ||
66 | kgid_t group_id; | ||
67 | unsigned fd_present:1; | ||
68 | unsigned rootmode_present:1; | ||
69 | unsigned user_id_present:1; | ||
70 | unsigned group_id_present:1; | ||
71 | unsigned default_permissions:1; | ||
72 | unsigned allow_other:1; | ||
73 | unsigned max_read; | ||
74 | unsigned blksize; | ||
75 | }; | ||
76 | 66 | ||
77 | struct fuse_forget_link *fuse_alloc_forget(void) | 67 | struct fuse_forget_link *fuse_alloc_forget(void) |
78 | { | 68 | { |
79 | return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); | 69 | return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); |
80 | } | 70 | } |
81 | 71 | ||
82 | static struct inode *fuse_alloc_inode(struct super_block *sb) | 72 | static struct inode *fuse_alloc_inode(struct super_block *sb) |
@@ -374,19 +364,21 @@ void fuse_unlock_inode(struct inode *inode, bool locked) | |||
374 | 364 | ||
375 | static void fuse_umount_begin(struct super_block *sb) | 365 | static void fuse_umount_begin(struct super_block *sb) |
376 | { | 366 | { |
377 | fuse_abort_conn(get_fuse_conn_super(sb)); | 367 | struct fuse_conn *fc = get_fuse_conn_super(sb); |
368 | |||
369 | if (!fc->no_force_umount) | ||
370 | fuse_abort_conn(fc); | ||
378 | } | 371 | } |
379 | 372 | ||
380 | static void fuse_send_destroy(struct fuse_conn *fc) | 373 | static void fuse_send_destroy(struct fuse_conn *fc) |
381 | { | 374 | { |
382 | struct fuse_req *req = fc->destroy_req; | 375 | if (fc->conn_init) { |
383 | if (req && fc->conn_init) { | 376 | FUSE_ARGS(args); |
384 | fc->destroy_req = NULL; | 377 | |
385 | req->in.h.opcode = FUSE_DESTROY; | 378 | args.opcode = FUSE_DESTROY; |
386 | __set_bit(FR_FORCE, &req->flags); | 379 | args.force = true; |
387 | __clear_bit(FR_BACKGROUND, &req->flags); | 380 | args.nocreds = true; |
388 | fuse_request_send(fc, req); | 381 | fuse_simple_request(fc, &args); |
389 | fuse_put_request(fc, req); | ||
390 | } | 382 | } |
391 | } | 383 | } |
392 | 384 | ||
@@ -430,12 +422,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
430 | } | 422 | } |
431 | 423 | ||
432 | memset(&outarg, 0, sizeof(outarg)); | 424 | memset(&outarg, 0, sizeof(outarg)); |
433 | args.in.numargs = 0; | 425 | args.in_numargs = 0; |
434 | args.in.h.opcode = FUSE_STATFS; | 426 | args.opcode = FUSE_STATFS; |
435 | args.in.h.nodeid = get_node_id(d_inode(dentry)); | 427 | args.nodeid = get_node_id(d_inode(dentry)); |
436 | args.out.numargs = 1; | 428 | args.out_numargs = 1; |
437 | args.out.args[0].size = sizeof(outarg); | 429 | args.out_args[0].size = sizeof(outarg); |
438 | args.out.args[0].value = &outarg; | 430 | args.out_args[0].value = &outarg; |
439 | err = fuse_simple_request(fc, &args); | 431 | err = fuse_simple_request(fc, &args); |
440 | if (!err) | 432 | if (!err) |
441 | convert_fuse_statfs(buf, &outarg.st); | 433 | convert_fuse_statfs(buf, &outarg.st); |
@@ -443,6 +435,8 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
443 | } | 435 | } |
444 | 436 | ||
445 | enum { | 437 | enum { |
438 | OPT_SOURCE, | ||
439 | OPT_SUBTYPE, | ||
446 | OPT_FD, | 440 | OPT_FD, |
447 | OPT_ROOTMODE, | 441 | OPT_ROOTMODE, |
448 | OPT_USER_ID, | 442 | OPT_USER_ID, |
@@ -454,111 +448,109 @@ enum { | |||
454 | OPT_ERR | 448 | OPT_ERR |
455 | }; | 449 | }; |
456 | 450 | ||
457 | static const match_table_t tokens = { | 451 | static const struct fs_parameter_spec fuse_param_specs[] = { |
458 | {OPT_FD, "fd=%u"}, | 452 | fsparam_string ("source", OPT_SOURCE), |
459 | {OPT_ROOTMODE, "rootmode=%o"}, | 453 | fsparam_u32 ("fd", OPT_FD), |
460 | {OPT_USER_ID, "user_id=%u"}, | 454 | fsparam_u32oct ("rootmode", OPT_ROOTMODE), |
461 | {OPT_GROUP_ID, "group_id=%u"}, | 455 | fsparam_u32 ("user_id", OPT_USER_ID), |
462 | {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, | 456 | fsparam_u32 ("group_id", OPT_GROUP_ID), |
463 | {OPT_ALLOW_OTHER, "allow_other"}, | 457 | fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), |
464 | {OPT_MAX_READ, "max_read=%u"}, | 458 | fsparam_flag ("allow_other", OPT_ALLOW_OTHER), |
465 | {OPT_BLKSIZE, "blksize=%u"}, | 459 | fsparam_u32 ("max_read", OPT_MAX_READ), |
466 | {OPT_ERR, NULL} | 460 | fsparam_u32 ("blksize", OPT_BLKSIZE), |
461 | fsparam_string ("subtype", OPT_SUBTYPE), | ||
462 | {} | ||
463 | }; | ||
464 | |||
465 | static const struct fs_parameter_description fuse_fs_parameters = { | ||
466 | .name = "fuse", | ||
467 | .specs = fuse_param_specs, | ||
467 | }; | 468 | }; |
468 | 469 | ||
469 | static int fuse_match_uint(substring_t *s, unsigned int *res) | 470 | static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) |
470 | { | 471 | { |
471 | int err = -ENOMEM; | 472 | struct fs_parse_result result; |
472 | char *buf = match_strdup(s); | 473 | struct fuse_fs_context *ctx = fc->fs_private; |
473 | if (buf) { | 474 | int opt; |
474 | err = kstrtouint(buf, 10, res); | 475 | |
475 | kfree(buf); | 476 | opt = fs_parse(fc, &fuse_fs_parameters, param, &result); |
477 | if (opt < 0) | ||
478 | return opt; | ||
479 | |||
480 | switch (opt) { | ||
481 | case OPT_SOURCE: | ||
482 | if (fc->source) | ||
483 | return invalf(fc, "fuse: Multiple sources specified"); | ||
484 | fc->source = param->string; | ||
485 | param->string = NULL; | ||
486 | break; | ||
487 | |||
488 | case OPT_SUBTYPE: | ||
489 | if (ctx->subtype) | ||
490 | return invalf(fc, "fuse: Multiple subtypes specified"); | ||
491 | ctx->subtype = param->string; | ||
492 | param->string = NULL; | ||
493 | return 0; | ||
494 | |||
495 | case OPT_FD: | ||
496 | ctx->fd = result.uint_32; | ||
497 | ctx->fd_present = 1; | ||
498 | break; | ||
499 | |||
500 | case OPT_ROOTMODE: | ||
501 | if (!fuse_valid_type(result.uint_32)) | ||
502 | return invalf(fc, "fuse: Invalid rootmode"); | ||
503 | ctx->rootmode = result.uint_32; | ||
504 | ctx->rootmode_present = 1; | ||
505 | break; | ||
506 | |||
507 | case OPT_USER_ID: | ||
508 | ctx->user_id = make_kuid(fc->user_ns, result.uint_32); | ||
509 | if (!uid_valid(ctx->user_id)) | ||
510 | return invalf(fc, "fuse: Invalid user_id"); | ||
511 | ctx->user_id_present = 1; | ||
512 | break; | ||
513 | |||
514 | case OPT_GROUP_ID: | ||
515 | ctx->group_id = make_kgid(fc->user_ns, result.uint_32); | ||
516 | if (!gid_valid(ctx->group_id)) | ||
517 | return invalf(fc, "fuse: Invalid group_id"); | ||
518 | ctx->group_id_present = 1; | ||
519 | break; | ||
520 | |||
521 | case OPT_DEFAULT_PERMISSIONS: | ||
522 | ctx->default_permissions = 1; | ||
523 | break; | ||
524 | |||
525 | case OPT_ALLOW_OTHER: | ||
526 | ctx->allow_other = 1; | ||
527 | break; | ||
528 | |||
529 | case OPT_MAX_READ: | ||
530 | ctx->max_read = result.uint_32; | ||
531 | break; | ||
532 | |||
533 | case OPT_BLKSIZE: | ||
534 | if (!ctx->is_bdev) | ||
535 | return invalf(fc, "fuse: blksize only supported for fuseblk"); | ||
536 | ctx->blksize = result.uint_32; | ||
537 | break; | ||
538 | |||
539 | default: | ||
540 | return -EINVAL; | ||
476 | } | 541 | } |
477 | return err; | 542 | |
543 | return 0; | ||
478 | } | 544 | } |
479 | 545 | ||
480 | static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev, | 546 | static void fuse_free_fc(struct fs_context *fc) |
481 | struct user_namespace *user_ns) | ||
482 | { | 547 | { |
483 | char *p; | 548 | struct fuse_fs_context *ctx = fc->fs_private; |
484 | memset(d, 0, sizeof(struct fuse_mount_data)); | ||
485 | d->max_read = ~0; | ||
486 | d->blksize = FUSE_DEFAULT_BLKSIZE; | ||
487 | |||
488 | while ((p = strsep(&opt, ",")) != NULL) { | ||
489 | int token; | ||
490 | int value; | ||
491 | unsigned uv; | ||
492 | substring_t args[MAX_OPT_ARGS]; | ||
493 | if (!*p) | ||
494 | continue; | ||
495 | |||
496 | token = match_token(p, tokens, args); | ||
497 | switch (token) { | ||
498 | case OPT_FD: | ||
499 | if (match_int(&args[0], &value)) | ||
500 | return 0; | ||
501 | d->fd = value; | ||
502 | d->fd_present = 1; | ||
503 | break; | ||
504 | |||
505 | case OPT_ROOTMODE: | ||
506 | if (match_octal(&args[0], &value)) | ||
507 | return 0; | ||
508 | if (!fuse_valid_type(value)) | ||
509 | return 0; | ||
510 | d->rootmode = value; | ||
511 | d->rootmode_present = 1; | ||
512 | break; | ||
513 | |||
514 | case OPT_USER_ID: | ||
515 | if (fuse_match_uint(&args[0], &uv)) | ||
516 | return 0; | ||
517 | d->user_id = make_kuid(user_ns, uv); | ||
518 | if (!uid_valid(d->user_id)) | ||
519 | return 0; | ||
520 | d->user_id_present = 1; | ||
521 | break; | ||
522 | |||
523 | case OPT_GROUP_ID: | ||
524 | if (fuse_match_uint(&args[0], &uv)) | ||
525 | return 0; | ||
526 | d->group_id = make_kgid(user_ns, uv); | ||
527 | if (!gid_valid(d->group_id)) | ||
528 | return 0; | ||
529 | d->group_id_present = 1; | ||
530 | break; | ||
531 | |||
532 | case OPT_DEFAULT_PERMISSIONS: | ||
533 | d->default_permissions = 1; | ||
534 | break; | ||
535 | |||
536 | case OPT_ALLOW_OTHER: | ||
537 | d->allow_other = 1; | ||
538 | break; | ||
539 | |||
540 | case OPT_MAX_READ: | ||
541 | if (match_int(&args[0], &value)) | ||
542 | return 0; | ||
543 | d->max_read = value; | ||
544 | break; | ||
545 | |||
546 | case OPT_BLKSIZE: | ||
547 | if (!is_bdev || match_int(&args[0], &value)) | ||
548 | return 0; | ||
549 | d->blksize = value; | ||
550 | break; | ||
551 | |||
552 | default: | ||
553 | return 0; | ||
554 | } | ||
555 | } | ||
556 | 549 | ||
557 | if (!d->fd_present || !d->rootmode_present || | 550 | if (ctx) { |
558 | !d->user_id_present || !d->group_id_present) | 551 | kfree(ctx->subtype); |
559 | return 0; | 552 | kfree(ctx); |
560 | 553 | } | |
561 | return 1; | ||
562 | } | 554 | } |
563 | 555 | ||
564 | static int fuse_show_options(struct seq_file *m, struct dentry *root) | 556 | static int fuse_show_options(struct seq_file *m, struct dentry *root) |
@@ -579,14 +571,19 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) | |||
579 | return 0; | 571 | return 0; |
580 | } | 572 | } |
581 | 573 | ||
582 | static void fuse_iqueue_init(struct fuse_iqueue *fiq) | 574 | static void fuse_iqueue_init(struct fuse_iqueue *fiq, |
575 | const struct fuse_iqueue_ops *ops, | ||
576 | void *priv) | ||
583 | { | 577 | { |
584 | memset(fiq, 0, sizeof(struct fuse_iqueue)); | 578 | memset(fiq, 0, sizeof(struct fuse_iqueue)); |
579 | spin_lock_init(&fiq->lock); | ||
585 | init_waitqueue_head(&fiq->waitq); | 580 | init_waitqueue_head(&fiq->waitq); |
586 | INIT_LIST_HEAD(&fiq->pending); | 581 | INIT_LIST_HEAD(&fiq->pending); |
587 | INIT_LIST_HEAD(&fiq->interrupts); | 582 | INIT_LIST_HEAD(&fiq->interrupts); |
588 | fiq->forget_list_tail = &fiq->forget_list_head; | 583 | fiq->forget_list_tail = &fiq->forget_list_head; |
589 | fiq->connected = 1; | 584 | fiq->connected = 1; |
585 | fiq->ops = ops; | ||
586 | fiq->priv = priv; | ||
590 | } | 587 | } |
591 | 588 | ||
592 | static void fuse_pqueue_init(struct fuse_pqueue *fpq) | 589 | static void fuse_pqueue_init(struct fuse_pqueue *fpq) |
@@ -600,7 +597,8 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq) | |||
600 | fpq->connected = 1; | 597 | fpq->connected = 1; |
601 | } | 598 | } |
602 | 599 | ||
603 | void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) | 600 | void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, |
601 | const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) | ||
604 | { | 602 | { |
605 | memset(fc, 0, sizeof(*fc)); | 603 | memset(fc, 0, sizeof(*fc)); |
606 | spin_lock_init(&fc->lock); | 604 | spin_lock_init(&fc->lock); |
@@ -609,8 +607,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) | |||
609 | refcount_set(&fc->count, 1); | 607 | refcount_set(&fc->count, 1); |
610 | atomic_set(&fc->dev_count, 1); | 608 | atomic_set(&fc->dev_count, 1); |
611 | init_waitqueue_head(&fc->blocked_waitq); | 609 | init_waitqueue_head(&fc->blocked_waitq); |
612 | init_waitqueue_head(&fc->reserved_req_waitq); | 610 | fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); |
613 | fuse_iqueue_init(&fc->iq); | ||
614 | INIT_LIST_HEAD(&fc->bg_queue); | 611 | INIT_LIST_HEAD(&fc->bg_queue); |
615 | INIT_LIST_HEAD(&fc->entry); | 612 | INIT_LIST_HEAD(&fc->entry); |
616 | INIT_LIST_HEAD(&fc->devices); | 613 | INIT_LIST_HEAD(&fc->devices); |
@@ -633,8 +630,6 @@ EXPORT_SYMBOL_GPL(fuse_conn_init); | |||
633 | void fuse_conn_put(struct fuse_conn *fc) | 630 | void fuse_conn_put(struct fuse_conn *fc) |
634 | { | 631 | { |
635 | if (refcount_dec_and_test(&fc->count)) { | 632 | if (refcount_dec_and_test(&fc->count)) { |
636 | if (fc->destroy_req) | ||
637 | fuse_request_free(fc->destroy_req); | ||
638 | put_pid_ns(fc->pid_ns); | 633 | put_pid_ns(fc->pid_ns); |
639 | put_user_ns(fc->user_ns); | 634 | put_user_ns(fc->user_ns); |
640 | fc->release(fc); | 635 | fc->release(fc); |
@@ -822,9 +817,12 @@ static const struct super_operations fuse_super_operations = { | |||
822 | 817 | ||
823 | static void sanitize_global_limit(unsigned *limit) | 818 | static void sanitize_global_limit(unsigned *limit) |
824 | { | 819 | { |
820 | /* | ||
821 | * The default maximum number of async requests is calculated to consume | ||
822 | * 1/2^13 of the total memory, assuming 392 bytes per request. | ||
823 | */ | ||
825 | if (*limit == 0) | 824 | if (*limit == 0) |
826 | *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / | 825 | *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392; |
827 | sizeof(struct fuse_req); | ||
828 | 826 | ||
829 | if (*limit >= 1 << 16) | 827 | if (*limit >= 1 << 16) |
830 | *limit = (1 << 16) - 1; | 828 | *limit = (1 << 16) - 1; |
@@ -870,11 +868,19 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) | |||
870 | spin_unlock(&fc->bg_lock); | 868 | spin_unlock(&fc->bg_lock); |
871 | } | 869 | } |
872 | 870 | ||
873 | static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | 871 | struct fuse_init_args { |
872 | struct fuse_args args; | ||
873 | struct fuse_init_in in; | ||
874 | struct fuse_init_out out; | ||
875 | }; | ||
876 | |||
877 | static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args, | ||
878 | int error) | ||
874 | { | 879 | { |
875 | struct fuse_init_out *arg = &req->misc.init_out; | 880 | struct fuse_init_args *ia = container_of(args, typeof(*ia), args); |
881 | struct fuse_init_out *arg = &ia->out; | ||
876 | 882 | ||
877 | if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) | 883 | if (error || arg->major != FUSE_KERNEL_VERSION) |
878 | fc->conn_error = 1; | 884 | fc->conn_error = 1; |
879 | else { | 885 | else { |
880 | unsigned long ra_pages; | 886 | unsigned long ra_pages; |
@@ -951,18 +957,23 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
951 | fc->max_write = max_t(unsigned, 4096, fc->max_write); | 957 | fc->max_write = max_t(unsigned, 4096, fc->max_write); |
952 | fc->conn_init = 1; | 958 | fc->conn_init = 1; |
953 | } | 959 | } |
960 | kfree(ia); | ||
961 | |||
954 | fuse_set_initialized(fc); | 962 | fuse_set_initialized(fc); |
955 | wake_up_all(&fc->blocked_waitq); | 963 | wake_up_all(&fc->blocked_waitq); |
956 | } | 964 | } |
957 | 965 | ||
958 | static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) | 966 | void fuse_send_init(struct fuse_conn *fc) |
959 | { | 967 | { |
960 | struct fuse_init_in *arg = &req->misc.init_in; | 968 | struct fuse_init_args *ia; |
969 | |||
970 | ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL); | ||
961 | 971 | ||
962 | arg->major = FUSE_KERNEL_VERSION; | 972 | ia->in.major = FUSE_KERNEL_VERSION; |
963 | arg->minor = FUSE_KERNEL_MINOR_VERSION; | 973 | ia->in.minor = FUSE_KERNEL_MINOR_VERSION; |
964 | arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; | 974 | ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; |
965 | arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | | 975 | ia->in.flags |= |
976 | FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | | ||
966 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | | 977 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | |
967 | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | | 978 | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | |
968 | FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | | 979 | FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | |
@@ -971,26 +982,32 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) | |||
971 | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | | 982 | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | |
972 | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | | 983 | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | |
973 | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; | 984 | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; |
974 | req->in.h.opcode = FUSE_INIT; | 985 | ia->args.opcode = FUSE_INIT; |
975 | req->in.numargs = 1; | 986 | ia->args.in_numargs = 1; |
976 | req->in.args[0].size = sizeof(*arg); | 987 | ia->args.in_args[0].size = sizeof(ia->in); |
977 | req->in.args[0].value = arg; | 988 | ia->args.in_args[0].value = &ia->in; |
978 | req->out.numargs = 1; | 989 | ia->args.out_numargs = 1; |
979 | /* Variable length argument used for backward compatibility | 990 | /* Variable length argument used for backward compatibility |
980 | with interface version < 7.5. Rest of init_out is zeroed | 991 | with interface version < 7.5. Rest of init_out is zeroed |
981 | by do_get_request(), so a short reply is not a problem */ | 992 | by do_get_request(), so a short reply is not a problem */ |
982 | req->out.argvar = 1; | 993 | ia->args.out_argvar = 1; |
983 | req->out.args[0].size = sizeof(struct fuse_init_out); | 994 | ia->args.out_args[0].size = sizeof(ia->out); |
984 | req->out.args[0].value = &req->misc.init_out; | 995 | ia->args.out_args[0].value = &ia->out; |
985 | req->end = process_init_reply; | 996 | ia->args.force = true; |
986 | fuse_request_send_background(fc, req); | 997 | ia->args.nocreds = true; |
998 | ia->args.end = process_init_reply; | ||
999 | |||
1000 | if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0) | ||
1001 | process_init_reply(fc, &ia->args, -ENOTCONN); | ||
987 | } | 1002 | } |
1003 | EXPORT_SYMBOL_GPL(fuse_send_init); | ||
988 | 1004 | ||
989 | static void fuse_free_conn(struct fuse_conn *fc) | 1005 | void fuse_free_conn(struct fuse_conn *fc) |
990 | { | 1006 | { |
991 | WARN_ON(!list_empty(&fc->devices)); | 1007 | WARN_ON(!list_empty(&fc->devices)); |
992 | kfree_rcu(fc, rcu); | 1008 | kfree_rcu(fc, rcu); |
993 | } | 1009 | } |
1010 | EXPORT_SYMBOL_GPL(fuse_free_conn); | ||
994 | 1011 | ||
995 | static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) | 1012 | static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) |
996 | { | 1013 | { |
@@ -1032,7 +1049,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) | |||
1032 | return 0; | 1049 | return 0; |
1033 | } | 1050 | } |
1034 | 1051 | ||
1035 | struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) | 1052 | struct fuse_dev *fuse_dev_alloc(void) |
1036 | { | 1053 | { |
1037 | struct fuse_dev *fud; | 1054 | struct fuse_dev *fud; |
1038 | struct list_head *pq; | 1055 | struct list_head *pq; |
@@ -1048,16 +1065,33 @@ struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) | |||
1048 | } | 1065 | } |
1049 | 1066 | ||
1050 | fud->pq.processing = pq; | 1067 | fud->pq.processing = pq; |
1051 | fud->fc = fuse_conn_get(fc); | ||
1052 | fuse_pqueue_init(&fud->pq); | 1068 | fuse_pqueue_init(&fud->pq); |
1053 | 1069 | ||
1070 | return fud; | ||
1071 | } | ||
1072 | EXPORT_SYMBOL_GPL(fuse_dev_alloc); | ||
1073 | |||
1074 | void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) | ||
1075 | { | ||
1076 | fud->fc = fuse_conn_get(fc); | ||
1054 | spin_lock(&fc->lock); | 1077 | spin_lock(&fc->lock); |
1055 | list_add_tail(&fud->entry, &fc->devices); | 1078 | list_add_tail(&fud->entry, &fc->devices); |
1056 | spin_unlock(&fc->lock); | 1079 | spin_unlock(&fc->lock); |
1080 | } | ||
1081 | EXPORT_SYMBOL_GPL(fuse_dev_install); | ||
1057 | 1082 | ||
1083 | struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) | ||
1084 | { | ||
1085 | struct fuse_dev *fud; | ||
1086 | |||
1087 | fud = fuse_dev_alloc(); | ||
1088 | if (!fud) | ||
1089 | return NULL; | ||
1090 | |||
1091 | fuse_dev_install(fud, fc); | ||
1058 | return fud; | 1092 | return fud; |
1059 | } | 1093 | } |
1060 | EXPORT_SYMBOL_GPL(fuse_dev_alloc); | 1094 | EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); |
1061 | 1095 | ||
1062 | void fuse_dev_free(struct fuse_dev *fud) | 1096 | void fuse_dev_free(struct fuse_dev *fud) |
1063 | { | 1097 | { |
@@ -1075,17 +1109,13 @@ void fuse_dev_free(struct fuse_dev *fud) | |||
1075 | } | 1109 | } |
1076 | EXPORT_SYMBOL_GPL(fuse_dev_free); | 1110 | EXPORT_SYMBOL_GPL(fuse_dev_free); |
1077 | 1111 | ||
1078 | static int fuse_fill_super(struct super_block *sb, void *data, int silent) | 1112 | int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) |
1079 | { | 1113 | { |
1080 | struct fuse_dev *fud; | 1114 | struct fuse_dev *fud; |
1081 | struct fuse_conn *fc; | 1115 | struct fuse_conn *fc = get_fuse_conn_super(sb); |
1082 | struct inode *root; | 1116 | struct inode *root; |
1083 | struct fuse_mount_data d; | ||
1084 | struct file *file; | ||
1085 | struct dentry *root_dentry; | 1117 | struct dentry *root_dentry; |
1086 | struct fuse_req *init_req; | ||
1087 | int err; | 1118 | int err; |
1088 | int is_bdev = sb->s_bdev != NULL; | ||
1089 | 1119 | ||
1090 | err = -EINVAL; | 1120 | err = -EINVAL; |
1091 | if (sb->s_flags & SB_MANDLOCK) | 1121 | if (sb->s_flags & SB_MANDLOCK) |
@@ -1093,19 +1123,19 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
1093 | 1123 | ||
1094 | sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); | 1124 | sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); |
1095 | 1125 | ||
1096 | if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns)) | 1126 | if (ctx->is_bdev) { |
1097 | goto err; | ||
1098 | |||
1099 | if (is_bdev) { | ||
1100 | #ifdef CONFIG_BLOCK | 1127 | #ifdef CONFIG_BLOCK |
1101 | err = -EINVAL; | 1128 | err = -EINVAL; |
1102 | if (!sb_set_blocksize(sb, d.blksize)) | 1129 | if (!sb_set_blocksize(sb, ctx->blksize)) |
1103 | goto err; | 1130 | goto err; |
1104 | #endif | 1131 | #endif |
1105 | } else { | 1132 | } else { |
1106 | sb->s_blocksize = PAGE_SIZE; | 1133 | sb->s_blocksize = PAGE_SIZE; |
1107 | sb->s_blocksize_bits = PAGE_SHIFT; | 1134 | sb->s_blocksize_bits = PAGE_SHIFT; |
1108 | } | 1135 | } |
1136 | |||
1137 | sb->s_subtype = ctx->subtype; | ||
1138 | ctx->subtype = NULL; | ||
1109 | sb->s_magic = FUSE_SUPER_MAGIC; | 1139 | sb->s_magic = FUSE_SUPER_MAGIC; |
1110 | sb->s_op = &fuse_super_operations; | 1140 | sb->s_op = &fuse_super_operations; |
1111 | sb->s_xattr = fuse_xattr_handlers; | 1141 | sb->s_xattr = fuse_xattr_handlers; |
@@ -1116,19 +1146,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
1116 | if (sb->s_user_ns != &init_user_ns) | 1146 | if (sb->s_user_ns != &init_user_ns) |
1117 | sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; | 1147 | sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; |
1118 | 1148 | ||
1119 | file = fget(d.fd); | ||
1120 | err = -EINVAL; | ||
1121 | if (!file) | ||
1122 | goto err; | ||
1123 | |||
1124 | /* | ||
1125 | * Require mount to happen from the same user namespace which | ||
1126 | * opened /dev/fuse to prevent potential attacks. | ||
1127 | */ | ||
1128 | if (file->f_op != &fuse_dev_operations || | ||
1129 | file->f_cred->user_ns != sb->s_user_ns) | ||
1130 | goto err_fput; | ||
1131 | |||
1132 | /* | 1149 | /* |
1133 | * If we are not in the initial user namespace posix | 1150 | * If we are not in the initial user namespace posix |
1134 | * acls must be translated. | 1151 | * acls must be translated. |
@@ -1136,17 +1153,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
1136 | if (sb->s_user_ns != &init_user_ns) | 1153 | if (sb->s_user_ns != &init_user_ns) |
1137 | sb->s_xattr = fuse_no_acl_xattr_handlers; | 1154 | sb->s_xattr = fuse_no_acl_xattr_handlers; |
1138 | 1155 | ||
1139 | fc = kmalloc(sizeof(*fc), GFP_KERNEL); | 1156 | fud = fuse_dev_alloc_install(fc); |
1140 | err = -ENOMEM; | ||
1141 | if (!fc) | ||
1142 | goto err_fput; | ||
1143 | |||
1144 | fuse_conn_init(fc, sb->s_user_ns); | ||
1145 | fc->release = fuse_free_conn; | ||
1146 | |||
1147 | fud = fuse_dev_alloc(fc); | ||
1148 | if (!fud) | 1157 | if (!fud) |
1149 | goto err_put_conn; | 1158 | goto err; |
1150 | 1159 | ||
1151 | fc->dev = sb->s_dev; | 1160 | fc->dev = sb->s_dev; |
1152 | fc->sb = sb; | 1161 | fc->sb = sb; |
@@ -1159,17 +1168,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
1159 | fc->dont_mask = 1; | 1168 | fc->dont_mask = 1; |
1160 | sb->s_flags |= SB_POSIXACL; | 1169 | sb->s_flags |= SB_POSIXACL; |
1161 | 1170 | ||
1162 | fc->default_permissions = d.default_permissions; | 1171 | fc->default_permissions = ctx->default_permissions; |
1163 | fc->allow_other = d.allow_other; | 1172 | fc->allow_other = ctx->allow_other; |
1164 | fc->user_id = d.user_id; | 1173 | fc->user_id = ctx->user_id; |
1165 | fc->group_id = d.group_id; | 1174 | fc->group_id = ctx->group_id; |
1166 | fc->max_read = max_t(unsigned, 4096, d.max_read); | 1175 | fc->max_read = max_t(unsigned, 4096, ctx->max_read); |
1167 | 1176 | fc->destroy = ctx->destroy; | |
1168 | /* Used by get_root_inode() */ | 1177 | fc->no_control = ctx->no_control; |
1169 | sb->s_fs_info = fc; | 1178 | fc->no_force_umount = ctx->no_force_umount; |
1170 | 1179 | ||
1171 | err = -ENOMEM; | 1180 | err = -ENOMEM; |
1172 | root = fuse_get_root_inode(sb, d.rootmode); | 1181 | root = fuse_get_root_inode(sb, ctx->rootmode); |
1173 | sb->s_d_op = &fuse_root_dentry_operations; | 1182 | sb->s_d_op = &fuse_root_dentry_operations; |
1174 | root_dentry = d_make_root(root); | 1183 | root_dentry = d_make_root(root); |
1175 | if (!root_dentry) | 1184 | if (!root_dentry) |
@@ -1177,20 +1186,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
1177 | /* Root dentry doesn't have .d_revalidate */ | 1186 | /* Root dentry doesn't have .d_revalidate */ |
1178 | sb->s_d_op = &fuse_dentry_operations; | 1187 | sb->s_d_op = &fuse_dentry_operations; |
1179 | 1188 | ||
1180 | init_req = fuse_request_alloc(0); | ||
1181 | if (!init_req) | ||
1182 | goto err_put_root; | ||
1183 | __set_bit(FR_BACKGROUND, &init_req->flags); | ||
1184 | |||
1185 | if (is_bdev) { | ||
1186 | fc->destroy_req = fuse_request_alloc(0); | ||
1187 | if (!fc->destroy_req) | ||
1188 | goto err_free_init_req; | ||
1189 | } | ||
1190 | |||
1191 | mutex_lock(&fuse_mutex); | 1189 | mutex_lock(&fuse_mutex); |
1192 | err = -EINVAL; | 1190 | err = -EINVAL; |
1193 | if (file->private_data) | 1191 | if (*ctx->fudptr) |
1194 | goto err_unlock; | 1192 | goto err_unlock; |
1195 | 1193 | ||
1196 | err = fuse_ctl_add_conn(fc); | 1194 | err = fuse_ctl_add_conn(fc); |
@@ -1199,27 +1197,62 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
1199 | 1197 | ||
1200 | list_add_tail(&fc->entry, &fuse_conn_list); | 1198 | list_add_tail(&fc->entry, &fuse_conn_list); |
1201 | sb->s_root = root_dentry; | 1199 | sb->s_root = root_dentry; |
1202 | file->private_data = fud; | 1200 | *ctx->fudptr = fud; |
1203 | mutex_unlock(&fuse_mutex); | 1201 | mutex_unlock(&fuse_mutex); |
1202 | return 0; | ||
1203 | |||
1204 | err_unlock: | ||
1205 | mutex_unlock(&fuse_mutex); | ||
1206 | dput(root_dentry); | ||
1207 | err_dev_free: | ||
1208 | fuse_dev_free(fud); | ||
1209 | err: | ||
1210 | return err; | ||
1211 | } | ||
1212 | EXPORT_SYMBOL_GPL(fuse_fill_super_common); | ||
1213 | |||
1214 | static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) | ||
1215 | { | ||
1216 | struct fuse_fs_context *ctx = fsc->fs_private; | ||
1217 | struct file *file; | ||
1218 | int err; | ||
1219 | struct fuse_conn *fc; | ||
1220 | |||
1221 | err = -EINVAL; | ||
1222 | file = fget(ctx->fd); | ||
1223 | if (!file) | ||
1224 | goto err; | ||
1225 | |||
1226 | /* | ||
1227 | * Require mount to happen from the same user namespace which | ||
1228 | * opened /dev/fuse to prevent potential attacks. | ||
1229 | */ | ||
1230 | if ((file->f_op != &fuse_dev_operations) || | ||
1231 | (file->f_cred->user_ns != sb->s_user_ns)) | ||
1232 | goto err_fput; | ||
1233 | ctx->fudptr = &file->private_data; | ||
1234 | |||
1235 | fc = kmalloc(sizeof(*fc), GFP_KERNEL); | ||
1236 | err = -ENOMEM; | ||
1237 | if (!fc) | ||
1238 | goto err_fput; | ||
1239 | |||
1240 | fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); | ||
1241 | fc->release = fuse_free_conn; | ||
1242 | sb->s_fs_info = fc; | ||
1243 | |||
1244 | err = fuse_fill_super_common(sb, ctx); | ||
1245 | if (err) | ||
1246 | goto err_put_conn; | ||
1204 | /* | 1247 | /* |
1205 | * atomic_dec_and_test() in fput() provides the necessary | 1248 | * atomic_dec_and_test() in fput() provides the necessary |
1206 | * memory barrier for file->private_data to be visible on all | 1249 | * memory barrier for file->private_data to be visible on all |
1207 | * CPUs after this | 1250 | * CPUs after this |
1208 | */ | 1251 | */ |
1209 | fput(file); | 1252 | fput(file); |
1210 | 1253 | fuse_send_init(get_fuse_conn_super(sb)); | |
1211 | fuse_send_init(fc, init_req); | ||
1212 | |||
1213 | return 0; | 1254 | return 0; |
1214 | 1255 | ||
1215 | err_unlock: | ||
1216 | mutex_unlock(&fuse_mutex); | ||
1217 | err_free_init_req: | ||
1218 | fuse_request_free(init_req); | ||
1219 | err_put_root: | ||
1220 | dput(root_dentry); | ||
1221 | err_dev_free: | ||
1222 | fuse_dev_free(fud); | ||
1223 | err_put_conn: | 1256 | err_put_conn: |
1224 | fuse_conn_put(fc); | 1257 | fuse_conn_put(fc); |
1225 | sb->s_fs_info = NULL; | 1258 | sb->s_fs_info = NULL; |
@@ -1229,11 +1262,52 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
1229 | return err; | 1262 | return err; |
1230 | } | 1263 | } |
1231 | 1264 | ||
1232 | static struct dentry *fuse_mount(struct file_system_type *fs_type, | 1265 | static int fuse_get_tree(struct fs_context *fc) |
1233 | int flags, const char *dev_name, | 1266 | { |
1234 | void *raw_data) | 1267 | struct fuse_fs_context *ctx = fc->fs_private; |
1268 | |||
1269 | if (!ctx->fd_present || !ctx->rootmode_present || | ||
1270 | !ctx->user_id_present || !ctx->group_id_present) | ||
1271 | return -EINVAL; | ||
1272 | |||
1273 | #ifdef CONFIG_BLOCK | ||
1274 | if (ctx->is_bdev) | ||
1275 | return get_tree_bdev(fc, fuse_fill_super); | ||
1276 | #endif | ||
1277 | |||
1278 | return get_tree_nodev(fc, fuse_fill_super); | ||
1279 | } | ||
1280 | |||
1281 | static const struct fs_context_operations fuse_context_ops = { | ||
1282 | .free = fuse_free_fc, | ||
1283 | .parse_param = fuse_parse_param, | ||
1284 | .get_tree = fuse_get_tree, | ||
1285 | }; | ||
1286 | |||
1287 | /* | ||
1288 | * Set up the filesystem mount context. | ||
1289 | */ | ||
1290 | static int fuse_init_fs_context(struct fs_context *fc) | ||
1235 | { | 1291 | { |
1236 | return mount_nodev(fs_type, flags, raw_data, fuse_fill_super); | 1292 | struct fuse_fs_context *ctx; |
1293 | |||
1294 | ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); | ||
1295 | if (!ctx) | ||
1296 | return -ENOMEM; | ||
1297 | |||
1298 | ctx->max_read = ~0; | ||
1299 | ctx->blksize = FUSE_DEFAULT_BLKSIZE; | ||
1300 | |||
1301 | #ifdef CONFIG_BLOCK | ||
1302 | if (fc->fs_type == &fuseblk_fs_type) { | ||
1303 | ctx->is_bdev = true; | ||
1304 | ctx->destroy = true; | ||
1305 | } | ||
1306 | #endif | ||
1307 | |||
1308 | fc->fs_private = ctx; | ||
1309 | fc->ops = &fuse_context_ops; | ||
1310 | return 0; | ||
1237 | } | 1311 | } |
1238 | 1312 | ||
1239 | static void fuse_sb_destroy(struct super_block *sb) | 1313 | static void fuse_sb_destroy(struct super_block *sb) |
@@ -1241,7 +1315,8 @@ static void fuse_sb_destroy(struct super_block *sb) | |||
1241 | struct fuse_conn *fc = get_fuse_conn_super(sb); | 1315 | struct fuse_conn *fc = get_fuse_conn_super(sb); |
1242 | 1316 | ||
1243 | if (fc) { | 1317 | if (fc) { |
1244 | fuse_send_destroy(fc); | 1318 | if (fc->destroy) |
1319 | fuse_send_destroy(fc); | ||
1245 | 1320 | ||
1246 | fuse_abort_conn(fc); | 1321 | fuse_abort_conn(fc); |
1247 | fuse_wait_aborted(fc); | 1322 | fuse_wait_aborted(fc); |
@@ -1252,29 +1327,24 @@ static void fuse_sb_destroy(struct super_block *sb) | |||
1252 | } | 1327 | } |
1253 | } | 1328 | } |
1254 | 1329 | ||
1255 | static void fuse_kill_sb_anon(struct super_block *sb) | 1330 | void fuse_kill_sb_anon(struct super_block *sb) |
1256 | { | 1331 | { |
1257 | fuse_sb_destroy(sb); | 1332 | fuse_sb_destroy(sb); |
1258 | kill_anon_super(sb); | 1333 | kill_anon_super(sb); |
1259 | } | 1334 | } |
1335 | EXPORT_SYMBOL_GPL(fuse_kill_sb_anon); | ||
1260 | 1336 | ||
1261 | static struct file_system_type fuse_fs_type = { | 1337 | static struct file_system_type fuse_fs_type = { |
1262 | .owner = THIS_MODULE, | 1338 | .owner = THIS_MODULE, |
1263 | .name = "fuse", | 1339 | .name = "fuse", |
1264 | .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, | 1340 | .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, |
1265 | .mount = fuse_mount, | 1341 | .init_fs_context = fuse_init_fs_context, |
1342 | .parameters = &fuse_fs_parameters, | ||
1266 | .kill_sb = fuse_kill_sb_anon, | 1343 | .kill_sb = fuse_kill_sb_anon, |
1267 | }; | 1344 | }; |
1268 | MODULE_ALIAS_FS("fuse"); | 1345 | MODULE_ALIAS_FS("fuse"); |
1269 | 1346 | ||
1270 | #ifdef CONFIG_BLOCK | 1347 | #ifdef CONFIG_BLOCK |
1271 | static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, | ||
1272 | int flags, const char *dev_name, | ||
1273 | void *raw_data) | ||
1274 | { | ||
1275 | return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super); | ||
1276 | } | ||
1277 | |||
1278 | static void fuse_kill_sb_blk(struct super_block *sb) | 1348 | static void fuse_kill_sb_blk(struct super_block *sb) |
1279 | { | 1349 | { |
1280 | fuse_sb_destroy(sb); | 1350 | fuse_sb_destroy(sb); |
@@ -1284,7 +1354,8 @@ static void fuse_kill_sb_blk(struct super_block *sb) | |||
1284 | static struct file_system_type fuseblk_fs_type = { | 1354 | static struct file_system_type fuseblk_fs_type = { |
1285 | .owner = THIS_MODULE, | 1355 | .owner = THIS_MODULE, |
1286 | .name = "fuseblk", | 1356 | .name = "fuseblk", |
1287 | .mount = fuse_mount_blk, | 1357 | .init_fs_context = fuse_init_fs_context, |
1358 | .parameters = &fuse_fs_parameters, | ||
1288 | .kill_sb = fuse_kill_sb_blk, | 1359 | .kill_sb = fuse_kill_sb_blk, |
1289 | .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, | 1360 | .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, |
1290 | }; | 1361 | }; |
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 574d03f8a573..5c38b9d84c6e 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c | |||
@@ -249,6 +249,27 @@ retry: | |||
249 | return 0; | 249 | return 0; |
250 | } | 250 | } |
251 | 251 | ||
252 | static void fuse_force_forget(struct file *file, u64 nodeid) | ||
253 | { | ||
254 | struct inode *inode = file_inode(file); | ||
255 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
256 | struct fuse_forget_in inarg; | ||
257 | FUSE_ARGS(args); | ||
258 | |||
259 | memset(&inarg, 0, sizeof(inarg)); | ||
260 | inarg.nlookup = 1; | ||
261 | args.opcode = FUSE_FORGET; | ||
262 | args.nodeid = nodeid; | ||
263 | args.in_numargs = 1; | ||
264 | args.in_args[0].size = sizeof(inarg); | ||
265 | args.in_args[0].value = &inarg; | ||
266 | args.force = true; | ||
267 | args.noreply = true; | ||
268 | |||
269 | fuse_simple_request(fc, &args); | ||
270 | /* ignore errors */ | ||
271 | } | ||
272 | |||
252 | static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | 273 | static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, |
253 | struct dir_context *ctx, u64 attr_version) | 274 | struct dir_context *ctx, u64 attr_version) |
254 | { | 275 | { |
@@ -295,62 +316,55 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | |||
295 | 316 | ||
296 | static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) | 317 | static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) |
297 | { | 318 | { |
298 | int plus, err; | 319 | int plus; |
299 | size_t nbytes; | 320 | ssize_t res; |
300 | struct page *page; | 321 | struct page *page; |
301 | struct inode *inode = file_inode(file); | 322 | struct inode *inode = file_inode(file); |
302 | struct fuse_conn *fc = get_fuse_conn(inode); | 323 | struct fuse_conn *fc = get_fuse_conn(inode); |
303 | struct fuse_req *req; | 324 | struct fuse_io_args ia = {}; |
325 | struct fuse_args_pages *ap = &ia.ap; | ||
326 | struct fuse_page_desc desc = { .length = PAGE_SIZE }; | ||
304 | u64 attr_version = 0; | 327 | u64 attr_version = 0; |
305 | bool locked; | 328 | bool locked; |
306 | 329 | ||
307 | req = fuse_get_req(fc, 1); | ||
308 | if (IS_ERR(req)) | ||
309 | return PTR_ERR(req); | ||
310 | |||
311 | page = alloc_page(GFP_KERNEL); | 330 | page = alloc_page(GFP_KERNEL); |
312 | if (!page) { | 331 | if (!page) |
313 | fuse_put_request(fc, req); | ||
314 | return -ENOMEM; | 332 | return -ENOMEM; |
315 | } | ||
316 | 333 | ||
317 | plus = fuse_use_readdirplus(inode, ctx); | 334 | plus = fuse_use_readdirplus(inode, ctx); |
318 | req->out.argpages = 1; | 335 | ap->args.out_pages = 1; |
319 | req->num_pages = 1; | 336 | ap->num_pages = 1; |
320 | req->pages[0] = page; | 337 | ap->pages = &page; |
321 | req->page_descs[0].length = PAGE_SIZE; | 338 | ap->descs = &desc; |
322 | if (plus) { | 339 | if (plus) { |
323 | attr_version = fuse_get_attr_version(fc); | 340 | attr_version = fuse_get_attr_version(fc); |
324 | fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, | 341 | fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, |
325 | FUSE_READDIRPLUS); | 342 | FUSE_READDIRPLUS); |
326 | } else { | 343 | } else { |
327 | fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, | 344 | fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, |
328 | FUSE_READDIR); | 345 | FUSE_READDIR); |
329 | } | 346 | } |
330 | locked = fuse_lock_inode(inode); | 347 | locked = fuse_lock_inode(inode); |
331 | fuse_request_send(fc, req); | 348 | res = fuse_simple_request(fc, &ap->args); |
332 | fuse_unlock_inode(inode, locked); | 349 | fuse_unlock_inode(inode, locked); |
333 | nbytes = req->out.args[0].size; | 350 | if (res >= 0) { |
334 | err = req->out.h.error; | 351 | if (!res) { |
335 | fuse_put_request(fc, req); | ||
336 | if (!err) { | ||
337 | if (!nbytes) { | ||
338 | struct fuse_file *ff = file->private_data; | 352 | struct fuse_file *ff = file->private_data; |
339 | 353 | ||
340 | if (ff->open_flags & FOPEN_CACHE_DIR) | 354 | if (ff->open_flags & FOPEN_CACHE_DIR) |
341 | fuse_readdir_cache_end(file, ctx->pos); | 355 | fuse_readdir_cache_end(file, ctx->pos); |
342 | } else if (plus) { | 356 | } else if (plus) { |
343 | err = parse_dirplusfile(page_address(page), nbytes, | 357 | res = parse_dirplusfile(page_address(page), res, |
344 | file, ctx, attr_version); | 358 | file, ctx, attr_version); |
345 | } else { | 359 | } else { |
346 | err = parse_dirfile(page_address(page), nbytes, file, | 360 | res = parse_dirfile(page_address(page), res, file, |
347 | ctx); | 361 | ctx); |
348 | } | 362 | } |
349 | } | 363 | } |
350 | 364 | ||
351 | __free_page(page); | 365 | __free_page(page); |
352 | fuse_invalidate_atime(inode); | 366 | fuse_invalidate_atime(inode); |
353 | return err; | 367 | return res; |
354 | } | 368 | } |
355 | 369 | ||
356 | enum fuse_parse_result { | 370 | enum fuse_parse_result { |
@@ -372,11 +386,13 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff, | |||
372 | for (;;) { | 386 | for (;;) { |
373 | struct fuse_dirent *dirent = addr + offset; | 387 | struct fuse_dirent *dirent = addr + offset; |
374 | unsigned int nbytes = size - offset; | 388 | unsigned int nbytes = size - offset; |
375 | size_t reclen = FUSE_DIRENT_SIZE(dirent); | 389 | size_t reclen; |
376 | 390 | ||
377 | if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen) | 391 | if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen) |
378 | break; | 392 | break; |
379 | 393 | ||
394 | reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */ | ||
395 | |||
380 | if (WARN_ON(dirent->namelen > FUSE_NAME_MAX)) | 396 | if (WARN_ON(dirent->namelen > FUSE_NAME_MAX)) |
381 | return FOUND_ERR; | 397 | return FOUND_ERR; |
382 | if (WARN_ON(reclen > nbytes)) | 398 | if (WARN_ON(reclen > nbytes)) |
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 433717640f78..20d052e08b3b 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c | |||
@@ -25,15 +25,15 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value, | |||
25 | memset(&inarg, 0, sizeof(inarg)); | 25 | memset(&inarg, 0, sizeof(inarg)); |
26 | inarg.size = size; | 26 | inarg.size = size; |
27 | inarg.flags = flags; | 27 | inarg.flags = flags; |
28 | args.in.h.opcode = FUSE_SETXATTR; | 28 | args.opcode = FUSE_SETXATTR; |
29 | args.in.h.nodeid = get_node_id(inode); | 29 | args.nodeid = get_node_id(inode); |
30 | args.in.numargs = 3; | 30 | args.in_numargs = 3; |
31 | args.in.args[0].size = sizeof(inarg); | 31 | args.in_args[0].size = sizeof(inarg); |
32 | args.in.args[0].value = &inarg; | 32 | args.in_args[0].value = &inarg; |
33 | args.in.args[1].size = strlen(name) + 1; | 33 | args.in_args[1].size = strlen(name) + 1; |
34 | args.in.args[1].value = name; | 34 | args.in_args[1].value = name; |
35 | args.in.args[2].size = size; | 35 | args.in_args[2].size = size; |
36 | args.in.args[2].value = value; | 36 | args.in_args[2].value = value; |
37 | err = fuse_simple_request(fc, &args); | 37 | err = fuse_simple_request(fc, &args); |
38 | if (err == -ENOSYS) { | 38 | if (err == -ENOSYS) { |
39 | fc->no_setxattr = 1; | 39 | fc->no_setxattr = 1; |
@@ -60,22 +60,22 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, | |||
60 | 60 | ||
61 | memset(&inarg, 0, sizeof(inarg)); | 61 | memset(&inarg, 0, sizeof(inarg)); |
62 | inarg.size = size; | 62 | inarg.size = size; |
63 | args.in.h.opcode = FUSE_GETXATTR; | 63 | args.opcode = FUSE_GETXATTR; |
64 | args.in.h.nodeid = get_node_id(inode); | 64 | args.nodeid = get_node_id(inode); |
65 | args.in.numargs = 2; | 65 | args.in_numargs = 2; |
66 | args.in.args[0].size = sizeof(inarg); | 66 | args.in_args[0].size = sizeof(inarg); |
67 | args.in.args[0].value = &inarg; | 67 | args.in_args[0].value = &inarg; |
68 | args.in.args[1].size = strlen(name) + 1; | 68 | args.in_args[1].size = strlen(name) + 1; |
69 | args.in.args[1].value = name; | 69 | args.in_args[1].value = name; |
70 | /* This is really two different operations rolled into one */ | 70 | /* This is really two different operations rolled into one */ |
71 | args.out.numargs = 1; | 71 | args.out_numargs = 1; |
72 | if (size) { | 72 | if (size) { |
73 | args.out.argvar = 1; | 73 | args.out_argvar = true; |
74 | args.out.args[0].size = size; | 74 | args.out_args[0].size = size; |
75 | args.out.args[0].value = value; | 75 | args.out_args[0].value = value; |
76 | } else { | 76 | } else { |
77 | args.out.args[0].size = sizeof(outarg); | 77 | args.out_args[0].size = sizeof(outarg); |
78 | args.out.args[0].value = &outarg; | 78 | args.out_args[0].value = &outarg; |
79 | } | 79 | } |
80 | ret = fuse_simple_request(fc, &args); | 80 | ret = fuse_simple_request(fc, &args); |
81 | if (!ret && !size) | 81 | if (!ret && !size) |
@@ -121,20 +121,20 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) | |||
121 | 121 | ||
122 | memset(&inarg, 0, sizeof(inarg)); | 122 | memset(&inarg, 0, sizeof(inarg)); |
123 | inarg.size = size; | 123 | inarg.size = size; |
124 | args.in.h.opcode = FUSE_LISTXATTR; | 124 | args.opcode = FUSE_LISTXATTR; |
125 | args.in.h.nodeid = get_node_id(inode); | 125 | args.nodeid = get_node_id(inode); |
126 | args.in.numargs = 1; | 126 | args.in_numargs = 1; |
127 | args.in.args[0].size = sizeof(inarg); | 127 | args.in_args[0].size = sizeof(inarg); |
128 | args.in.args[0].value = &inarg; | 128 | args.in_args[0].value = &inarg; |
129 | /* This is really two different operations rolled into one */ | 129 | /* This is really two different operations rolled into one */ |
130 | args.out.numargs = 1; | 130 | args.out_numargs = 1; |
131 | if (size) { | 131 | if (size) { |
132 | args.out.argvar = 1; | 132 | args.out_argvar = true; |
133 | args.out.args[0].size = size; | 133 | args.out_args[0].size = size; |
134 | args.out.args[0].value = list; | 134 | args.out_args[0].value = list; |
135 | } else { | 135 | } else { |
136 | args.out.args[0].size = sizeof(outarg); | 136 | args.out_args[0].size = sizeof(outarg); |
137 | args.out.args[0].value = &outarg; | 137 | args.out_args[0].value = &outarg; |
138 | } | 138 | } |
139 | ret = fuse_simple_request(fc, &args); | 139 | ret = fuse_simple_request(fc, &args); |
140 | if (!ret && !size) | 140 | if (!ret && !size) |
@@ -157,11 +157,11 @@ int fuse_removexattr(struct inode *inode, const char *name) | |||
157 | if (fc->no_removexattr) | 157 | if (fc->no_removexattr) |
158 | return -EOPNOTSUPP; | 158 | return -EOPNOTSUPP; |
159 | 159 | ||
160 | args.in.h.opcode = FUSE_REMOVEXATTR; | 160 | args.opcode = FUSE_REMOVEXATTR; |
161 | args.in.h.nodeid = get_node_id(inode); | 161 | args.nodeid = get_node_id(inode); |
162 | args.in.numargs = 1; | 162 | args.in_numargs = 1; |
163 | args.in.args[0].size = strlen(name) + 1; | 163 | args.in_args[0].size = strlen(name) + 1; |
164 | args.in.args[0].value = name; | 164 | args.in_args[0].value = name; |
165 | err = fuse_simple_request(fc, &args); | 165 | err = fuse_simple_request(fc, &args); |
166 | if (err == -ENOSYS) { | 166 | if (err == -ENOSYS) { |
167 | fc->no_removexattr = 1; | 167 | fc->no_removexattr = 1; |
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 10517cea9682..1fc28c2da279 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c | |||
@@ -24,7 +24,7 @@ | |||
24 | 24 | ||
25 | struct iomap_dio { | 25 | struct iomap_dio { |
26 | struct kiocb *iocb; | 26 | struct kiocb *iocb; |
27 | iomap_dio_end_io_t *end_io; | 27 | const struct iomap_dio_ops *dops; |
28 | loff_t i_size; | 28 | loff_t i_size; |
29 | loff_t size; | 29 | loff_t size; |
30 | atomic_t ref; | 30 | atomic_t ref; |
@@ -72,18 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap, | |||
72 | 72 | ||
73 | static ssize_t iomap_dio_complete(struct iomap_dio *dio) | 73 | static ssize_t iomap_dio_complete(struct iomap_dio *dio) |
74 | { | 74 | { |
75 | const struct iomap_dio_ops *dops = dio->dops; | ||
75 | struct kiocb *iocb = dio->iocb; | 76 | struct kiocb *iocb = dio->iocb; |
76 | struct inode *inode = file_inode(iocb->ki_filp); | 77 | struct inode *inode = file_inode(iocb->ki_filp); |
77 | loff_t offset = iocb->ki_pos; | 78 | loff_t offset = iocb->ki_pos; |
78 | ssize_t ret; | 79 | ssize_t ret = dio->error; |
79 | 80 | ||
80 | if (dio->end_io) { | 81 | if (dops && dops->end_io) |
81 | ret = dio->end_io(iocb, | 82 | ret = dops->end_io(iocb, dio->size, ret, dio->flags); |
82 | dio->error ? dio->error : dio->size, | ||
83 | dio->flags); | ||
84 | } else { | ||
85 | ret = dio->error; | ||
86 | } | ||
87 | 83 | ||
88 | if (likely(!ret)) { | 84 | if (likely(!ret)) { |
89 | ret = dio->size; | 85 | ret = dio->size; |
@@ -101,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) | |||
101 | * one is a pretty crazy thing to do, so we don't support it 100%. If | 97 | * one is a pretty crazy thing to do, so we don't support it 100%. If |
102 | * this invalidation fails, tough, the write still worked... | 98 | * this invalidation fails, tough, the write still worked... |
103 | * | 99 | * |
104 | * And this page cache invalidation has to be after dio->end_io(), as | 100 | * And this page cache invalidation has to be after ->end_io(), as some |
105 | * some filesystems convert unwritten extents to real allocations in | 101 | * filesystems convert unwritten extents to real allocations in |
106 | * end_io() when necessary, otherwise a racing buffer read would cache | 102 | * ->end_io() when necessary, otherwise a racing buffer read would cache |
107 | * zeros from unwritten extents. | 103 | * zeros from unwritten extents. |
108 | */ | 104 | */ |
109 | if (!dio->error && | 105 | if (!dio->error && |
@@ -396,7 +392,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, | |||
396 | */ | 392 | */ |
397 | ssize_t | 393 | ssize_t |
398 | iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | 394 | iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, |
399 | const struct iomap_ops *ops, iomap_dio_end_io_t end_io) | 395 | const struct iomap_ops *ops, const struct iomap_dio_ops *dops) |
400 | { | 396 | { |
401 | struct address_space *mapping = iocb->ki_filp->f_mapping; | 397 | struct address_space *mapping = iocb->ki_filp->f_mapping; |
402 | struct inode *inode = file_inode(iocb->ki_filp); | 398 | struct inode *inode = file_inode(iocb->ki_filp); |
@@ -421,7 +417,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | |||
421 | atomic_set(&dio->ref, 1); | 417 | atomic_set(&dio->ref, 1); |
422 | dio->size = 0; | 418 | dio->size = 0; |
423 | dio->i_size = i_size_read(inode); | 419 | dio->i_size = i_size_read(inode); |
424 | dio->end_io = end_io; | 420 | dio->dops = dops; |
425 | dio->error = 0; | 421 | dio->error = 0; |
426 | dio->flags = 0; | 422 | dio->flags = 0; |
427 | 423 | ||
diff --git a/fs/namespace.c b/fs/namespace.c index abcdc5f44865..fe0e9e1410fe 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -2802,8 +2802,6 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, | |||
2802 | put_filesystem(type); | 2802 | put_filesystem(type); |
2803 | return -EINVAL; | 2803 | return -EINVAL; |
2804 | } | 2804 | } |
2805 | } else { | ||
2806 | subtype = ""; | ||
2807 | } | 2805 | } |
2808 | } | 2806 | } |
2809 | 2807 | ||
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index e16fb8f2049e..273ee82d8aa9 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c | |||
@@ -88,7 +88,7 @@ static inline void mangle(struct seq_file *m, const char *s) | |||
88 | static void show_type(struct seq_file *m, struct super_block *sb) | 88 | static void show_type(struct seq_file *m, struct super_block *sb) |
89 | { | 89 | { |
90 | mangle(m, sb->s_type->name); | 90 | mangle(m, sb->s_type->name); |
91 | if (sb->s_subtype && sb->s_subtype[0]) { | 91 | if (sb->s_subtype) { |
92 | seq_putc(m, '.'); | 92 | seq_putc(m, '.'); |
93 | mangle(m, sb->s_subtype); | 93 | mangle(m, sb->s_subtype); |
94 | } | 94 | } |
diff --git a/fs/super.c b/fs/super.c index 8020974b2a68..f627b7c53d2b 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -1555,11 +1555,6 @@ int vfs_get_tree(struct fs_context *fc) | |||
1555 | sb = fc->root->d_sb; | 1555 | sb = fc->root->d_sb; |
1556 | WARN_ON(!sb->s_bdi); | 1556 | WARN_ON(!sb->s_bdi); |
1557 | 1557 | ||
1558 | if (fc->subtype && !sb->s_subtype) { | ||
1559 | sb->s_subtype = fc->subtype; | ||
1560 | fc->subtype = NULL; | ||
1561 | } | ||
1562 | |||
1563 | /* | 1558 | /* |
1564 | * Write barrier is for super_cache_count(). We place it before setting | 1559 | * Write barrier is for super_cache_count(). We place it before setting |
1565 | * SB_BORN as the data dependency between the two functions is the | 1560 | * SB_BORN as the data dependency between the two functions is the |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index d952d5962e93..1ffb179f35d2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -370,21 +370,23 @@ static int | |||
370 | xfs_dio_write_end_io( | 370 | xfs_dio_write_end_io( |
371 | struct kiocb *iocb, | 371 | struct kiocb *iocb, |
372 | ssize_t size, | 372 | ssize_t size, |
373 | int error, | ||
373 | unsigned flags) | 374 | unsigned flags) |
374 | { | 375 | { |
375 | struct inode *inode = file_inode(iocb->ki_filp); | 376 | struct inode *inode = file_inode(iocb->ki_filp); |
376 | struct xfs_inode *ip = XFS_I(inode); | 377 | struct xfs_inode *ip = XFS_I(inode); |
377 | loff_t offset = iocb->ki_pos; | 378 | loff_t offset = iocb->ki_pos; |
378 | unsigned int nofs_flag; | 379 | unsigned int nofs_flag; |
379 | int error = 0; | ||
380 | 380 | ||
381 | trace_xfs_end_io_direct_write(ip, offset, size); | 381 | trace_xfs_end_io_direct_write(ip, offset, size); |
382 | 382 | ||
383 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 383 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
384 | return -EIO; | 384 | return -EIO; |
385 | 385 | ||
386 | if (size <= 0) | 386 | if (error) |
387 | return size; | 387 | return error; |
388 | if (!size) | ||
389 | return 0; | ||
388 | 390 | ||
389 | /* | 391 | /* |
390 | * Capture amount written on completion as we can't reliably account | 392 | * Capture amount written on completion as we can't reliably account |
@@ -441,6 +443,10 @@ out: | |||
441 | return error; | 443 | return error; |
442 | } | 444 | } |
443 | 445 | ||
446 | static const struct iomap_dio_ops xfs_dio_write_ops = { | ||
447 | .end_io = xfs_dio_write_end_io, | ||
448 | }; | ||
449 | |||
444 | /* | 450 | /* |
445 | * xfs_file_dio_aio_write - handle direct IO writes | 451 | * xfs_file_dio_aio_write - handle direct IO writes |
446 | * | 452 | * |
@@ -541,7 +547,7 @@ xfs_file_dio_aio_write( | |||
541 | } | 547 | } |
542 | 548 | ||
543 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos); | 549 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos); |
544 | ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); | 550 | ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops); |
545 | 551 | ||
546 | /* | 552 | /* |
547 | * If unaligned, this is the only IO in-flight. If it has not yet | 553 | * If unaligned, this is the only IO in-flight. If it has not yet |
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 82156da3c650..b9dbda1c26aa 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
@@ -293,6 +293,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private); | |||
293 | struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client); | 293 | struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client); |
294 | u64 ceph_client_gid(struct ceph_client *client); | 294 | u64 ceph_client_gid(struct ceph_client *client); |
295 | extern void ceph_destroy_client(struct ceph_client *client); | 295 | extern void ceph_destroy_client(struct ceph_client *client); |
296 | extern void ceph_reset_client_addr(struct ceph_client *client); | ||
296 | extern int __ceph_open_session(struct ceph_client *client, | 297 | extern int __ceph_open_session(struct ceph_client *client, |
297 | unsigned long started); | 298 | unsigned long started); |
298 | extern int ceph_open_session(struct ceph_client *client); | 299 | extern int ceph_open_session(struct ceph_client *client); |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 23895d178149..c4458dc6a757 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -337,6 +337,7 @@ extern void ceph_msgr_flush(void); | |||
337 | extern void ceph_messenger_init(struct ceph_messenger *msgr, | 337 | extern void ceph_messenger_init(struct ceph_messenger *msgr, |
338 | struct ceph_entity_addr *myaddr); | 338 | struct ceph_entity_addr *myaddr); |
339 | extern void ceph_messenger_fini(struct ceph_messenger *msgr); | 339 | extern void ceph_messenger_fini(struct ceph_messenger *msgr); |
340 | extern void ceph_messenger_reset_nonce(struct ceph_messenger *msgr); | ||
340 | 341 | ||
341 | extern void ceph_con_init(struct ceph_connection *con, void *private, | 342 | extern void ceph_con_init(struct ceph_connection *con, void *private, |
342 | const struct ceph_connection_operations *ops, | 343 | const struct ceph_connection_operations *ops, |
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index b4d134d3312a..dbb8a6959a73 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h | |||
@@ -109,6 +109,7 @@ extern int ceph_monmap_contains(struct ceph_monmap *m, | |||
109 | 109 | ||
110 | extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl); | 110 | extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl); |
111 | extern void ceph_monc_stop(struct ceph_mon_client *monc); | 111 | extern void ceph_monc_stop(struct ceph_mon_client *monc); |
112 | extern void ceph_monc_reopen_session(struct ceph_mon_client *monc); | ||
112 | 113 | ||
113 | enum { | 114 | enum { |
114 | CEPH_SUB_MONMAP = 0, | 115 | CEPH_SUB_MONMAP = 0, |
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ad7fe5d10dcd..eaffbdddf89a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
@@ -381,6 +381,7 @@ extern void ceph_osdc_cleanup(void); | |||
381 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, | 381 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, |
382 | struct ceph_client *client); | 382 | struct ceph_client *client); |
383 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); | 383 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); |
384 | extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); | ||
384 | 385 | ||
385 | extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, | 386 | extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, |
386 | struct ceph_msg *msg); | 387 | struct ceph_msg *msg); |
@@ -388,6 +389,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | |||
388 | struct ceph_msg *msg); | 389 | struct ceph_msg *msg); |
389 | void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); | 390 | void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); |
390 | void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err); | 391 | void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err); |
392 | void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc); | ||
391 | 393 | ||
392 | #define osd_req_op_data(oreq, whch, typ, fld) \ | 394 | #define osd_req_op_data(oreq, whch, typ, fld) \ |
393 | ({ \ | 395 | ({ \ |
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 0424df7f6e6b..e5c14e2c53d3 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h | |||
@@ -95,7 +95,6 @@ struct fs_context { | |||
95 | const struct cred *cred; /* The mounter's credentials */ | 95 | const struct cred *cred; /* The mounter's credentials */ |
96 | struct fc_log *log; /* Logging buffer */ | 96 | struct fc_log *log; /* Logging buffer */ |
97 | const char *source; /* The source name (eg. dev path) */ | 97 | const char *source; /* The source name (eg. dev path) */ |
98 | const char *subtype; /* The subtype to set on the superblock */ | ||
99 | void *security; /* Linux S&M options */ | 98 | void *security; /* Linux S&M options */ |
100 | void *s_fs_info; /* Proposed s_fs_info */ | 99 | void *s_fs_info; /* Proposed s_fs_info */ |
101 | unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ | 100 | unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ |
diff --git a/include/linux/iomap.h b/include/linux/iomap.h index bc499ceae392..7aa5d6117936 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h | |||
@@ -188,10 +188,14 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, | |||
188 | */ | 188 | */ |
189 | #define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */ | 189 | #define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */ |
190 | #define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */ | 190 | #define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */ |
191 | typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, | 191 | |
192 | unsigned flags); | 192 | struct iomap_dio_ops { |
193 | int (*end_io)(struct kiocb *iocb, ssize_t size, int error, | ||
194 | unsigned flags); | ||
195 | }; | ||
196 | |||
193 | ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | 197 | ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, |
194 | const struct iomap_ops *ops, iomap_dio_end_io_t end_io); | 198 | const struct iomap_ops *ops, const struct iomap_dio_ops *dops); |
195 | int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); | 199 | int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); |
196 | 200 | ||
197 | #ifdef CONFIG_SWAP | 201 | #ifdef CONFIG_SWAP |
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2971d29a42e4..df2e12fb3381 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h | |||
@@ -425,6 +425,10 @@ enum fuse_opcode { | |||
425 | 425 | ||
426 | /* CUSE specific operations */ | 426 | /* CUSE specific operations */ |
427 | CUSE_INIT = 4096, | 427 | CUSE_INIT = 4096, |
428 | |||
429 | /* Reserved opcodes: helpful to detect structure endian-ness */ | ||
430 | CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ | ||
431 | FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ | ||
428 | }; | 432 | }; |
429 | 433 | ||
430 | enum fuse_notify_code { | 434 | enum fuse_notify_code { |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4eeea4d5c3ef..2d568246803f 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/nsproxy.h> | 13 | #include <linux/nsproxy.h> |
14 | #include <linux/parser.h> | 14 | #include <linux/parser.h> |
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/sched/mm.h> | ||
16 | #include <linux/seq_file.h> | 17 | #include <linux/seq_file.h> |
17 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
18 | #include <linux/statfs.h> | 19 | #include <linux/statfs.h> |
@@ -185,18 +186,34 @@ int ceph_compare_options(struct ceph_options *new_opt, | |||
185 | } | 186 | } |
186 | EXPORT_SYMBOL(ceph_compare_options); | 187 | EXPORT_SYMBOL(ceph_compare_options); |
187 | 188 | ||
189 | /* | ||
190 | * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are | ||
191 | * compatible with (a superset of) GFP_KERNEL. This is because while the | ||
192 | * actual pages are allocated with the specified flags, the page table pages | ||
193 | * are always allocated with GFP_KERNEL. map_vm_area() doesn't even take | ||
194 | * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc(). | ||
195 | * | ||
196 | * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO. | ||
197 | */ | ||
188 | void *ceph_kvmalloc(size_t size, gfp_t flags) | 198 | void *ceph_kvmalloc(size_t size, gfp_t flags) |
189 | { | 199 | { |
190 | if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { | 200 | void *p; |
191 | void *ptr = kmalloc(size, flags | __GFP_NOWARN); | 201 | |
192 | if (ptr) | 202 | if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) { |
193 | return ptr; | 203 | p = kvmalloc(size, flags); |
204 | } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) { | ||
205 | unsigned int nofs_flag = memalloc_nofs_save(); | ||
206 | p = kvmalloc(size, GFP_KERNEL); | ||
207 | memalloc_nofs_restore(nofs_flag); | ||
208 | } else { | ||
209 | unsigned int noio_flag = memalloc_noio_save(); | ||
210 | p = kvmalloc(size, GFP_KERNEL); | ||
211 | memalloc_noio_restore(noio_flag); | ||
194 | } | 212 | } |
195 | 213 | ||
196 | return __vmalloc(size, flags, PAGE_KERNEL); | 214 | return p; |
197 | } | 215 | } |
198 | 216 | ||
199 | |||
200 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | 217 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) |
201 | { | 218 | { |
202 | int i = 0; | 219 | int i = 0; |
@@ -694,6 +711,14 @@ void ceph_destroy_client(struct ceph_client *client) | |||
694 | } | 711 | } |
695 | EXPORT_SYMBOL(ceph_destroy_client); | 712 | EXPORT_SYMBOL(ceph_destroy_client); |
696 | 713 | ||
714 | void ceph_reset_client_addr(struct ceph_client *client) | ||
715 | { | ||
716 | ceph_messenger_reset_nonce(&client->msgr); | ||
717 | ceph_monc_reopen_session(&client->monc); | ||
718 | ceph_osdc_reopen_osds(&client->osdc); | ||
719 | } | ||
720 | EXPORT_SYMBOL(ceph_reset_client_addr); | ||
721 | |||
697 | /* | 722 | /* |
698 | * true if we have the mon map (and have thus joined the cluster) | 723 | * true if we have the mon map (and have thus joined the cluster) |
699 | */ | 724 | */ |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 962f521c863e..e4cb3db2ee77 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -3031,6 +3031,12 @@ static void con_fault(struct ceph_connection *con) | |||
3031 | } | 3031 | } |
3032 | 3032 | ||
3033 | 3033 | ||
3034 | void ceph_messenger_reset_nonce(struct ceph_messenger *msgr) | ||
3035 | { | ||
3036 | u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000; | ||
3037 | msgr->inst.addr.nonce = cpu_to_le32(nonce); | ||
3038 | encode_my_addr(msgr); | ||
3039 | } | ||
3034 | 3040 | ||
3035 | /* | 3041 | /* |
3036 | * initialize a new messenger instance | 3042 | * initialize a new messenger instance |
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 0520bf9825aa..7256c402ebaa 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
@@ -213,6 +213,13 @@ static void reopen_session(struct ceph_mon_client *monc) | |||
213 | __open_session(monc); | 213 | __open_session(monc); |
214 | } | 214 | } |
215 | 215 | ||
216 | void ceph_monc_reopen_session(struct ceph_mon_client *monc) | ||
217 | { | ||
218 | mutex_lock(&monc->mutex); | ||
219 | reopen_session(monc); | ||
220 | mutex_unlock(&monc->mutex); | ||
221 | } | ||
222 | |||
216 | static void un_backoff(struct ceph_mon_client *monc) | 223 | static void un_backoff(struct ceph_mon_client *monc) |
217 | { | 224 | { |
218 | monc->hunt_mult /= 2; /* reduce by 50% */ | 225 | monc->hunt_mult /= 2; /* reduce by 50% */ |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 78ae6e8c953d..ba45b074a362 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -841,6 +841,7 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, | |||
841 | struct ceph_pagelist *pagelist; | 841 | struct ceph_pagelist *pagelist; |
842 | size_t payload_len = 0; | 842 | size_t payload_len = 0; |
843 | size_t size; | 843 | size_t size; |
844 | int ret; | ||
844 | 845 | ||
845 | op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); | 846 | op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); |
846 | 847 | ||
@@ -852,20 +853,27 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, | |||
852 | size = strlen(class); | 853 | size = strlen(class); |
853 | BUG_ON(size > (size_t) U8_MAX); | 854 | BUG_ON(size > (size_t) U8_MAX); |
854 | op->cls.class_len = size; | 855 | op->cls.class_len = size; |
855 | ceph_pagelist_append(pagelist, class, size); | 856 | ret = ceph_pagelist_append(pagelist, class, size); |
857 | if (ret) | ||
858 | goto err_pagelist_free; | ||
856 | payload_len += size; | 859 | payload_len += size; |
857 | 860 | ||
858 | op->cls.method_name = method; | 861 | op->cls.method_name = method; |
859 | size = strlen(method); | 862 | size = strlen(method); |
860 | BUG_ON(size > (size_t) U8_MAX); | 863 | BUG_ON(size > (size_t) U8_MAX); |
861 | op->cls.method_len = size; | 864 | op->cls.method_len = size; |
862 | ceph_pagelist_append(pagelist, method, size); | 865 | ret = ceph_pagelist_append(pagelist, method, size); |
866 | if (ret) | ||
867 | goto err_pagelist_free; | ||
863 | payload_len += size; | 868 | payload_len += size; |
864 | 869 | ||
865 | osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist); | 870 | osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist); |
866 | |||
867 | op->indata_len = payload_len; | 871 | op->indata_len = payload_len; |
868 | return 0; | 872 | return 0; |
873 | |||
874 | err_pagelist_free: | ||
875 | ceph_pagelist_release(pagelist); | ||
876 | return ret; | ||
869 | } | 877 | } |
870 | EXPORT_SYMBOL(osd_req_op_cls_init); | 878 | EXPORT_SYMBOL(osd_req_op_cls_init); |
871 | 879 | ||
@@ -877,6 +885,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, | |||
877 | opcode, 0); | 885 | opcode, 0); |
878 | struct ceph_pagelist *pagelist; | 886 | struct ceph_pagelist *pagelist; |
879 | size_t payload_len; | 887 | size_t payload_len; |
888 | int ret; | ||
880 | 889 | ||
881 | BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); | 890 | BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); |
882 | 891 | ||
@@ -886,10 +895,14 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, | |||
886 | 895 | ||
887 | payload_len = strlen(name); | 896 | payload_len = strlen(name); |
888 | op->xattr.name_len = payload_len; | 897 | op->xattr.name_len = payload_len; |
889 | ceph_pagelist_append(pagelist, name, payload_len); | 898 | ret = ceph_pagelist_append(pagelist, name, payload_len); |
899 | if (ret) | ||
900 | goto err_pagelist_free; | ||
890 | 901 | ||
891 | op->xattr.value_len = size; | 902 | op->xattr.value_len = size; |
892 | ceph_pagelist_append(pagelist, value, size); | 903 | ret = ceph_pagelist_append(pagelist, value, size); |
904 | if (ret) | ||
905 | goto err_pagelist_free; | ||
893 | payload_len += size; | 906 | payload_len += size; |
894 | 907 | ||
895 | op->xattr.cmp_op = cmp_op; | 908 | op->xattr.cmp_op = cmp_op; |
@@ -898,6 +911,10 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, | |||
898 | ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); | 911 | ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); |
899 | op->indata_len = payload_len; | 912 | op->indata_len = payload_len; |
900 | return 0; | 913 | return 0; |
914 | |||
915 | err_pagelist_free: | ||
916 | ceph_pagelist_release(pagelist); | ||
917 | return ret; | ||
901 | } | 918 | } |
902 | EXPORT_SYMBOL(osd_req_op_xattr_init); | 919 | EXPORT_SYMBOL(osd_req_op_xattr_init); |
903 | 920 | ||
@@ -1488,7 +1505,6 @@ enum calc_target_result { | |||
1488 | 1505 | ||
1489 | static enum calc_target_result calc_target(struct ceph_osd_client *osdc, | 1506 | static enum calc_target_result calc_target(struct ceph_osd_client *osdc, |
1490 | struct ceph_osd_request_target *t, | 1507 | struct ceph_osd_request_target *t, |
1491 | struct ceph_connection *con, | ||
1492 | bool any_change) | 1508 | bool any_change) |
1493 | { | 1509 | { |
1494 | struct ceph_pg_pool_info *pi; | 1510 | struct ceph_pg_pool_info *pi; |
@@ -2272,7 +2288,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) | |||
2272 | dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); | 2288 | dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); |
2273 | 2289 | ||
2274 | again: | 2290 | again: |
2275 | ct_res = calc_target(osdc, &req->r_t, NULL, false); | 2291 | ct_res = calc_target(osdc, &req->r_t, false); |
2276 | if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked) | 2292 | if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked) |
2277 | goto promote; | 2293 | goto promote; |
2278 | 2294 | ||
@@ -2476,6 +2492,14 @@ void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err) | |||
2476 | } | 2492 | } |
2477 | EXPORT_SYMBOL(ceph_osdc_abort_requests); | 2493 | EXPORT_SYMBOL(ceph_osdc_abort_requests); |
2478 | 2494 | ||
2495 | void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc) | ||
2496 | { | ||
2497 | down_write(&osdc->lock); | ||
2498 | osdc->abort_err = 0; | ||
2499 | up_write(&osdc->lock); | ||
2500 | } | ||
2501 | EXPORT_SYMBOL(ceph_osdc_clear_abort_err); | ||
2502 | |||
2479 | static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) | 2503 | static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) |
2480 | { | 2504 | { |
2481 | if (likely(eb > osdc->epoch_barrier)) { | 2505 | if (likely(eb > osdc->epoch_barrier)) { |
@@ -3087,7 +3111,7 @@ static void linger_submit(struct ceph_osd_linger_request *lreq) | |||
3087 | lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id; | 3111 | lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id; |
3088 | } | 3112 | } |
3089 | 3113 | ||
3090 | calc_target(osdc, &lreq->t, NULL, false); | 3114 | calc_target(osdc, &lreq->t, false); |
3091 | osd = lookup_create_osd(osdc, lreq->t.osd, true); | 3115 | osd = lookup_create_osd(osdc, lreq->t.osd, true); |
3092 | link_linger(osd, lreq); | 3116 | link_linger(osd, lreq); |
3093 | 3117 | ||
@@ -3704,7 +3728,7 @@ recalc_linger_target(struct ceph_osd_linger_request *lreq) | |||
3704 | struct ceph_osd_client *osdc = lreq->osdc; | 3728 | struct ceph_osd_client *osdc = lreq->osdc; |
3705 | enum calc_target_result ct_res; | 3729 | enum calc_target_result ct_res; |
3706 | 3730 | ||
3707 | ct_res = calc_target(osdc, &lreq->t, NULL, true); | 3731 | ct_res = calc_target(osdc, &lreq->t, true); |
3708 | if (ct_res == CALC_TARGET_NEED_RESEND) { | 3732 | if (ct_res == CALC_TARGET_NEED_RESEND) { |
3709 | struct ceph_osd *osd; | 3733 | struct ceph_osd *osd; |
3710 | 3734 | ||
@@ -3776,8 +3800,7 @@ static void scan_requests(struct ceph_osd *osd, | |||
3776 | n = rb_next(n); /* unlink_request(), check_pool_dne() */ | 3800 | n = rb_next(n); /* unlink_request(), check_pool_dne() */ |
3777 | 3801 | ||
3778 | dout("%s req %p tid %llu\n", __func__, req, req->r_tid); | 3802 | dout("%s req %p tid %llu\n", __func__, req, req->r_tid); |
3779 | ct_res = calc_target(osdc, &req->r_t, &req->r_osd->o_con, | 3803 | ct_res = calc_target(osdc, &req->r_t, false); |
3780 | false); | ||
3781 | switch (ct_res) { | 3804 | switch (ct_res) { |
3782 | case CALC_TARGET_NO_ACTION: | 3805 | case CALC_TARGET_NO_ACTION: |
3783 | force_resend_writes = cleared_full || | 3806 | force_resend_writes = cleared_full || |
@@ -3886,7 +3909,7 @@ static void kick_requests(struct ceph_osd_client *osdc, | |||
3886 | n = rb_next(n); | 3909 | n = rb_next(n); |
3887 | 3910 | ||
3888 | if (req->r_t.epoch < osdc->osdmap->epoch) { | 3911 | if (req->r_t.epoch < osdc->osdmap->epoch) { |
3889 | ct_res = calc_target(osdc, &req->r_t, NULL, false); | 3912 | ct_res = calc_target(osdc, &req->r_t, false); |
3890 | if (ct_res == CALC_TARGET_POOL_DNE) { | 3913 | if (ct_res == CALC_TARGET_POOL_DNE) { |
3891 | erase_request(need_resend, req); | 3914 | erase_request(need_resend, req); |
3892 | check_pool_dne(req); | 3915 | check_pool_dne(req); |
@@ -5087,6 +5110,24 @@ out_put_req: | |||
5087 | EXPORT_SYMBOL(ceph_osdc_call); | 5110 | EXPORT_SYMBOL(ceph_osdc_call); |
5088 | 5111 | ||
5089 | /* | 5112 | /* |
5113 | * reset all osd connections | ||
5114 | */ | ||
5115 | void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc) | ||
5116 | { | ||
5117 | struct rb_node *n; | ||
5118 | |||
5119 | down_write(&osdc->lock); | ||
5120 | for (n = rb_first(&osdc->osds); n; ) { | ||
5121 | struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); | ||
5122 | |||
5123 | n = rb_next(n); | ||
5124 | if (!reopen_osd(osd)) | ||
5125 | kick_osd_requests(osd); | ||
5126 | } | ||
5127 | up_write(&osdc->lock); | ||
5128 | } | ||
5129 | |||
5130 | /* | ||
5090 | * init, shutdown | 5131 | * init, shutdown |
5091 | */ | 5132 | */ |
5092 | int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | 5133 | int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 90437906b7bc..4e0de14f80bb 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -973,11 +973,11 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) | |||
973 | struct ceph_pg_pool_info, node); | 973 | struct ceph_pg_pool_info, node); |
974 | __remove_pg_pool(&map->pg_pools, pi); | 974 | __remove_pg_pool(&map->pg_pools, pi); |
975 | } | 975 | } |
976 | kfree(map->osd_state); | 976 | kvfree(map->osd_state); |
977 | kfree(map->osd_weight); | 977 | kvfree(map->osd_weight); |
978 | kfree(map->osd_addr); | 978 | kvfree(map->osd_addr); |
979 | kfree(map->osd_primary_affinity); | 979 | kvfree(map->osd_primary_affinity); |
980 | kfree(map->crush_workspace); | 980 | kvfree(map->crush_workspace); |
981 | kfree(map); | 981 | kfree(map); |
982 | } | 982 | } |
983 | 983 | ||
@@ -986,28 +986,41 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) | |||
986 | * | 986 | * |
987 | * The new elements are properly initialized. | 987 | * The new elements are properly initialized. |
988 | */ | 988 | */ |
989 | static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) | 989 | static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max) |
990 | { | 990 | { |
991 | u32 *state; | 991 | u32 *state; |
992 | u32 *weight; | 992 | u32 *weight; |
993 | struct ceph_entity_addr *addr; | 993 | struct ceph_entity_addr *addr; |
994 | u32 to_copy; | ||
994 | int i; | 995 | int i; |
995 | 996 | ||
996 | state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS); | 997 | dout("%s old %u new %u\n", __func__, map->max_osd, max); |
997 | if (!state) | 998 | if (max == map->max_osd) |
998 | return -ENOMEM; | 999 | return 0; |
999 | map->osd_state = state; | ||
1000 | 1000 | ||
1001 | weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS); | 1001 | state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS); |
1002 | if (!weight) | 1002 | weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS); |
1003 | addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS); | ||
1004 | if (!state || !weight || !addr) { | ||
1005 | kvfree(state); | ||
1006 | kvfree(weight); | ||
1007 | kvfree(addr); | ||
1003 | return -ENOMEM; | 1008 | return -ENOMEM; |
1004 | map->osd_weight = weight; | 1009 | } |
1005 | 1010 | ||
1006 | addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS); | 1011 | to_copy = min(map->max_osd, max); |
1007 | if (!addr) | 1012 | if (map->osd_state) { |
1008 | return -ENOMEM; | 1013 | memcpy(state, map->osd_state, to_copy * sizeof(*state)); |
1009 | map->osd_addr = addr; | 1014 | memcpy(weight, map->osd_weight, to_copy * sizeof(*weight)); |
1015 | memcpy(addr, map->osd_addr, to_copy * sizeof(*addr)); | ||
1016 | kvfree(map->osd_state); | ||
1017 | kvfree(map->osd_weight); | ||
1018 | kvfree(map->osd_addr); | ||
1019 | } | ||
1010 | 1020 | ||
1021 | map->osd_state = state; | ||
1022 | map->osd_weight = weight; | ||
1023 | map->osd_addr = addr; | ||
1011 | for (i = map->max_osd; i < max; i++) { | 1024 | for (i = map->max_osd; i < max; i++) { |
1012 | map->osd_state[i] = 0; | 1025 | map->osd_state[i] = 0; |
1013 | map->osd_weight[i] = CEPH_OSD_OUT; | 1026 | map->osd_weight[i] = CEPH_OSD_OUT; |
@@ -1017,12 +1030,16 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) | |||
1017 | if (map->osd_primary_affinity) { | 1030 | if (map->osd_primary_affinity) { |
1018 | u32 *affinity; | 1031 | u32 *affinity; |
1019 | 1032 | ||
1020 | affinity = krealloc(map->osd_primary_affinity, | 1033 | affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)), |
1021 | max*sizeof(*affinity), GFP_NOFS); | 1034 | GFP_NOFS); |
1022 | if (!affinity) | 1035 | if (!affinity) |
1023 | return -ENOMEM; | 1036 | return -ENOMEM; |
1024 | map->osd_primary_affinity = affinity; | ||
1025 | 1037 | ||
1038 | memcpy(affinity, map->osd_primary_affinity, | ||
1039 | to_copy * sizeof(*affinity)); | ||
1040 | kvfree(map->osd_primary_affinity); | ||
1041 | |||
1042 | map->osd_primary_affinity = affinity; | ||
1026 | for (i = map->max_osd; i < max; i++) | 1043 | for (i = map->max_osd; i < max; i++) |
1027 | map->osd_primary_affinity[i] = | 1044 | map->osd_primary_affinity[i] = |
1028 | CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; | 1045 | CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; |
@@ -1043,7 +1060,7 @@ static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) | |||
1043 | 1060 | ||
1044 | work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE); | 1061 | work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE); |
1045 | dout("%s work_size %zu bytes\n", __func__, work_size); | 1062 | dout("%s work_size %zu bytes\n", __func__, work_size); |
1046 | workspace = kmalloc(work_size, GFP_NOIO); | 1063 | workspace = ceph_kvmalloc(work_size, GFP_NOIO); |
1047 | if (!workspace) { | 1064 | if (!workspace) { |
1048 | crush_destroy(crush); | 1065 | crush_destroy(crush); |
1049 | return -ENOMEM; | 1066 | return -ENOMEM; |
@@ -1052,7 +1069,7 @@ static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) | |||
1052 | 1069 | ||
1053 | if (map->crush) | 1070 | if (map->crush) |
1054 | crush_destroy(map->crush); | 1071 | crush_destroy(map->crush); |
1055 | kfree(map->crush_workspace); | 1072 | kvfree(map->crush_workspace); |
1056 | map->crush = crush; | 1073 | map->crush = crush; |
1057 | map->crush_workspace = workspace; | 1074 | map->crush_workspace = workspace; |
1058 | return 0; | 1075 | return 0; |
@@ -1298,9 +1315,9 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff) | |||
1298 | if (!map->osd_primary_affinity) { | 1315 | if (!map->osd_primary_affinity) { |
1299 | int i; | 1316 | int i; |
1300 | 1317 | ||
1301 | map->osd_primary_affinity = kmalloc_array(map->max_osd, | 1318 | map->osd_primary_affinity = ceph_kvmalloc( |
1302 | sizeof(u32), | 1319 | array_size(map->max_osd, sizeof(*map->osd_primary_affinity)), |
1303 | GFP_NOFS); | 1320 | GFP_NOFS); |
1304 | if (!map->osd_primary_affinity) | 1321 | if (!map->osd_primary_affinity) |
1305 | return -ENOMEM; | 1322 | return -ENOMEM; |
1306 | 1323 | ||
@@ -1321,7 +1338,7 @@ static int decode_primary_affinity(void **p, void *end, | |||
1321 | 1338 | ||
1322 | ceph_decode_32_safe(p, end, len, e_inval); | 1339 | ceph_decode_32_safe(p, end, len, e_inval); |
1323 | if (len == 0) { | 1340 | if (len == 0) { |
1324 | kfree(map->osd_primary_affinity); | 1341 | kvfree(map->osd_primary_affinity); |
1325 | map->osd_primary_affinity = NULL; | 1342 | map->osd_primary_affinity = NULL; |
1326 | return 0; | 1343 | return 0; |
1327 | } | 1344 | } |
diff --git a/security/keys/trusted.c b/security/keys/trusted.c index ade699131065..1fbd77816610 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c | |||
@@ -1228,11 +1228,16 @@ hashalg_fail: | |||
1228 | 1228 | ||
1229 | static int __init init_digests(void) | 1229 | static int __init init_digests(void) |
1230 | { | 1230 | { |
1231 | int i; | ||
1232 | |||
1231 | digests = kcalloc(chip->nr_allocated_banks, sizeof(*digests), | 1233 | digests = kcalloc(chip->nr_allocated_banks, sizeof(*digests), |
1232 | GFP_KERNEL); | 1234 | GFP_KERNEL); |
1233 | if (!digests) | 1235 | if (!digests) |
1234 | return -ENOMEM; | 1236 | return -ENOMEM; |
1235 | 1237 | ||
1238 | for (i = 0; i < chip->nr_allocated_banks; i++) | ||
1239 | digests[i].alg_id = chip->allocated_banks[i].alg_id; | ||
1240 | |||
1236 | return 0; | 1241 | return 0; |
1237 | } | 1242 | } |
1238 | 1243 | ||
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index 8059ce834247..61df01cdf0b2 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore | |||
@@ -2,3 +2,5 @@ gpiogpio-event-mon | |||
2 | gpiogpio-hammer | 2 | gpiogpio-hammer |
3 | gpioinclude/ | 3 | gpioinclude/ |
4 | gpiolsgpio | 4 | gpiolsgpio |
5 | tpm2/SpaceTest.log | ||
6 | tpm2/*.pyc | ||
diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile index 9dd848427a7b..bf401f725eef 100644 --- a/tools/testing/selftests/tpm2/Makefile +++ b/tools/testing/selftests/tpm2/Makefile | |||
@@ -2,3 +2,4 @@ | |||
2 | include ../lib.mk | 2 | include ../lib.mk |
3 | 3 | ||
4 | TEST_PROGS := test_smoke.sh test_space.sh | 4 | TEST_PROGS := test_smoke.sh test_space.sh |
5 | TEST_FILES := tpm2.py tpm2_tests.py | ||