Diffstat (limited to 'mm/gup.c')
-rw-r--r--	mm/gup.c	125
1 file changed, 57 insertions(+), 68 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,85 +29,70 @@ struct follow_page_context {
 	unsigned int page_mask;
 };
 
-typedef int (*set_dirty_func_t)(struct page *page);
-
-static void __put_user_pages_dirty(struct page **pages,
-				   unsigned long npages,
-				   set_dirty_func_t sdf)
-{
-	unsigned long index;
-
-	for (index = 0; index < npages; index++) {
-		struct page *page = compound_head(pages[index]);
-
-		/*
-		 * Checking PageDirty at this point may race with
-		 * clear_page_dirty_for_io(), but that's OK. Two key cases:
-		 *
-		 * 1) This code sees the page as already dirty, so it skips
-		 * the call to sdf(). That could happen because
-		 * clear_page_dirty_for_io() called page_mkclean(),
-		 * followed by set_page_dirty(). However, now the page is
-		 * going to get written back, which meets the original
-		 * intention of setting it dirty, so all is well:
-		 * clear_page_dirty_for_io() goes on to call
-		 * TestClearPageDirty(), and write the page back.
-		 *
-		 * 2) This code sees the page as clean, so it calls sdf().
-		 * The page stays dirty, despite being written back, so it
-		 * gets written back again in the next writeback cycle.
-		 * This is harmless.
-		 */
-		if (!PageDirty(page))
-			sdf(page);
-
-		put_user_page(page);
-	}
-}
-
 /**
- * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
+ * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
+ * @pages:  array of pages to be maybe marked dirty, and definitely released.
  * @npages: number of pages in the @pages array.
+ * @make_dirty: whether to mark the pages dirty
  *
  * "gup-pinned page" refers to a page that has had one of the get_user_pages()
  * variants called on that page.
  *
  * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
+ * compound page) dirty, if @make_dirty is true, and if the page was previously
+ * listed as clean. In any case, releases all pages using put_user_page(),
+ * possibly via put_user_pages(), for the non-dirty case.
  *
  * Please see the put_user_page() documentation for details.
  *
- * set_page_dirty(), which does not lock the page, is used here.
- * Therefore, it is the caller's responsibility to ensure that this is
- * safe. If not, then put_user_pages_dirty_lock() should be called instead.
- *
- */
-void put_user_pages_dirty(struct page **pages, unsigned long npages)
-{
-	__put_user_pages_dirty(pages, npages, set_page_dirty);
-}
-EXPORT_SYMBOL(put_user_pages_dirty);
-
-/**
- * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
- * @npages: number of pages in the @pages array.
- *
- * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
- *
- * Please see the put_user_page() documentation for details.
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), put_user_page().
  *
- * This is just like put_user_pages_dirty(), except that it invokes
- * set_page_dirty_lock(), instead of set_page_dirty().
- *
  */
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+			       bool make_dirty)
 {
-	__put_user_pages_dirty(pages, npages, set_page_dirty_lock);
+	unsigned long index;
+
+	/*
+	 * TODO: this can be optimized for huge pages: if a series of pages is
+	 * physically contiguous and part of the same compound page, then a
+	 * single operation to the head page should suffice.
+	 */
+
+	if (!make_dirty) {
+		put_user_pages(pages, npages);
+		return;
+	}
+
+	for (index = 0; index < npages; index++) {
+		struct page *page = compound_head(pages[index]);
+		/*
+		 * Checking PageDirty at this point may race with
+		 * clear_page_dirty_for_io(), but that's OK. Two key
+		 * cases:
+		 *
+		 * 1) This code sees the page as already dirty, so it
+		 * skips the call to set_page_dirty(). That could happen
+		 * because clear_page_dirty_for_io() called
+		 * page_mkclean(), followed by set_page_dirty().
+		 * However, now the page is going to get written back,
+		 * which meets the original intention of setting it
+		 * dirty, so all is well: clear_page_dirty_for_io() goes
+		 * on to call TestClearPageDirty(), and write the page
+		 * back.
+		 *
+		 * 2) This code sees the page as clean, so it calls
+		 * set_page_dirty(). The page stays dirty, despite being
+		 * written back, so it gets written back again in the
+		 * next writeback cycle. This is harmless.
+		 */
+		if (!PageDirty(page))
+			set_page_dirty_lock(page);
+		put_user_page(page);
+	}
 }
 EXPORT_SYMBOL(put_user_pages_dirty_lock);
 
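For callers, this hunk collapses the old two-function choice into a single entry point with a make_dirty argument. A minimal caller sketch, assuming a hypothetical driver that pinned the pages with get_user_pages() (my_driver_unpin and device_wrote are illustrative names, not from this diff):

/*
 * Hypothetical caller sketch: release pages previously pinned with
 * get_user_pages().  The make_dirty argument replaces the old choice
 * between put_user_pages_dirty() and put_user_pages_dirty_lock().
 */
static void my_driver_unpin(struct page **pages, unsigned long npages,
			    bool device_wrote)
{
	/* Dirty the pages only if the device actually wrote to them. */
	put_user_pages_dirty_lock(pages, npages, device_wrote);
}

Callers that previously relied on the unlocked set_page_dirty() path now either accept set_page_dirty_lock() or open-code the sequence themselves, as the updated kernel-doc above spells out.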
@@ -399,7 +384,7 @@ retry_locked:
 		spin_unlock(ptl);
 		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 	}
-	if (flags & FOLL_SPLIT) {
+	if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
 		int ret;
 		page = pmd_page(*pmd);
 		if (is_huge_zero_page(page)) {
@@ -408,7 +393,7 @@ retry_locked:
 			split_huge_pmd(vma, pmd, address);
 			if (pmd_trans_unstable(pmd))
 				ret = -EBUSY;
-		} else {
+		} else if (flags & FOLL_SPLIT) {
 			if (unlikely(!try_get_page(page))) {
 				spin_unlock(ptl);
 				return ERR_PTR(-ENOMEM);
@@ -420,6 +405,10 @@ retry_locked:
 			put_page(page);
 			if (pmd_none(*pmd))
 				return no_page_table(vma, flags);
+		} else {	/* flags & FOLL_SPLIT_PMD */
+			spin_unlock(ptl);
+			split_huge_pmd(vma, pmd, address);
+			ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
 		}
 
 		return ret ? ERR_PTR(ret) :
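The new FOLL_SPLIT_PMD branch differs from FOLL_SPLIT in what gets split: FOLL_SPLIT calls split_huge_page() and breaks up the compound page itself, while FOLL_SPLIT_PMD only calls split_huge_pmd() to shatter the page-table mapping into PTEs (allocating a PTE table via pte_alloc() where needed), leaving the underlying THP intact. A sketch of how a kernel-internal user might request that, assuming mmap_sem is held for the vma; split_mapping_only is an illustrative name, not from this diff:

/*
 * Illustrative sketch (not part of this diff): map a THP at PTE
 * granularity without splitting the compound page itself.
 */
static int split_mapping_only(struct vm_area_struct *vma, unsigned long vaddr)
{
	struct page *page;

	page = follow_page(vma, vaddr, FOLL_GET | FOLL_SPLIT_PMD);
	if (IS_ERR_OR_NULL(page))
		return page ? PTR_ERR(page) : -EFAULT;

	/*
	 * The PMD now points to a table of PTEs; the returned page is
	 * still (part of) the original compound page.
	 */
	put_page(page);
	return 0;
}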
@@ -1460,7 +1449,7 @@ check_again:
 		 * gup may start from a tail page. Advance step by the left
 		 * part.
 		 */
-		step = (1 << compound_order(head)) - (pages[i] - head);
+		step = compound_nr(head) - (pages[i] - head);
 		/*
 		 * If we get a page from the CMA zone, since we are going to
 		 * be pinning these entries, we might as well move them out
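The last hunk is a behaviour-preserving cleanup: compound_nr() is a helper wrapping the previously open-coded shift, along these lines (paraphrased from include/linux/mm.h; check the tree for the authoritative definition):

/*
 * The number of pages in a potentially compound page, so
 * 1 << compound_order(head) and compound_nr(head) compute the
 * same step value here.
 */
static inline unsigned long compound_nr(struct page *page)
{
	return 1UL << compound_order(page);
}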